/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Exposes the family of BLAS routines as pre-canned high performance calls for
// use in conjunction with the StreamExecutor abstraction.
//
// Note that this interface is optionally supported by platforms; see
// StreamExecutor::SupportsBlas() for details.
//
// This abstraction makes it simple to entrain BLAS operations on GPU data into
// a Stream -- users typically will not use this API directly, but will use the
// Stream builder methods to entrain these operations "under the hood". For
// example:
//
//  DeviceMemory<float> x = stream_exec->AllocateArray<float>(1024);
//  DeviceMemory<float> y = stream_exec->AllocateArray<float>(1024);
//  // ... populate x and y ...
//  Stream stream{stream_exec};
//  stream
//    .Init()
//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1);
//  SE_CHECK_OK(stream.BlockHostUntilDone());
//
// By using stream operations in this manner the user can easily intermix custom
// kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned BLAS
// routines.

#ifndef TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
#define TENSORFLOW_STREAM_EXECUTOR_BLAS_H_

#include <complex>
#include <limits>

#include "tensorflow/stream_executor/platform/port.h"

#include "tensorflow/stream_executor/lib/array_slice.h"

// Forward declaration of Eigen's half type so that this header can name
// Eigen::half without including the Eigen headers.
namespace Eigen {
struct half;
}  // namespace Eigen

52f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace perftools {
53f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace gputools {
54f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
55f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass Stream;
5605ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlowerclass ScratchAllocator;
57f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
58f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurtemplate <typename ElemT>
59f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass DeviceMemory;
60f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
61f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace blas {
62f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether the input matrix will be transposed or
// transposed+conjugated before any BLAS operations.
enum class Transpose {
  kNoTranspose,         // Use the matrix as given.
  kTranspose,           // Transpose the matrix.
  kConjugateTranspose,  // Transpose and conjugate the matrix.
};

67f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for t.
68f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring TransposeString(Transpose t);
69f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether the upper or lower triangular part of a
// symmetric/Hermitian matrix is used.
enum class UpperLower {
  kUpper,  // The upper triangular part is used.
  kLower,  // The lower triangular part is used.
};

74f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for ul.
75f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring UpperLowerString(UpperLower ul);
76f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether a matrix is unit triangular.
enum class Diagonal {
  kUnit,     // The matrix is unit triangular.
  kNonUnit,  // The matrix is not unit triangular.
};

80f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for d.
81f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring DiagonalString(Diagonal d);
82f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether a Hermitian matrix appears on the left or on the right in
// the operation.
enum class Side {
  kLeft,   // The matrix appears on the left.
  kRight,  // The matrix appears on the right.
};

87f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for s.
88f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring SideString(Side s);
89f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Type with which intermediate computations of a blas routine are performed.
//
// Some blas calls can perform computations with a type that's different than
// the type of their inputs/outputs.  This lets you e.g. multiply two matrices
// of int8s using float32s to store the matmul's intermediate values.
enum class ComputationType {
  kF16,         // 16-bit floating-point
  kF32,         // 32-bit floating-point
  kF64,         // 64-bit floating-point
  kI32,         // 32-bit integer
  kComplexF32,  // Complex number comprised of two f32s.
  kComplexF64,  // Complex number comprised of two f64s.
};

10401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Converts a ComputationType to a string.
10501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarstring ComputationTypeString(ComputationType ty);
10601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
10701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Opaque identifier for an "algorithm" used by a blas routine.  This functions
10801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// as a hint to the blas library.
10901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebartypedef int64 AlgorithmType;
1103e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kDefaultAlgorithm = -1;
1113e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kDefaultBlasGemm = -2;
1123e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kDefaultBlasGemv = -3;
1133e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangconstexpr AlgorithmType kNoAlgorithm = -4;
11401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
115a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// blas uses -1 to represent the default algorithm. This happens to match up
116a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// with the CUBLAS_GEMM_DFALT constant, so cuda_blas.cc is using static_cast
117a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to convert from AlgorithmType to cublasGemmAlgo_t, and uses a static_assert
118a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to ensure that this assumption does not break.
119a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// If another blas implementation uses a different value for the default
120a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// algorithm, then it needs to convert kDefaultGemmAlgo to that value
121a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// (e.g. via a function called ToWhateverGemmAlgo).
122a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlowerconstexpr AlgorithmType kDefaultGemmAlgo = -1;
123a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower
12401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Describes the result of a performance experiment, usually timing the speed of
12501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// a particular AlgorithmType.
12601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar//
12701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// If the call we were benchmarking failed (a common occurrence; not all
12801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// algorithms are valid for all calls), is_valid() will be false.
12901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarclass ProfileResult {
13001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar public:
13101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool is_valid() const { return is_valid_; }
13201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  void set_is_valid(bool val) { is_valid_ = val; }
13301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  AlgorithmType algorithm() const { return algorithm_; }
13401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  void set_algorithm(AlgorithmType val) { algorithm_ = val; }
13501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  float elapsed_time_in_ms() const { return elapsed_time_in_ms_; }
13601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  void set_elapsed_time_in_ms(float val) { elapsed_time_in_ms_ = val; }
13701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
13801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar private:
13901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool is_valid_ = false;
1403e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  AlgorithmType algorithm_ = kDefaultAlgorithm;
14101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  float elapsed_time_in_ms_ = std::numeric_limits<float>::max();
14201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar};
14301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
1443e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wangclass AlgorithmConfig {
1453e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang public:
1463e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  AlgorithmConfig() : algorithm_(kDefaultAlgorithm) {}
1473e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  explicit AlgorithmConfig(AlgorithmType algorithm) : algorithm_(algorithm) {}
1483e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  AlgorithmType algorithm() const { return algorithm_; }
1493e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  void set_algorithm(AlgorithmType val) { algorithm_ = val; }
1503e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool operator==(const AlgorithmConfig &other) const {
1513e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang    return this->algorithm_ == other.algorithm_;
1523e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  }
1533e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool operator!=(const AlgorithmConfig &other) const {
1543e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang    return !(*this == other);
1553e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  }
1563e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  string ToString() const;
1573e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang
1583e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang private:
1593e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  AlgorithmType algorithm_;
1603e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang};
1613e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang
162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BLAS support interface -- this can be derived from a GPU executor when the
163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// underlying platform has an BLAS library implementation available. See
164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// StreamExecutor::AsBlas().
165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur//
166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Thread-hostile: CUDA associates a CUDA-context with a particular thread in
167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// the system. Any operation that a user attempts to perform by enqueueing BLAS
168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// operations on a thread not-associated with the CUDA-context has unknown
169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// behavior at the current time; see b/13176597
170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass BlasSupport {
171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur public:
172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual ~BlasSupport() {}
173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the sum of magnitudes of the vector elements.
175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // result <- |Re x(1)| + |Im x(1)| + |Re  x(2)| + |Im  x(2)|+ ... + |Re  x(n)|
176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // + |Im x(n)|.
177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Note that Im x(i) = 0 for real types float/double.
178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS y <- ax+y operation.
192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha,
193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha,
196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count,
199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count,
203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Copies vector to another vector: y <- x.
208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS dot product result <- x . y.
222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDot(Stream *stream, uint64 elem_count,
223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &x, int incx,
224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &y, int incy,
225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *result) = 0;
226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDot(Stream *stream, uint64 elem_count,
227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &x, int incx,
228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &y, int incy,
229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *result) = 0;
230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS dot product result <- conj(x) . y for complex types.
232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotc(Stream *stream, uint64 elem_count,
233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *result) = 0;
236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotc(Stream *stream, uint64 elem_count,
237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *result) = 0;
240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS dot product result <- x . y for complex types. Note that
242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // x is unconjugated in this routine.
243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotu(Stream *stream, uint64 elem_count,
244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *result) = 0;
247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotu(Stream *stream, uint64 elem_count,
248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *result) = 0;
251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the Euclidean norm of a vector: result <- ||x||.
253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // See the following link for more information of Euclidean norm:
254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // http://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm
255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs rotation of points in the plane:
269f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // x(i) = c*x(i) + s*y(i)
270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // y(i) = c*y(i) - s*x(i).
271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *x, int incx,
273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *y, int incy, float c,
274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float s) = 0;
275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *x, int incx,
277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *y, int incy, double c,
278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double s) = 0;
279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *x, int incx,
281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *y, int incy,
282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float c, float s) = 0;
283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *x, int incx,
285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *y, int incy,
286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double c, double s) = 0;
287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the parameters for a Givens rotation.
289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Given the Cartesian coordinates (a, b) of a point, these routines return
290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // the parameters c, s, r, and z associated with the Givens rotation. The
291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // parameters c and s define a unitary matrix such that:
292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   |  c s |.| a | = | r |
294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | -s c | | b |   | 0 |
295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // The parameter z is defined such that if |a| > |b|, z is s; otherwise if
297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // c is not 0 z is 1/c; otherwise z is 1.
298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a,
299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *b, DeviceMemory<float> *c,
300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *s) = 0;
301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a,
302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *b, DeviceMemory<double> *c,
303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *s) = 0;
  // Complex overloads: a, b, and s hold complex values, while c is real-valued
  // (float for std::complex<float>, double for std::complex<double>).
304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a,
305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *b,
306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *c,
307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *s) = 0;
308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a,
309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *b,
310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *c,
311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *s) = 0;
312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs modified Givens rotation of points in the plane.
314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Given two vectors x and y, each vector element of these vectors is replaced
315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // as follows:
316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | x(i) | =  H | x(i) |
318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | y(i) |      | y(i) |
319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // for i=1 to n, where H is a modified Givens transformation matrix whose
321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // values are stored in the param[1] through param[4] array.
322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // See the reference BLAS SROTM/DROTM documentation for the layout of param.
  // x and y are updated through their pointers; param is passed by const
  // reference. Only real-valued (float/double) overloads are declared.
323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotm(Stream *stream, uint64 elem_count,
324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx,
325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy,
326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &param) = 0;
327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotm(Stream *stream, uint64 elem_count,
328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx,
329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy,
330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &param) = 0;
331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the parameters for a modified Givens rotation.
333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Given Cartesian coordinates (x1, y1) of an input vector, these routines
334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // compute the components of a modified Givens transformation matrix H that
335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // zeros the y-component of the resulting vector:
336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | x1 | =  H | x1 * sqrt(d1) |
338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   |  0 |      | y1 * sqrt(d2) |
339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // See the reference BLAS SROTMG/DROTMG documentation for the layout of param.
  // d1, d2, x1, and param are passed by mutable pointer; y1 is passed by const
  // reference. Only real-valued (float/double) overloads are declared.
341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1,
342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<float> *d2, DeviceMemory<float> *x1,
343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &y1,
344f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<float> *param) = 0;
345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1,
346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<double> *d2, DeviceMemory<double> *x1,
347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &y1,
348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<double> *param) = 0;
349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the product of a vector by a scalar: x <- alpha * x.
  // x is passed by mutable pointer and receives the result. Overloads cover a
  // real alpha with a real or complex x, and a complex alpha with a complex x.
351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,
352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,
354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,
356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,
358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count,
360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count,
363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Swaps a vector with another vector.
  // Both x and y are passed by mutable pointer. Overloads cover float, double,
  // and their std::complex counterparts; x and y always share an element type.
367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx,
369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx,
372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx,
375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx,
378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Finds the index of the element with maximum absolute value.
  // x is passed by const reference; result is a DeviceMemory<int> for every
  // element type.
  // NOTE(review): whether the index is 0- or 1-based is not stated here; confirm
  // against the backend implementation.
381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &x, int incx,
383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &x, int incx,
386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &x, int incx,
389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &x,
392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int incx, DeviceMemory<int> *result) = 0;
393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Finds the index of the element with minimum absolute value.
  // x is passed by const reference; result is a DeviceMemory<int> for every
  // element type.
  // NOTE(review): whether the index is 0- or 1-based is not stated here; confirm
  // against the backend implementation.
395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &x, int incx,
397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &x, int incx,
400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &x, int incx,
403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &x,
406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int incx, DeviceMemory<int> *result) = 0;
407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a general band matrix:
409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a' * x + beta * y,
413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * conj(a') * x + beta * y,
415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an m-by-n general band matrix, with kl
417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // sub-diagonals and ku super-diagonals; x is a vector with
418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n(trans==kNoTranspose)/m(otherwise) elements;
419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements.
  // a and x are passed by const reference; y is updated through its pointer.
  // In each overload alpha and beta have the same element type as a, x, and y.
420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku, float alpha,
422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx, float beta,
424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku, double alpha,
427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx, double beta,
429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku,
432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku,
439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a general matrix.
446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a' * x + beta * y,
450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * conj(a') * x + beta * y,
452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an m-by-n general matrix; x is a vector
454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // with n(trans==kNoTranspose)/m(otherwise) elements;
455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements.
  // a and x are passed by const reference; y is updated through its pointer.
  // In each overload alpha and beta have the same element type as a, x, and y.
456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, float alpha, const DeviceMemory<float> &a,
458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<float> &x, int incx,
459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<float> *y, int incy) = 0;
460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, double alpha, const DeviceMemory<double> &a,
462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<double> &x, int incx,
463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<double> *y, int incy) = 0;
464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, std::complex<float> alpha,
466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, std::complex<double> alpha,
472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
  // Profiling variants of DoBlasGemv. Each overload takes the same arguments as
  // the DoBlasGemv overload of the same element type, followed by a trailing
  // ProfileResult *output_profile_result.
  // NOTE(review): presumably performs the same matrix-vector product and
  // records timing information in output_profile_result; confirm against the
  // backend implementation, including whether a null pointer is accepted.
4773e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemvWithProfiling(
4783e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n, float alpha,
4793e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x,
4803e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incx, float beta, DeviceMemory<float> *y, int incy,
4813e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
4823e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemvWithProfiling(
4833e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n, double alpha,
4843e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x,
4853e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incx, double beta, DeviceMemory<double> *y, int incy,
4863e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
4873e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemvWithProfiling(
4883e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n,
4893e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<float> alpha, const DeviceMemory<std::complex<float>> &a,
4903e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int lda, const DeviceMemory<std::complex<float>> &x, int incx,
4913e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<float> beta, DeviceMemory<std::complex<float>> *y, int incy,
4923e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
4933e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemvWithProfiling(
4943e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n,
4953e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<double> alpha, const DeviceMemory<std::complex<double>> &a,
4963e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int lda, const DeviceMemory<std::complex<double>> &x, int incx,
4973e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<double> beta, DeviceMemory<std::complex<double>> *y,
4983e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incy, ProfileResult *output_profile_result) = 0;
4993e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang
500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a general matrix.
501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + a,
503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is
505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // an m-by-n general matrix.
  // x and y are passed by const reference; a is updated through its pointer.
  // Only real-valued (float/double) overloads are declared under this name.
506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha,
507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &x, int incx,
508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &y, int incy,
509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *a, int lda) = 0;
510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha,
511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &x, int incx,
512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &y, int incy,
513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *a, int lda) = 0;
514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update (conjugated) of a general matrix.
516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(y') + a,
518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is
520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // an m-by-n general matrix.
  // x and y are passed by const reference; a is updated through its pointer.
  // Only complex-valued overloads are declared under this name.
521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n,
522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *a, int lda) = 0;
526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n,
527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *a, int lda) = 0;
531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update (unconjugated) of a general matrix.
533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + a,
535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is
537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // an m-by-n general matrix.
  // x and y are passed by const reference; a is updated through its pointer.
  // Only complex-valued overloads are declared under this name.
538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n,
539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *a, int lda) = 0;
543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n,
544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *a, int lda) = 0;
548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a Hermitian band matrix.
550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
551f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
552f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
553f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n Hermitian band matrix, with k
554f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // super-diagonals; x and y are n-element vectors.
  // a and x are passed by const reference; y is updated through its pointer.
  // NOTE(review): uplo presumably selects which triangle of a is referenced;
  // confirm against blas::UpperLower.
555f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
556f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, std::complex<float> alpha,
557f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
558f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
559f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
560f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
561f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
562f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, std::complex<double> alpha,
563f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
564f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
565f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
566f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
567f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
568f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a Hermitian matrix.
569f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
570f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
571f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
572f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n Hermitian matrix; x and y are
573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-element vectors.
  // a and x are passed by const reference; y is updated through its pointer.
  // NOTE(review): uplo presumably selects which triangle of a is referenced;
  // confirm against blas::UpperLower.
574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,
575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,
581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a Hermitian matrix.
588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(x') + a,
590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an n-element vector; a is an n-by-n Hermitian
592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix.
  //
  // Pure virtual; overloads are declared for single and double precision.
  // Note that alpha is a real scalar (float/double) even though x and a are
  // complex. The matrix a is updated in place through the pointer.
  // NOTE(review): uplo presumably selects which triangle of a is updated, and
  // the bool result presumably signals success -- confirm against the platform
  // implementations.
593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n,
594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha,
595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<float>> &x, int incx,
596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *a, int lda) = 0;
597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n,
598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha,
599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<double>> &x, int incx,
600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *a, int lda) = 0;
601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a Hermitian matrix.
603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(y') + conj(alpha) * y * conj(x') + a,
605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian
607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix.
  //
  // Pure virtual; overloads are declared for std::complex<float> and
  // std::complex<double>. Unlike the rank-1 update, alpha is complex here.
  // The matrix a is updated in place through the pointer.
  // NOTE(review): uplo presumably selects which triangle of a is updated, and
  // the bool result presumably signals success -- confirm against the platform
  // implementations.
608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,
609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *a, int lda) = 0;
613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,
614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *a, int lda) = 0;
618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a Hermitian packed matrix.
620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n Hermitian matrix, supplied in
624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // packed form; x and y are n-element vectors.
  //
  // Pure virtual; overloads are declared for std::complex<float> and
  // std::complex<double>. The matrix called "a" above is passed as the ap
  // parameter (there is no lda argument). The result is written through y.
  // NOTE(review): uplo presumably tells which triangle ap holds, and the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &ap,
628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &ap,
634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a Hermitian packed matrix.
639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(x') + a,
641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an n-element vector; a is an n-by-n Hermitian
643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form.
  //
  // Pure virtual; overloads are declared for single and double precision.
  // alpha is a real scalar (float/double) even though x and the matrix are
  // complex. The matrix called "a" above is passed as the ap parameter and is
  // updated in place through the pointer.
  // NOTE(review): uplo presumably tells which triangle ap holds, and the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n,
645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha,
646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<float>> &x, int incx,
647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *ap) = 0;
648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n,
649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha,
650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<double>> &x, int incx,
651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *ap) = 0;
652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a Hermitian packed matrix.
654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(y') + conj(alpha) * y * conj(x') + a,
656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian
658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form.
  //
  // Pure virtual; overloads are declared for std::complex<float> and
  // std::complex<double>. The matrix called "a" above is passed as the ap
  // parameter and is updated in place through the pointer.
  // NOTE(review): uplo presumably tells which triangle ap holds, and the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *ap) = 0;
664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *ap) = 0;
669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a symmetric band matrix.
671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n symmetric band matrix, with k
675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // super-diagonals; x and y are n-element vectors.
  //
  // Pure virtual; overloads are declared for float and double. The result is
  // written through y.
  // NOTE(review): the band storage layout of a (and how uplo and lda relate to
  // it) presumably follows BLAS ?sbmv, and the bool result presumably signals
  // success -- confirm against the platform implementations.
676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, float alpha, const DeviceMemory<float> &a,
678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<float> &x, int incx,
679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<float> *y, int incy) = 0;
680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, double alpha, const DeviceMemory<double> &a,
682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<double> &x, int incx,
683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<double> *y, int incy) = 0;
684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a symmetric packed matrix.
686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n symmetric matrix, supplied in
690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // packed form; x and y are n-element vectors.
  //
  // Pure virtual; overloads are declared for float and double. The matrix
  // called "a" above is passed as the ap parameter (there is no lda argument).
  // The result is written through y.
  // NOTE(review): uplo presumably tells which triangle ap holds, and the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &ap,
693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx, float beta,
694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &ap,
697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx, double beta,
698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a symmetric packed matrix.
701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * x' + a,
703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric
705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form.
  //
  // Pure virtual; overloads are declared for float and double. The matrix
  // called "a" above is passed as the ap parameter and is updated in place
  // through the pointer.
  // NOTE(review): uplo presumably tells which triangle ap holds, and the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n,
707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha, const DeviceMemory<float> &x, int incx,
708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *ap) = 0;
709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n,
710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha, const DeviceMemory<double> &x, int incx,
711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *ap) = 0;
712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a symmetric packed matrix.
714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + alpha * y * x' + a,
716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric
718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form.
  //
  // Pure virtual; overloads are declared for float and double. The matrix
  // called "a" above is passed as the ap parameter and is updated in place
  // through the pointer.
  // NOTE(review): uplo presumably tells which triangle ap holds, and the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &x, int incx,
721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &y, int incy,
722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *ap) = 0;
723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &x, int incx,
725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &y, int incy,
726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *ap) = 0;
727f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
728f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product for a symmetric matrix.
729f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
730f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
731f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n symmetric matrix; x and y are
733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-element vectors.
  //
  // Pure virtual; overloads are declared for float and double. The result is
  // written through y.
  // NOTE(review): uplo presumably selects which triangle of a is read, and
  // lda/incx/incy presumably follow the BLAS ?symv leading-dimension/stride
  // conventions; the bool result presumably signals success -- confirm
  // against the platform implementations.
734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,
735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx, float beta,
737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,
739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx, double beta,
741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a symmetric matrix.
744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * x' + a,
746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric
748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix.
  //
  // Pure virtual; overloads are declared for float and double. The matrix a
  // is updated in place through the pointer.
  // NOTE(review): uplo presumably selects which triangle of a is updated, and
  // the bool result presumably signals success -- confirm against the platform
  // implementations.
749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n,
750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha, const DeviceMemory<float> &x, int incx,
751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *a, int lda) = 0;
752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n,
753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha, const DeviceMemory<double> &x, int incx,
754f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *a, int lda) = 0;
755f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
756f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a symmetric matrix.
757f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
758f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + alpha * y * x' + a,
759f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
760f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric
761f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix.
  //
  // Pure virtual; overloads are declared for float and double. The matrix a
  // is updated in place through the pointer.
  // NOTE(review): uplo presumably selects which triangle of a is updated, and
  // the bool result presumably signals success -- confirm against the platform
  // implementations.
762f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,
763f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &x, int incx,
764f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &y, int incy,
765f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *a, int lda) = 0;
766f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,
767f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &x, int incx,
768f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &y, int incy,
769f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *a, int lda) = 0;
770f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
771f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a triangular band matrix.
772f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
773f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a * x,
774f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
775f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a' * x,
776f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
777f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- conj(a') * x,
778f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
779f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // a is an n-by-n unit, or non-unit, upper or lower triangular band matrix,
780f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // with k+1 diagonals; x is an n-element vector.
  //
  // Pure virtual; overloads are declared for float, double,
  // std::complex<float> and std::complex<double>. x is both input and output
  // (it is overwritten through the pointer).
  // NOTE(review): trans presumably selects among the three forms above, uplo
  // the upper/lower variant and diag the unit/non-unit variant; the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
781f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
782f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
783f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<float> &a, int lda,
784f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
785f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
786f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
787f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<double> &a, int lda,
788f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
789f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
790f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
791f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<float>> &a,
792f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<float>> *x,
793f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
794f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
795f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
796f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<double>> &a,
797f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<double>> *x,
798f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
799f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
800f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a system of linear equations whose coefficients are in a triangular
801f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // band matrix as below:
802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a * x = b,
804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a' * x = b,
806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     conj(a') * x = b,
808f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or
810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // lower triangular band matrix, with k+1 diagonals.
  //
  // Pure virtual; overloads are declared for float, double,
  // std::complex<float> and std::complex<double>.
  // NOTE(review): the signature has no separate b parameter; presumably x
  // holds b on entry and is overwritten with the solution, as in BLAS ?tbsv.
  // trans presumably selects among the three forms above, uplo the
  // upper/lower variant and diag the unit/non-unit variant; the bool result
  // presumably signals success -- confirm against the platform
  // implementations.
811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<float> &a, int lda,
814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
815f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<double> &a, int lda,
818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<float>> &a,
822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<float>> *x,
823f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
824f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
825f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
826f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<double>> &a,
827f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<double>> *x,
828f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a triangular packed matrix.
831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
832f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a * x,
833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a' * x,
835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- conj(a') * x,
837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // a is an n-by-n unit, or non-unit, upper or lower triangular matrix,
839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // supplied in packed form; x is an n-element vector.
  //
  // Pure virtual; overloads are declared for float, double,
  // std::complex<float> and std::complex<double>. The matrix called "a" above
  // is passed as the ap parameter; x is both input and output (it is
  // overwritten through the pointer).
  // NOTE(review): trans presumably selects among the three forms above, uplo
  // the upper/lower variant and diag the unit/non-unit variant; the bool
  // result presumably signals success -- confirm against the platform
  // implementations.
840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &ap, DeviceMemory<float> *x,
843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &ap,
847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &ap,
851f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
852f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
853f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
854f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &ap,
855f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
856f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
857f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a system of linear equations whose coefficients are in a triangular
858f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // packed matrix as below:
859f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
860f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a * x = b,
861f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
862f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a' * x = b,
863f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
864f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     conj(a') * x = b,
865f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
866f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or
867f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // lower triangular matrix, supplied in packed form.
  //
  // Pure virtual; overloads are declared for float, double,
  // std::complex<float> and std::complex<double>. The matrix called "a" above
  // is passed as the ap parameter.
  // NOTE(review): the signature has no separate b parameter; presumably x
  // holds b on entry and is overwritten with the solution, as in BLAS ?tpsv.
  // trans presumably selects among the three forms above, uplo the
  // upper/lower variant and diag the unit/non-unit variant; the bool result
  // presumably signals success -- confirm against the platform
  // implementations.
868f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
869f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
870f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &ap, DeviceMemory<float> *x,
871f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
872f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
873f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
874f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &ap,
875f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
876f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
877f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
878f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &ap,
879f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
880f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
881f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
882f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &ap,
883f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
884f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
885f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a triangular matrix.
886f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
887f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a * x,
888f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
889f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a' * x,
890f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
891f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- conj(a') * x,
892f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
893f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // a is an n-by-n unit, or non-unit, upper or lower triangular matrix; x is a
894f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-element vector.
895f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
896f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
897f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
898f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
899f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
900f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
901f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
902f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
903f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
904f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
905f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
906f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
907f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
908f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
909f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
910f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
911f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
912f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a system of linear equations whose coefficients are in a triangular
913f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix as below:
914f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
915f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a * x = b,
916f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
917f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a' * x = b,
918f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
919f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     conj(a') * x = b,
920f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
921f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or
922f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // lower triangular matrix.
923f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
924f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
925f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
926f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
927f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
928f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
929f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
930f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
931f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
938f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
939f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
940f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product with general matrices:
941f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
942f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * op(a) * op(b) + beta * c,
943f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
944f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // op(X) is one of op(X) = X, or op(X) = X', or op(X) = conj(X'); alpha and
945f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // beta are scalars; a, b, and c are matrices; op(a) is an m-by-k matrix;
946f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // op(b) is a k-by-n matrix; c is an m-by-n matrix.
947523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  //
948523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  // Note: The half interface uses float precision internally; the version
949523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  // that uses half precision internally is not yet supported. There is no
950523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  // batched version of the half-precision interface.
951523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
952523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
95301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          float alpha, const DeviceMemory<Eigen::half> &a,
95401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          int lda, const DeviceMemory<Eigen::half> &b, int ldb,
95501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          float beta, DeviceMemory<Eigen::half> *c,
95601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          int ldc) = 0;
957f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
958f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
959f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
960f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &b, int ldb, float beta,
961f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *c, int ldc) = 0;
962f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
963f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
964f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
965f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &b, int ldb, double beta,
966f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *c, int ldc) = 0;
967f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
968f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
969f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
970f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
971f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &b, int ldb,
972f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
973f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
974f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
975f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
976f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
977f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
978f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &b, int ldb,
979f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
980f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
981f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
9823e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemmWithProfiling(
9833e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
9843e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 n, uint64 k, float alpha, const DeviceMemory<Eigen::half> &a,
9853e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int lda, const DeviceMemory<Eigen::half> &b, int ldb, float beta,
9863e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      DeviceMemory<Eigen::half> *c, int ldc,
9873e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
9883e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemmWithProfiling(
9893e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
9903e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, int lda,
9913e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c,
9923e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int ldc, ProfileResult *output_profile_result) = 0;
9933e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemmWithProfiling(
9943e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
9953e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 n, uint64 k, double alpha, const DeviceMemory<double> &a, int lda,
9963e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<double> &b, int ldb, double beta,
9973e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      DeviceMemory<double> *c, int ldc,
9983e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
9993e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemmWithProfiling(
10003e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
10013e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 n, uint64 k, std::complex<float> alpha,
10023e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<float>> &a, int lda,
10033e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<float>> &b, int ldb,
10043e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc,
10053e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
10063e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  virtual bool DoBlasGemmWithProfiling(
10073e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
10083e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 n, uint64 k, std::complex<double> alpha,
10093e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<double>> &a, int lda,
10103e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<double>> &b, int ldb,
10113e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<double> beta, DeviceMemory<std::complex<double>> *c, int ldc,
10123e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      ProfileResult *output_profile_result) = 0;
10133e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang
10143e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  // Gets a list of supported algorithms for DoBlasGemmWithAlgorithm.
101501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool GetBlasGemmAlgorithms(
101601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::vector<AlgorithmType> *out_algorithms) = 0;
101701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
101801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Like DoBlasGemm, but accepts an algorithm and an compute type.
101901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  //
102001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // The compute type lets you say (e.g.) that the inputs and outputs are
102101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Eigen::halfs, but you want the internal computations to be done with
102201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // float32 precision.
102301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  //
102401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Note the subtle difference in the version that accepts Eigen:::half --
102501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // alpha and beta have type const Eigen::half&, not float.
102601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  //
102701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // If output_profile_result is not null, a failure here does not put the
102801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // stream in a failure state.  Instead, success/failure is indicated by
102901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // output_profile_result->is_valid().  This lets you use this function for
103001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // choosing the best algorithm among many (some of which may fail) without
103101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // creating a new Stream for each attempt.
103201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
103301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1034a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a, int lda,
1035a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      const DeviceMemory<int8> &b, int ldb, int beta, DeviceMemory<int32> *c,
1036a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      int ldc, ComputationType computation_type, AlgorithmType algorithm,
1037a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      ProfileResult *output_profile_result) = 0;
1038a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower  virtual bool DoBlasGemmWithAlgorithm(
1039a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
104001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, const Eigen::half &alpha,
104101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &a, int lda,
104201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta,
104301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<Eigen::half> *c, int ldc, ComputationType computation_type,
104401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      AlgorithmType algorithm, ProfileResult *output_profile_result) = 0;
104501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
104601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
104701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, int lda,
104801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c,
104901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int ldc, ComputationType computation_type, AlgorithmType algorithm,
105001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ProfileResult *output_profile_result) = 0;
105101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
105201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
105301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, double alpha, const DeviceMemory<double> &a, int lda,
105401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<double> &b, int ldb, double beta,
105501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<double> *c, int ldc, ComputationType computation_type,
105601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      AlgorithmType algorithm, ProfileResult *output_profile_result) = 0;
105701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
105801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
105901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, std::complex<float> alpha,
106001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &a, int lda,
106101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &b, int ldb,
106201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc,
106301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ComputationType computation_type, AlgorithmType algorithm,
106401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ProfileResult *output_profile_result) = 0;
106501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
106601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
106701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, std::complex<double> alpha,
106801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &a, int lda,
106901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &b, int ldb,
107001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<double> beta, DeviceMemory<std::complex<double>> *c, int ldc,
107101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ComputationType computation_type, AlgorithmType algorithm,
107201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ProfileResult *output_profile_result) = 0;
107301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
1074f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a batch of matrix-matrix product with general matrices.
1075f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // This is a batched version of DoBlasGemm.
1076f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // The batched GEMM computes matrix product for each input/output in a, b,
1077f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // and c, which contain batch_count DeviceMemory objects.
1078f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1079f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1080f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, float alpha,
1081f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &a, int lda,
1082f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta,
1083f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &c, int ldc,
108405ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1085f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1086f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1087f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, double alpha,
1088f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &a, int lda,
1089f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta,
1090f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &c, int ldc,
109105ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1092f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1093f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1094f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, std::complex<float> alpha,
1095f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda,
1096f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb,
1097f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      std::complex<float> beta,
1098f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc,
109905ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, std::complex<double> alpha,
1103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a, int lda,
1104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b, int ldb,
1105f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      std::complex<double> beta,
1106f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, int ldc,
110705ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1108f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1109f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product where one input matrix is Hermitian:
1110f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1111f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * b + beta * c,
1112f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1113f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * b * a + beta * c,
1114f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1115f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is a Hermitian matrix; b and c are m-by-n
1116f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrices.
1117f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemm(Stream *stream, blas::Side side,
1118f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1119f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1120f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1121f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &b, int ldb,
1122f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
1123f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1124f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemm(Stream *stream, blas::Side side,
1125f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1126f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1127f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1128f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &b, int ldb,
1129f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
1130f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1131f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1132f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a Hermitian rank-k update.
1133f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1134f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * conj(a') + beta * c,
1135f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1136f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * conj(a') * a + beta * c,
1137f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1138f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; c is a n-by-n Hermitian matrix; a is an n-by-k
1139f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix in the first case and a k-by-n matrix in the second case.
1140f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo,
1141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha,
1143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<std::complex<float>> *c,
1145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int ldc) = 0;
1146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo,
1147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha,
1149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<std::complex<double>> *c,
1151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int ldc) = 0;
1152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a Hermitian rank-2k update.
1154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * conj(b') + conj(alpha) * b * conj(a') + beta * c,
1156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * conj(b') * a + conj(alpha) * conj(a') * b + beta * c,
1158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; c is an n-by-n Hermitian matrix; a and b are
1160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-by-k matrices in the first case and k-by-n matrices in the second case.
  //
  // alpha has the complex element type of a, b and c, whereas beta is a real
  // scalar (float or double, matching the precision of the overload). c is the
  // only matrix passed by mutable pointer.
  //
  // NOTE(review): reference BLAS documents the second form of ?her2k as
  //     c <- alpha * conj(a') * b + conj(alpha) * conj(b') * a + beta * c,
  // which differs from the formula above whenever alpha is not real -- confirm
  // which operand order the backends actually implement.
  // NOTE(review): trans presumably selects between the two forms and uplo the
  // triangle of c that is referenced -- confirm against the implementations.
1161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo,
1162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<float> alpha,
1164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &a, int lda,
1165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &b, int ldb,
1166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           float beta, DeviceMemory<std::complex<float>> *c,
1167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int ldc) = 0;
1168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo,
1169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<double> alpha,
1171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &a, int lda,
1172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &b, int ldb,
1173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           double beta, DeviceMemory<std::complex<double>> *c,
1174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int ldc) = 0;
1175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product where one input matrix is symmetric.
1177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * b + beta * c,
1179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * b * a + beta * c,
1181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is a symmetric matrix; b and c are m-by-n
1183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrices.
  //
  // Overloads are provided for float, double, std::complex<float> and
  // std::complex<double>; alpha and beta always have the same type as the
  // matrix elements. c is the only matrix passed by mutable pointer.
  // NOTE(review): side presumably selects between the two forms (a applied
  // from the left or from the right) and uplo the stored triangle of a --
  // confirm against the implementations.
1184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
1187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &b, int ldb, float beta,
1188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *c, int ldc) = 0;
1189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
1192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &b, int ldb, double beta,
1193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *c, int ldc) = 0;
1194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &b, int ldb,
1199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
1200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &b, int ldb,
1206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
1207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a symmetric rank-k update.
1210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * a' + beta * c,
1212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a' * a + beta * c,
1214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; c is an n-by-n symmetric matrix; a is an n-by-k
1216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix in the first case and a k-by-n matrix in the second case.
  //
  // Overloads are provided for float, double, std::complex<float> and
  // std::complex<double>. Unlike DoBlasHerk, the complex overloads take alpha
  // and beta as complex scalars of the element type.
  // NOTE(review): trans presumably selects between the two forms and uplo the
  // triangle of c that is referenced -- confirm against the implementations.
1217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
1220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<float> *c, int ldc) = 0;
1221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
1224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<double> *c, int ldc) = 0;
1225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
1230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
1236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a symmetric rank-2k update.
1239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * b' + alpha * b * a' + beta * c,
1241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * b' * a + alpha * a' * b + beta * c,
1243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; c is an n-by-n symmetric matrix; a and b are
1245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-by-k matrices in the first case and k-by-n matrices in the second case.
  //
  // Overloads are provided for float, double, std::complex<float> and
  // std::complex<double>. Unlike DoBlasHer2k, beta is a complex scalar in the
  // complex overloads, i.e. alpha and beta both have the element type.
  // NOTE(review): trans presumably selects between the two forms and uplo the
  // triangle of c that is referenced -- confirm against the implementations.
1246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           float alpha, const DeviceMemory<float> &a, int lda,
1249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &b, int ldb, float beta,
1250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<float> *c, int ldc) = 0;
1251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           double alpha, const DeviceMemory<double> &a, int lda,
1254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &b, int ldb, double beta,
1255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<double> *c, int ldc) = 0;
1256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<float> alpha,
1259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &a, int lda,
1260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &b, int ldb,
1261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<float> beta,
1262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<double> alpha,
1266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &a, int lda,
1267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &b, int ldb,
1268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<double> beta,
1269f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product where one input matrix is triangular.
1272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     b <- alpha * op(a) * b,
1274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     b <- alpha * b * op(a)
1276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; b is an m-by-n matrix; a is a unit, or non-unit, upper
1278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a', or
1279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // op(a) = conj(a').
  //
  // The product replaces b, which is the only matrix passed by mutable
  // pointer; a is read-only. alpha has the same type as the matrix elements,
  // and overloads exist for float, double, std::complex<float> and
  // std::complex<double>.
  // NOTE(review): side presumably selects between the two forms, transa the
  // choice of op(a), and uplo/diag the triangle of a and whether its diagonal
  // is unit -- confirm against the implementations.
1280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, float alpha,
1283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
1284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *b, int ldb) = 0;
1285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, double alpha,
1288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
1289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *b, int ldb) = 0;
1290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *b, int ldb) = 0;
1296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *b, int ldb) = 0;
1302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a triangular matrix equation.
1304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     op(a) * x = alpha * b,
1306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x * op(a) = alpha * b
1308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and b are m-by-n matrices; a is a unit, or non-unit,
1310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // upper or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a',
1311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or op(a) = conj(a').
  //
  // alpha has the same type as the matrix elements, and overloads exist for
  // float, double, std::complex<float> and std::complex<double>. a is
  // read-only; b is passed by mutable pointer.
  // NOTE(review): there is no separate x argument, so the solution is
  // presumably written back into b -- confirm against the implementations.
  // NOTE(review): side presumably selects between the two forms, transa the
  // choice of op(a), and uplo/diag the triangle of a and whether its diagonal
  // is unit -- confirm.
1312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, float alpha,
1315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
1316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *b, int ldb) = 0;
1317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, double alpha,
1320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
1321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *b, int ldb) = 0;
1322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *b, int ldb) = 0;
1328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *b, int ldb) = 0;
1334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur protected:
  // Protected, so a BlasSupport can only be constructed as the base of a
  // derived class; the methods declared above are pure virtual.
1336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  BlasSupport() {}
1337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur private:
  // NOTE(review): presumably disables the copy constructor and copy
  // assignment operator; the macro is defined outside this file -- confirm.
1339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  SE_DISALLOW_COPY_AND_ASSIGN(BlasSupport);
1340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur};
1341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Macro used to quickly declare overrides for abstract virtuals in the
1343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BlasSupport base class.
134405ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower#define TENSORFLOW_STREAM_EXECUTOR_GPU_BLAS_SUPPORT_OVERRIDES                  \
1345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha,              \
1358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha,             \
1361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count,                           \
1364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count,                           \
1368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDot(Stream *stream, uint64 elem_count,                            \
1384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &y, int incy,                       \
1386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *result) override;                        \
1387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDot(Stream *stream, uint64 elem_count,                            \
1388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &x, int incx,                      \
1389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &y, int incy,                      \
1390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *result) override;                       \
1391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotc(Stream *stream, uint64 elem_count,                           \
1392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *result) override;         \
1395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotc(Stream *stream, uint64 elem_count,                           \
1396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *result) override;        \
1399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotu(Stream *stream, uint64 elem_count,                           \
1400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *result) override;         \
1403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotu(Stream *stream, uint64 elem_count,                           \
1404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *result) override;        \
1407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count, /* float */               \
1408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count, /* double */              \
1411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count, /* complex<float> x */    \
1414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override; /* real result */     \
1416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count, /* complex<double> x */   \
1417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override; /* real result */    \
1419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<float> *x,    \
1420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<float> *y, int incy, float c, float s) \
1421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override; /* float */                                                    \
1422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<double> *x,   \
1423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<double> *y, int incy, double c,        \
1424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double s) override; /* double */                              \
1425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count,                            \
1426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *x, int incx,               \
1427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *y, int incy, float c,      \
1428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 float s) override; /* complex<float> x/y, real c and s */     \
1429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count,                            \
1430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<double>> *x, int incx,              \
1431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<double>> *y, int incy, double c,    \
1432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double s) override; /* complex<double> x/y, real c and s */   \
1433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a, /* float */          \
1434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *b, DeviceMemory<float> *c,              \
1435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *s) override;                            \
1436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a, /* double */        \
1437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *b, DeviceMemory<double> *c,            \
1438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *s) override;                           \
1439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a,        \
1440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *b,                        \
1441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, /* c is real; a, b, s complex */     \
1442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *s) override;              \
1443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a,       \
1444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *b,                       \
1445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, /* c is real; a, b, s complex */    \
1446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *s) override;             \
1447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<float> *x,   \
1448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<float> *y, int incy, /* float */      \
1449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &param) override;                  \
1450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<double> *x,  \
1451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<double> *y, int incy, /* double */    \
1452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &param) override;                 \
1453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1, /* float */        \
1454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<float> *d2, DeviceMemory<float> *x1,           \
1455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &y1, DeviceMemory<float> *param)  \
1456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override; /* only y1 is const */                                         \
1457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1, /* double */      \
1458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<double> *d2, DeviceMemory<double> *x1,         \
1459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &y1, /* only y1 is const */      \
1460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<double> *param) override;                      \
1461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,              \
1462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override; /* float */      \
1463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,             \
1464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override; /* double */    \
1465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha, /* real */   \
1466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha, /* real */  \
1468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, /* complex alpha */       \
1470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, /* complex alpha */       \
1473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<float> *x,   \
1476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<float> *y, int incy) override;        \
1477f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<double> *x,  \
1478f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<double> *y, int incy) override;       \
1479f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count, /* complex<float> */      \
1480f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx,              \
1481f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1482f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count, /* complex<double> */     \
1483f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx,             \
1484f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1485f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count, /* float */              \
1486f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &x, int incx,                     \
1487f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1488f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count, /* double */             \
1489f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &x, int incx,                    \
1490f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1491f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count, /* complex<float> */     \
1492f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &x, int incx,       \
1493f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1494f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count, /* complex<double> */    \
1495f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &x, int incx,      \
1496f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1497f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count, /* float */              \
1498f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &x, int incx,                     \
1499f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count, /* double */             \
1501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &x, int incx,                    \
1502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count, /* complex<float> */     \
1504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &x, int incx,       \
1505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count, /* complex<double> */    \
1507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &x, int incx,      \
1508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, float alpha, /* float overload */      \
1511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, double alpha, /* double overload */    \
1516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, std::complex<float> alpha,             \
1521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta, /* complex<float> overload */      \
1524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, std::complex<double> alpha,            \
1527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta, /* complex<double> overload */    \
1530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override; /* float */      \
1535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override; /* double */    \
1539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha, /* complex<float> overload */     \
1541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha, /* complex<double> overload */   \
1547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
15513e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemvWithProfiling( /* float */                                    \
15523e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n, float alpha,  \
15533e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<float> &a, int lda, const DeviceMemory<float> &x,     \
15543e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incx, float beta, DeviceMemory<float> *y, int incy,                  \
15553e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      blas::ProfileResult *output_profile_result) override;                    \
15563e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemvWithProfiling( /* double */                                   \
15573e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n, double alpha, \
15583e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &x,   \
15593e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incx, double beta, DeviceMemory<double> *y, int incy,                \
15603e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      blas::ProfileResult *output_profile_result) override;                    \
15613e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemvWithProfiling( /* complex<float> */                           \
15623e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n,               \
15633e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<float> alpha, const DeviceMemory<std::complex<float>> &a,   \
15643e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int lda, const DeviceMemory<std::complex<float>> &x, int incx,           \
15653e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<float> beta, DeviceMemory<std::complex<float>> *y,          \
15663e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incy, blas::ProfileResult *output_profile_result) override;          \
15673e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemvWithProfiling( /* complex<double> */                          \
15683e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose trans, uint64 m, uint64 n,               \
15693e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<double> alpha, const DeviceMemory<std::complex<double>> &a, \
15703e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int lda, const DeviceMemory<std::complex<double>> &x, int incx,          \
15713e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<double> beta, DeviceMemory<std::complex<double>> *y,        \
15723e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int incy, blas::ProfileResult *output_profile_result) override;          \
1573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha,              \
1574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &y, int incy,                       \
1576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *a, int lda) override; /* float */        \
1577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha,             \
1578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &x, int incx,                      \
1579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &y, int incy,                      \
1580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *a, int lda) override; /* double */      \
1581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, /* complex<float> */     \
1582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *a, int lda) override;     \
1586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGerc(Stream *stream, uint64 m, uint64 n, /* complex<double> */    \
1587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *a, int lda) override;    \
1591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, /* complex<float> */     \
1592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *a, int lda) override;     \
1596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGeru(Stream *stream, uint64 m, uint64 n, /* complex<double> */    \
1597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *a, int lda) override;    \
1601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha, /* complex<float> overload */     \
1603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha, /* complex<double> overload */   \
1609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha, /* complex<float> overload */     \
1615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha, /* complex<double> overload */   \
1621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<std::complex<float>> &x, int incx,         \
1627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *a, int lda) override;      \
1628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<std::complex<double>> &x,    \
1630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<std::complex<double>> *a, int lda)     \
1631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override; /* both overloads: real alpha, complex x and a */              \
1632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha, /* complex<float> overload */     \
1634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *a, int lda) override;     \
1637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha, /* complex<double> overload */   \
1639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *a, int lda) override;    \
1642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha, /* complex<float> overload */     \
1644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &ap, /* no lda */    \
1645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha, /* complex<double> overload */   \
1650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &ap, /* no lda */   \
1651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
/* DoBlasHpr overrides: std::complex<float> and std::complex<double> data      \
 * with a real-valued alpha (float / double respectively).                     \
 * x is a const reference; ap is a non-const pointer (likely output).          \
 * NOTE(review): presumably BLAS HPR (packed Hermitian rank-1); confirm. */    \
1654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<std::complex<float>> &x, int incx,         \
1656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *ap) override;              \
1657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<std::complex<double>> &x,    \
1659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<std::complex<double>> *ap) override;   \
/* DoBlasHpr2 overrides: std::complex<float> and std::complex<double>.         \
 * x and y are const references; ap is a non-const pointer (likely output).    \
 * NOTE(review): presumably BLAS HPR2 (packed Hermitian rank-2); confirm. */   \
1660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *ap) override;             \
1665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *ap) override;            \
/* DoBlasSbmv overrides: float and double.                                     \
 * a and x are const references; y is a non-const pointer (likely output).     \
 * NOTE(review): presumably BLAS SBMV (symmetric banded mat-vec); confirm. */  \
1670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
/* DoBlasSpmv overrides: float and double.                                     \
 * ap and x are const references; y is a non-const pointer (likely output).    \
 * NOTE(review): presumably BLAS SPMV (packed symmetric mat-vec); confirm. */  \
1678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &ap,                  \
1680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &ap,                \
1684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
/* DoBlasSpr overrides: float and double.                                      \
 * x is a const reference; ap is a non-const pointer (likely output).          \
 * NOTE(review): presumably BLAS SPR (packed symmetric rank-1); confirm. */    \
1686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *ap) override;                            \
1689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<double> &x, int incx,        \
1691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *ap) override;                           \
/* DoBlasSpr2 overrides: float and double.                                     \
 * x and y are const references; ap is a non-const pointer (likely output).    \
 * NOTE(review): presumably BLAS SPR2 (packed symmetric rank-2); confirm. */   \
1692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &x, int incx,         \
1694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &y, int incy,                      \
1695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *ap) override;                           \
1696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &x, int incx,       \
1698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &y, int incy,                     \
1699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *ap) override;                          \
/* DoBlasSymv overrides: float and double.                                     \
 * a and x are const references; y is a non-const pointer (likely output).     \
 * NOTE(review): presumably BLAS SYMV (symmetric mat-vec); confirm. */         \
1700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
/* DoBlasSyr overrides: float and double.                                      \
 * x is a const reference; a is a non-const pointer (likely output).           \
 * NOTE(review): presumably BLAS SYR (symmetric rank-1 update); confirm. */    \
1708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *a, int lda) override;                    \
1711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<double> &x, int incx,        \
1713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *a, int lda) override;                   \
/* DoBlasSyr2 overrides: float and double.                                     \
 * x and y are const references; a is a non-const pointer (likely output).     \
 * NOTE(review): presumably BLAS SYR2 (symmetric rank-2 update); confirm. */   \
1714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &x, int incx,         \
1716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &y, int incy,                      \
1717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *a, int lda) override;                   \
1718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &x, int incx,       \
1720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &y, int incy,                     \
1721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *a, int lda) override;                  \
/* DoBlasTbmv overrides: float, double, std::complex<float> and                \
 * std::complex<double>.                                                       \
 * a is a const reference; x is a non-const pointer (likely in/out).           \
 * NOTE(review): presumably BLAS TBMV (triangular banded mat-vec); confirm. */ \
1722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,                       \
1723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<float> &a, int lda,             \
1725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,                       \
1727f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1728f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<double> &a, int lda,            \
1729f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1730f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,                       \
1731f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<float>> &a,        \
1733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<float>> *x, int incx)     \
1734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,                       \
1736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<double>> &a,       \
1738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<double>> *x, int incx)    \
1739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
/* DoBlasTbsv overrides: float, double, std::complex<float> and                \
 * std::complex<double>.                                                       \
 * a is a const reference; x is a non-const pointer (likely in/out).           \
 * NOTE(review): presumably BLAS TBSV (triangular banded solve); confirm. */   \
1740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,                       \
1741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<float> &a, int lda,             \
1743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,                       \
1745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<double> &a, int lda,            \
1747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,                       \
1749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<float>> &a,        \
1751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<float>> *x, int incx)     \
1752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,                       \
1754f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1755f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<double>> &a,       \
1756f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<double>> *x, int incx)    \
1757f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
/* DoBlasTpmv overrides: float, double, std::complex<float> and                \
 * std::complex<double>.                                                       \
 * ap is a const reference; x is a non-const pointer (likely in/out).          \
 * NOTE(review): presumably BLAS TPMV (packed triangular mat-vec); confirm. */ \
1758f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,                       \
1759f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1760f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &ap, DeviceMemory<float> *x,       \
1761f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1762f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,                       \
1763f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1764f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &ap, DeviceMemory<double> *x,     \
1765f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1766f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,                       \
1767f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1768f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &ap,                 \
1769f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1770f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,                       \
1771f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1772f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &ap,                \
1773f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
/* DoBlasTpsv overrides: float, double, std::complex<float> and                \
 * std::complex<double>.                                                       \
 * ap is a const reference; x is a non-const pointer (likely in/out).          \
 * NOTE(review): presumably BLAS TPSV (packed triangular solve); confirm. */   \
1774f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,                       \
1775f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1776f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &ap, DeviceMemory<float> *x,       \
1777f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1778f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,                       \
1779f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1780f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &ap, DeviceMemory<double> *x,     \
1781f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1782f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,                       \
1783f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1784f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &ap,                 \
1785f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1786f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,                       \
1787f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1788f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &ap,                \
1789f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
/* DoBlasTrmv overrides: float, double, std::complex<float> and                \
 * std::complex<double>.                                                       \
 * a is a const reference; x is a non-const pointer (likely in/out).           \
 * NOTE(review): presumably BLAS TRMV (triangular mat-vec); confirm. */        \
1790f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,                       \
1791f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1792f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1793f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1794f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,                       \
1795f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1796f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1797f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1798f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,                       \
1799f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1800f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1801f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,                       \
1803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
/* DoBlasTrsv overrides: float, double, std::complex<float> and                \
 * std::complex<double>.                                                       \
 * a is a const reference; x is a non-const pointer (likely in/out).           \
 * NOTE(review): presumably BLAS TRSV (triangular solve); confirm. */          \
1806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,                       \
1807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1808f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,                       \
1811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,                       \
1815f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,                       \
1819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
/* DoBlasGemm overrides: Eigen::half, float, double, std::complex<float>       \
 * and std::complex<double>. The Eigen::half overload takes float alpha/beta.  \
 * a and b are const references; c is a non-const pointer (likely output).     \
 * NOTE(review): presumably BLAS GEMM (general matrix multiply); confirm. */   \
1822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa,                      \
1823f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1824523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  float alpha, const DeviceMemory<Eigen::half> &a, int lda,    \
1825523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  const DeviceMemory<Eigen::half> &b, int ldb, float beta,     \
1826523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  DeviceMemory<Eigen::half> *c, int ldc) override;             \
1827523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  bool DoBlasGemm(Stream *stream, blas::Transpose transa,                      \
1828523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &b, int ldb, float beta,           \
1831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, int ldc) override;                   \
1832f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa,                      \
1833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &b, int ldb, double beta,         \
1836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, int ldc) override;                  \
1837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa,                      \
1838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &b, int ldb,         \
1842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
1844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa,                      \
1845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &b, int ldb,        \
1849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
/* DoBlasGemmWithProfiling overrides: Eigen::half, float, double,              \
 * std::complex<float> and std::complex<double>.                               \
 * Parameters match DoBlasGemm, plus a trailing blas::ProfileResult pointer    \
 * (output_profile_result). */                                                 \
18513e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemmWithProfiling(                                                \
18523e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
18533e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 m, uint64 n, uint64 k, float alpha,                               \
18543e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<Eigen::half> &a, int lda,                             \
18553e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<Eigen::half> &b, int ldb, float beta,                 \
18563e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      DeviceMemory<Eigen::half> *c, int ldc,                                   \
18573e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      blas::ProfileResult *output_profile_result) override;                    \
18583e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemmWithProfiling(                                                \
18593e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
18603e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, \
18613e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int lda, const DeviceMemory<float> &b, int ldb, float beta,              \
18623e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      DeviceMemory<float> *c, int ldc,                                         \
18633e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      blas::ProfileResult *output_profile_result) override;                    \
18643e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemmWithProfiling(                                                \
18653e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
18663e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 m, uint64 n, uint64 k, double alpha,                              \
18673e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &b,   \
18683e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int ldb, double beta, DeviceMemory<double> *c, int ldc,                  \
18693e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      blas::ProfileResult *output_profile_result) override;                    \
18703e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemmWithProfiling(                                                \
18713e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
18723e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 m, uint64 n, uint64 k, std::complex<float> alpha,                 \
18733e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<float>> &a, int lda,                     \
18743e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<float>> &b, int ldb,                     \
18753e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, \
18763e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      blas::ProfileResult *output_profile_result) override;                    \
18773e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang  bool DoBlasGemmWithProfiling(                                                \
18783e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
18793e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      uint64 m, uint64 n, uint64 k, std::complex<double> alpha,                \
18803e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<double>> &a, int lda,                    \
18813e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      const DeviceMemory<std::complex<double>> &b, int ldb,                    \
18823e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      std::complex<double> beta, DeviceMemory<std::complex<double>> *c,        \
18833e3306ef0009b5b21050139f9b8e5f4868c4c0c7Yangzihao Wang      int ldc, blas::ProfileResult *output_profile_result) override;           \
188401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool GetBlasGemmAlgorithms(std::vector<blas::AlgorithmType> *out_algorithms) \
188501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      override;                                                                \
188601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm(                                                \
188701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1888a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      uint64 m, uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a,    \
1889a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      int lda, const DeviceMemory<int8> &b, int ldb, int beta,                 \
1890a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      DeviceMemory<int> *c, int ldc, blas::ComputationType computation_type,   \
1891a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      blas::AlgorithmType algorithm,                                           \
1892a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      blas::ProfileResult *output_profile_result) override;                    \
1893a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower  bool DoBlasGemmWithAlgorithm(                                                \
1894a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
189501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, const Eigen::half &alpha,                  \
189601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &a, int lda,                             \
189701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta,    \
189801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<Eigen::half> *c, int ldc,                                   \
189901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
190001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
190101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm(                                                \
190201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
190301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, \
190401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int lda, const DeviceMemory<float> &b, int ldb, float beta,              \
190501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<float> *c, int ldc, blas::ComputationType computation_type, \
190601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::AlgorithmType algorithm,                                           \
190701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
190801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm(                                                \
190901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
191001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, double alpha,                              \
191101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &b,   \
191201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int ldb, double beta, DeviceMemory<double> *c, int ldc,                  \
191301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
191401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
191501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm(                                                \
191601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
191701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, std::complex<float> alpha,                 \
191801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &a, int lda,                     \
191901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &b, int ldb,                     \
192001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, \
192101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
192201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
192301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm(                                                \
192401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
192501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, std::complex<double> alpha,                \
192601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &a, int lda,                    \
192701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &b, int ldb,                    \
192801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<double> beta, DeviceMemory<std::complex<double>> *c,        \
192901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int ldc, blas::ComputationType computation_type,                         \
193001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::AlgorithmType algorithm,                                           \
193101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
1932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched(                                                      \
1933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, float alpha,                               \
1935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &a, int lda,               \
1936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta,   \
1937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &c, int ldc,               \
193805ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) override;          \
1939f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched(                                                      \
1940f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1941f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, double alpha,                              \
1942f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &a, int lda,              \
1943f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta, \
1944f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &c, int ldc,              \
194505ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) override;          \
1946f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched(                                                      \
1947f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1948f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, std::complex<float> alpha,                 \
1949f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda, \
1950f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb, \
1951f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      std::complex<float> beta,                                                \
1952f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc, \
195305ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) override;          \
1954f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched(                                                      \
1955f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1956f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, std::complex<double> alpha,                \
1957f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a,         \
1958f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      int lda,                                                                 \
1959f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b,         \
1960f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      int ldb, std::complex<double> beta,                                      \
1961f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c,         \
196205ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \
1963f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1964f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<float> alpha,               \
1965f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1966f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &b, int ldb,         \
1967f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1968f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
1969f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1970f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<double> alpha,              \
1971f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1972f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &b, int ldb,        \
1973f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1974f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
1975f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHerk(Stream *stream, blas::UpperLower uplo,                       \
1976f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, float alpha,      \
1977f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1978f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float beta, DeviceMemory<std::complex<float>> *c, int ldc)   \
1979f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1980f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHerk(Stream *stream, blas::UpperLower uplo,                       \
1981f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, double alpha,     \
1982f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1983f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double beta, DeviceMemory<std::complex<double>> *c, int ldc) \
1984f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1985f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2k(                                                            \
1986f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n,  \
1987f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 k, std::complex<float> alpha,                                     \
1988f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<float>> &a, int lda,                     \
1989f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<float>> &b, int ldb, float beta,         \
1990f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      DeviceMemory<std::complex<float>> *c, int ldc) override;                 \
1991f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2k(                                                            \
1992f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n,  \
1993f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 k, std::complex<double> alpha,                                    \
1994f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<double>> &a, int lda,                    \
1995f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<double>> &b, int ldb, double beta,       \
1996f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      DeviceMemory<std::complex<double>> *c, int ldc) override;                \
1997f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1998f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, float alpha,                             \
1999f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
2000f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &b, int ldb, float beta,           \
2001f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, int ldc) override;                   \
2002f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2003f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, double alpha,                            \
2004f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
2005f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &b, int ldb, double beta,         \
2006f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, int ldc) override;                  \
2007f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2008f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<float> alpha,               \
2009f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
2010f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &b, int ldb,         \
2011f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
2012f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
2013f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2014f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<double> alpha,              \
2015f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
2016f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &b, int ldb,        \
2017f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
2018f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
2019f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
2020f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, float alpha,      \
2021f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda, float beta,           \
2022f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, int ldc) override;                   \
2023f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
2024f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, double alpha,     \
2025f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda, double beta,         \
2026f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, int ldc) override;                  \
2027f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
2028f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k,                   \
2029f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
2030f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
2031f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
2032f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
2033f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
2034f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k,                   \
2035f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
2036f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
2037f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
2038f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
2039f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
2040f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k, float alpha,     \
2041f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &a, int lda,                      \
2042f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &b, int ldb, float beta,          \
2043f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<float> *c, int ldc) override;                  \
2044f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
2045f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k, double alpha,    \
2046f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &a, int lda,                     \
2047f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &b, int ldb, double beta,        \
2048f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<double> *c, int ldc) override;                 \
2049f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
2050f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k,                  \
2051f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<float> alpha,                                  \
2052f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &a, int lda,        \
2053f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &b, int ldb,        \
2054f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<float> beta,                                   \
2055f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<std::complex<float>> *c, int ldc) override;    \
2056f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
2057f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k,                  \
2058f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<double> alpha,                                 \
2059f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &a, int lda,       \
2060f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &b, int ldb,       \
2061f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<double> beta,                                  \
2062f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<std::complex<double>> *c, int ldc) override;   \
2063f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2064f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2065f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, float alpha, const DeviceMemory<float> &a,         \
2066f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<float> *b, int ldb) override;          \
2067f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2068f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2069f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, double alpha, const DeviceMemory<double> &a,       \
2070f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<double> *b, int ldb) override;         \
2071f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2072f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2073f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<float> alpha,                         \
2074f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
2075f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *b, int ldb) override;     \
2076f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2077f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2078f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<double> alpha,                        \
2079f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
2080f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *b, int ldb) override;    \
2081f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2082f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2083f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, float alpha, const DeviceMemory<float> &a,         \
2084f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<float> *b, int ldb) override;          \
2085f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2086f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2087f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, double alpha, const DeviceMemory<double> &a,       \
2088f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<double> *b, int ldb) override;         \
2089f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2090f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2091f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<float> alpha,                         \
2092f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
2093f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *b, int ldb) override;     \
2094f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
2095f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
2096f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<double> alpha,                        \
2097f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
2098f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *b, int ldb) override;
2099f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
2100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace blas
2101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace gputools
2102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace perftools
2103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
2104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#endif  // TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
2105