blas.h revision a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cd
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
159c3043ff3bf31a6a81810b4ce9e87ef936f1f529Manjunath Kudlur
// Exposes the family of BLAS routines as pre-canned high performance calls for
// use in conjunction with the StreamExecutor abstraction.
//
// Note that this interface is optionally supported by platforms; see
// StreamExecutor::SupportsBlas() for details.
//
// This abstraction makes it simple to entrain BLAS operations on GPU data into
// a Stream -- users typically will not use this API directly, but will use the
// Stream builder methods to entrain these operations "under the hood". For
// example:
//
//  DeviceMemory<float> x = stream_exec->AllocateArray<float>(1024);
//  DeviceMemory<float> y = stream_exec->AllocateArray<float>(1024);
//  // ... populate x and y ...
//  Stream stream{stream_exec};
//  stream
//    .Init()
//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1)
//    .BlockHostUntilDone();
//
// By using stream operations in this manner the user can easily intermix custom
// kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned BLAS
// routines.
39f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
40f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#ifndef TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
41f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#define TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
42f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
#include <complex>
#include <limits>

#include "tensorflow/stream_executor/lib/array_slice.h"
#include "tensorflow/stream_executor/platform/port.h"
48b4fa4ad13831174f2276f0b382e06a7d5c42a7e7A. Unique TensorFlower
// Forward declaration of Eigen's half-precision type, so this header can name
// Eigen::half without including any Eigen headers.
namespace Eigen {
struct half;
}  // namespace Eigen
52f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
53f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace perftools {
54f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace gputools {
55f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Forward declarations of the types that the BLAS interface only refers to by
// pointer or reference.
class Stream;
class ScratchAllocator;

template <typename ElemT>
class DeviceMemory;
61f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
62f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace blas {
63f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether the input matrix will be transposed or
// transposed+conjugated before any BLAS operations.
enum class Transpose {
  kNoTranspose,         // Use the matrix as given.
  kTranspose,           // Transpose the matrix.
  kConjugateTranspose,  // Transpose and conjugate the matrix.
};
67f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
68f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for t.
69f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring TransposeString(Transpose t);
70f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether the upper or lower triangular part of a
// symmetric/Hermitian matrix is used.
enum class UpperLower {
  kUpper,  // The upper triangular part is used.
  kLower,  // The lower triangular part is used.
};
74f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
75f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for ul.
76f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring UpperLowerString(UpperLower ul);
77f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether a matrix is unit triangular.
enum class Diagonal {
  kUnit,     // The matrix is unit triangular.
  kNonUnit,  // The matrix is not unit triangular.
};
80f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
81f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for d.
82f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring DiagonalString(Diagonal d);
83f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Specifies whether a Hermitian matrix appears on the left or right in the
// operation.
enum class Side {
  kLeft,   // The matrix appears on the left.
  kRight,  // The matrix appears on the right.
};
87f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
88f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Returns a name for s.
89f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstring SideString(Side s);
90f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
// Type with which intermediate computations of a blas routine are performed.
//
// Some blas calls can perform computations with a type that's different than
// the type of their inputs/outputs.  This lets you e.g. multiply two matrices
// of int8s using float32s to store the matmul's intermediate values.
enum class ComputationType {
  kF16,         // 16-bit floating-point
  kF32,         // 32-bit floating-point
  kF64,         // 64-bit floating-point
  kI32,         // 32-bit integer
  kComplexF32,  // Complex number comprised of two f32s.
  kComplexF64,  // Complex number comprised of two f64s.
};
10401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
10501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Converts a ComputationType to a string.
10601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarstring ComputationTypeString(ComputationType ty);
10701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
10801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Opaque identifier for an "algorithm" used by a blas routine.  This functions
10901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// as a hint to the blas library.
11001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebartypedef int64 AlgorithmType;
11101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
112a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// blas uses -1 to represent the default algorithm. This happens to match up
113a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// with the CUBLAS_GEMM_DFALT constant, so cuda_blas.cc is using static_cast
114a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to convert from AlgorithmType to cublasGemmAlgo_t, and uses a static_assert
115a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// to ensure that this assumption does not break.
116a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// If another blas implementation uses a different value for the default
117a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// algorithm, then it needs to convert kDefaultGemmAlgo to that value
118a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower// (e.g. via a function called ToWhateverGemmAlgo).
119a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlowerconstexpr AlgorithmType kDefaultGemmAlgo = -1;
120a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower
12101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// Describes the result of a performance experiment, usually timing the speed of
12201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// a particular AlgorithmType.
12301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar//
12401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// If the call we were benchmarking failed (a common occurrence; not all
12501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar// algorithms are valid for all calls), is_valid() will be false.
12601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebarclass ProfileResult {
12701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar public:
12801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool is_valid() const { return is_valid_; }
12901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  void set_is_valid(bool val) { is_valid_ = val; }
13001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  AlgorithmType algorithm() const { return algorithm_; }
13101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  void set_algorithm(AlgorithmType val) { algorithm_ = val; }
13201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  float elapsed_time_in_ms() const { return elapsed_time_in_ms_; }
13301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  void set_elapsed_time_in_ms(float val) { elapsed_time_in_ms_ = val; }
13401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
13501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar private:
13601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool is_valid_ = false;
13701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  AlgorithmType algorithm_ = 0;
13801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  float elapsed_time_in_ms_ = std::numeric_limits<float>::max();
13901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar};
14001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
// BLAS support interface -- this can be derived from a GPU executor when the
// underlying platform has a BLAS library implementation available. See
// StreamExecutor::AsBlas().
//
// Thread-hostile: CUDA associates a CUDA-context with a particular thread in
// the system. Any operation that a user attempts to perform by enqueueing BLAS
// operations on a thread not-associated with the CUDA-context has unknown
// behavior at the current time; see b/13176597
149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurclass BlasSupport {
150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur public:
151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual ~BlasSupport() {}
152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the sum of magnitudes of the vector elements.
154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // result <- |Re x(1)| + |Im x(1)| + |Re  x(2)| + |Im  x(2)|+ ... + |Re  x(n)|
155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // + |Im x(n)|.
156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Note that Im x(i) = 0 for real types float/double.
157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAsum(Stream *stream, uint64 elem_count,
167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS y <- ax+y operation.
171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha,
172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha,
175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count,
178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasAxpy(Stream *stream, uint64 elem_count,
182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Copies vector to another vector: y <- x.
187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasCopy(Stream *stream, uint64 elem_count,
197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS dot product result <- x . y.
201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDot(Stream *stream, uint64 elem_count,
202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &x, int incx,
203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &y, int incy,
204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *result) = 0;
205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDot(Stream *stream, uint64 elem_count,
206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &x, int incx,
207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &y, int incy,
208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *result) = 0;
209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS dot product result <- conj(x) . y for complex types.
211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotc(Stream *stream, uint64 elem_count,
212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *result) = 0;
215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotc(Stream *stream, uint64 elem_count,
216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *result) = 0;
219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a BLAS dot product result <- x . y for complex types. Note that
221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // x is unconjugated in this routine.
222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotu(Stream *stream, uint64 elem_count,
223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *result) = 0;
226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasDotu(Stream *stream, uint64 elem_count,
227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *result) = 0;
230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the Euclidean norm of a vector: result <- ||x||.
232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // See the following link for more information of Euclidean norm:
233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // http://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm
234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx,
236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx,
239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *result) = 0;
243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasNrm2(Stream *stream, uint64 elem_count,
244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *result) = 0;
246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs rotation of points in the plane:
248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // x(i) = c*x(i) + s*y(i)
249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // y(i) = c*y(i) - s*x(i).
250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *x, int incx,
252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *y, int incy, float c,
253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float s) = 0;
254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *x, int incx,
256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *y, int incy, double c,
257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double s) = 0;
258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *x, int incx,
260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *y, int incy,
261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float c, float s) = 0;
262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRot(Stream *stream, uint64 elem_count,
263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *x, int incx,
264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *y, int incy,
265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double c, double s) = 0;
266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the parameters for a Givens rotation.
268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Given the Cartesian coordinates (a, b) of a point, these routines return
269f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // the parameters c, s, r, and z associated with the Givens rotation. The
270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // parameters c and s define a unitary matrix such that:
271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   |  c s |.| a | = | r |
273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | -s c | | b |   | 0 |
274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // The parameter z is defined such that if |a| > |b|, z is s; otherwise if
276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // c is not 0 z is 1/c; otherwise z is 1.
277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a,
278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *b, DeviceMemory<float> *c,
279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *s) = 0;
280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a,
281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *b, DeviceMemory<double> *c,
282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *s) = 0;
283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a,
284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *b,
285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *c,
286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *s) = 0;
287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a,
288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *b,
289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *c,
290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *s) = 0;
291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs modified Givens rotation of points in the plane.
293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Given two vectors x and y, each vector element of these vectors is replaced
294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // as follows:
295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | x(i) | =  H | x(i) |
297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | y(i) |      | y(i) |
298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // for i=1 to n, where H is a modified Givens transformation matrix whose
300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // values are stored in the param[1] through param[4] array.
301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // For the exact layout of param, see the reference BLAS documentation of the
  // ROTM routines (SROTM/DROTM).
  //
  // x and y are both overwritten with the rotated values; param is read-only.
  // NOTE(review): elem_count is presumably the n used above, and incx/incy the
  // usual BLAS strides of x and y -- confirm against the implementations.
302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotm(Stream *stream, uint64 elem_count,
303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx,
304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy,
305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &param) = 0;
306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotm(Stream *stream, uint64 elem_count,
307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx,
308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy,
309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &param) = 0;
310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the parameters for a modified Givens rotation.
312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Given Cartesian coordinates (x1, y1) of an input vector, these routines
313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // compute the components of a modified Givens transformation matrix H that
314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // zeros the y-component of the resulting vector:
315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   | x1 | =  H | x1 * sqrt(d1) |
317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //   |  0 |      | y1 * sqrt(d2) |
318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // d1 and d2 are the scaling factors applied to the x- and y-coordinates. d1,
  // d2 and x1 are passed as mutable memory, y1 is read-only, and the components
  // of H are written to param. For the layout of param, see the reference BLAS
  // documentation of the ROTMG routines (SROTMG/DROTMG).
320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1,
321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<float> *d2, DeviceMemory<float> *x1,
322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &y1,
323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<float> *param) = 0;
324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1,
325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<double> *d2, DeviceMemory<double> *x1,
326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &y1,
327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<double> *param) = 0;
328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes the product of a vector by a scalar: x <- alpha * x.
  // x is scaled in place. Besides the same-type overloads, there are overloads
  // that scale a complex vector by a real alpha (float alpha with
  // std::complex<float> x, double alpha with std::complex<double> x).
330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,
331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,
333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,
335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,
337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count,
339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasScal(Stream *stream, uint64 elem_count,
342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
344f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Swaps a vector with another vector: x <-> y.
  // Both x and y are modified, hence both are passed by mutable pointer.
346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx,
348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx,
351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx,
354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSwap(Stream *stream, uint64 elem_count,
356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx,
357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Finds the index of the element with maximum absolute value.
  // x is read-only; the index is reported through result, which holds an int
  // for every element type.
  // NOTE(review): whether the index is 0- or 1-based is not stated here --
  // confirm against the backend implementation.
360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &x, int incx,
362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &x, int incx,
365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &x, int incx,
368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamax(Stream *stream, uint64 elem_count,
370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &x,
371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int incx, DeviceMemory<int> *result) = 0;
372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Finds the index of the element with minimum absolute value.
  // x is read-only; the index is reported through result, which holds an int
  // for every element type.
  // NOTE(review): whether the index is 0- or 1-based is not stated here --
  // confirm against the backend implementation.
374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &x, int incx,
376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &x, int incx,
379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &x, int incx,
382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<int> *result) = 0;
383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasIamin(Stream *stream, uint64 elem_count,
384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &x,
385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int incx, DeviceMemory<int> *result) = 0;
386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a general band matrix:
388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a' * x + beta * y,
392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * conj(a') * x + beta * y,
394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an m-by-n general band matrix, with kl
396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // sub-diagonals and ku super-diagonals; x is a vector with
397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n(trans==kNoTranspose)/m(otherwise) elements;
398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements.
  //
  // a' denotes the transpose of a and conj(a') its conjugate transpose. a and
  // x are read-only; y is updated in place.
  // NOTE(review): trans presumably selects which of the three forms above is
  // computed -- confirm against the definition of blas::Transpose.
399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku, float alpha,
401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx, float beta,
403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku, double alpha,
406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx, double beta,
408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku,
411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m,
417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, uint64 kl, uint64 ku,
418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a general matrix:
425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a' * x + beta * y,
429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * conj(a') * x + beta * y,
431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an m-by-n general matrix; x is a vector
433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // with n(trans==kNoTranspose)/m(otherwise) elements;
434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // y is a vector with m(trans==kNoTranspose)/n(otherwise) elements.
  //
  // a' denotes the transpose of a and conj(a') its conjugate transpose. a and
  // x are read-only; y is updated in place.
  // NOTE(review): trans presumably selects which of the three forms above is
  // computed -- confirm against the definition of blas::Transpose.
435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, float alpha, const DeviceMemory<float> &a,
437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<float> &x, int incx,
438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<float> *y, int incy) = 0;
439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, double alpha, const DeviceMemory<double> &a,
441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<double> &x, int incx,
442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<double> *y, int incy) = 0;
443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, std::complex<float> alpha,
445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m,
450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 n, std::complex<double> alpha,
451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a general matrix.
457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + a,
459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is
461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // an m-by-n general matrix.
  //
  // Only real (float/double) overloads are declared here; the complex variants
  // are DoBlasGerc (conjugated) and DoBlasGeru (unconjugated). x and y are
  // read-only; a is updated in place.
462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha,
463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &x, int incx,
464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<float> &y, int incy,
465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *a, int lda) = 0;
466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha,
467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &x, int incx,
468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<double> &y, int incy,
469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *a, int lda) = 0;
470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update (conjugated) of a general matrix.
472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(y') + a,
474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is
476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // an m-by-n general matrix.
  //
  // conj(y') is the conjugate transpose of y. Only complex overloads are
  // declared. x and y are read-only; a is updated in place.
477f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n,
478f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
479f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
480f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
481f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *a, int lda) = 0;
482f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGerc(Stream *stream, uint64 m, uint64 n,
483f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
484f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
485f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
486f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *a, int lda) = 0;
487f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
488f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update (unconjugated) of a general matrix.
489f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
490f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + a,
491f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
492f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an m-element vector; y is an n-element vector; a is
493f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // an m-by-n general matrix.
  //
  // y' is the plain (unconjugated) transpose of y. Only complex overloads are
  // declared. x and y are read-only; a is updated in place.
494f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n,
495f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
496f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
497f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
498f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *a, int lda) = 0;
499f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGeru(Stream *stream, uint64 m, uint64 n,
500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *a, int lda) = 0;
504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a Hermitian band matrix.
506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n Hermitian band matrix, with k
510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // super-diagonals; x and y are n-element vectors.
  //
  // Only complex overloads are declared. a and x are read-only; y is updated
  // in place.
  // NOTE(review): uplo presumably tells which triangle (upper or lower) of a
  // is supplied -- confirm against the definition of blas::UpperLower.
511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, std::complex<float> alpha,
513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, std::complex<double> alpha,
519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a Hermitian matrix.
525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n Hermitian matrix; x and y are
529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-element vectors.
  //
  // Only complex overloads are declared. a and x are read-only; y is updated
  // in place.
  // NOTE(review): uplo presumably tells which triangle (upper or lower) of a
  // is supplied -- confirm against the definition of blas::UpperLower.
530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,
531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,
537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a Hermitian matrix.
544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(x') + a,
546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a real scalar; x is an n-element vector; a is an n-by-n Hermitian
548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix.
  //
  // Note that alpha is float/double while x and a are complex. x is read-only;
  // a is updated in place.
549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n,
550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha,
551f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<float>> &x, int incx,
552f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *a, int lda) = 0;
553f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n,
554f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha,
555f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<double>> &x, int incx,
556f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *a, int lda) = 0;
557f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
558f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a Hermitian matrix.
559f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
560f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(y') + conj(alpha) * y * conj(x') + a,
561f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
562f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian
563f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix.
  //
  // Unlike DoBlasHer, alpha is complex here. x and y are read-only; a is
  // updated in place.
564f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,
565f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
566f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
567f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
568f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *a, int lda) = 0;
569f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,
570f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
571f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
572f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *a, int lda) = 0;
574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a Hermitian packed matrix.
576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n Hermitian matrix, supplied in
580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // packed form; x and y are n-element vectors.
  //
  // The packed matrix a is passed as the ap argument, so unlike DoBlasHemv
  // there is no lda parameter. ap and x are read-only; y is updated in place.
581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &ap,
584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *y, int incy) = 0;
587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &ap,
590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *y, int incy) = 0;
593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a Hermitian packed matrix.
595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(x') + a,
597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a real scalar; x is an n-element vector; a is an n-by-n Hermitian
599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form as ap and updated in place.
600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n,
601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha,
602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<float>> &x, int incx,
603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<float>> *ap) = 0;
604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n,
605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha,
606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         const DeviceMemory<std::complex<double>> &x, int incx,
607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<std::complex<double>> *ap) = 0;
608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a Hermitian packed matrix.
610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * conj(y') + conj(alpha) * y * conj(x') + a,
612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n Hermitian
614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form as ap and updated in place.
615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &x, int incx,
618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &y, int incy,
619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *ap) = 0;
620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &x, int incx,
623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &y, int incy,
624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *ap) = 0;
625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a symmetric band matrix.
627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n symmetric band matrix, with k
631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // super-diagonals; x and y are n-element vectors; y receives the result.
632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, float alpha, const DeviceMemory<float> &a,
634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<float> &x, int incx,
635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<float> *y, int incy) = 0;
636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n,
637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, double alpha, const DeviceMemory<double> &a,
638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, const DeviceMemory<double> &x, int incx,
639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<double> *y, int incy) = 0;
640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a symmetric packed matrix.
642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n symmetric matrix, supplied in
646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // packed form as ap; x and y are n-element vectors; y receives the result.
647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &ap,
649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx, float beta,
650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,
652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &ap,
653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx, double beta,
654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a symmetric packed matrix.
657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * x' + a,
659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric
661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form as ap and updated in place.
662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n,
663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha, const DeviceMemory<float> &x, int incx,
664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *ap) = 0;
665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n,
666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha, const DeviceMemory<double> &x, int incx,
667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *ap) = 0;
668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a symmetric packed matrix.
670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + alpha * y * x' + a,
672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric
674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, supplied in packed form as ap and updated in place.
675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &x, int incx,
677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &y, int incy,
678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *ap) = 0;
679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,
680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &x, int incx,
681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &y, int incy,
682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *ap) = 0;
683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product for a symmetric matrix.
685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     y <- alpha * a * x + beta * y,
687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is an n-by-n symmetric matrix; x and y are
689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-element vectors; y receives the result.
690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,
691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &x, int incx, float beta,
693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *y, int incy) = 0;
694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,
695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &x, int incx, double beta,
697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *y, int incy) = 0;
698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-1 update of a symmetric matrix.
700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * x' + a,
702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x is an n-element vector; a is an n-by-n symmetric
704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, updated in place.
705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n,
706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         float alpha, const DeviceMemory<float> &x, int incx,
707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<float> *a, int lda) = 0;
708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n,
709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         double alpha, const DeviceMemory<double> &x, int incx,
710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                         DeviceMemory<double> *a, int lda) = 0;
711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a rank-2 update of a symmetric matrix.
713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a <- alpha * x * y' + alpha * y * x' + a,
715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and y are n-element vectors; a is an n-by-n symmetric
717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix, updated in place.
718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,
719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &x, int incx,
720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &y, int incy,
721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *a, int lda) = 0;
722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,
723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &x, int incx,
724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &y, int incy,
725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *a, int lda) = 0;
726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
727f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a triangular band matrix.
728f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
729f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a * x,
730f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
731f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a' * x,
732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- conj(a') * x,
734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // a is an n-by-n unit, or non-unit, upper or lower triangular band matrix,
736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // with k+1 diagonals; x is an n-element vector, updated in place.
737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<float> &a, int lda,
740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<double> &a, int lda,
744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<float>> &a,
748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<float>> *x,
749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo,
751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<double>> &a,
753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<double>> *x,
754f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
755f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
756f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a system of linear equations whose coefficients are in a triangular
757f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // band matrix as below:
758f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
759f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a * x = b,
760f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
761f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a' * x = b,
762f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
763f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     conj(a') * x = b,
764f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
765f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or
766f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // lower triangular band matrix, with k+1 diagonals.
  // NOTE(review): no separate b argument is taken; presumably x holds b on
  // entry and is overwritten with the solution, as in BLAS xTBSV -- confirm.
767f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
768f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
769f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<float> &a, int lda,
770f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
771f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
772f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
773f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<double> &a, int lda,
774f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
775f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
776f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
777f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<float>> &a,
778f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<float>> *x,
779f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
780f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo,
781f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
782f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          uint64 k, const DeviceMemory<std::complex<double>> &a,
783f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int lda, DeviceMemory<std::complex<double>> *x,
784f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
785f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
786f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a triangular packed matrix.
787f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
788f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a * x,
789f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
790f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a' * x,
791f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
792f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- conj(a') * x,
793f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
794f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // a is an n-by-n unit, or non-unit, upper or lower triangular matrix,
795f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // supplied in packed form as ap; x is an n-element vector, updated in place.
796f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
797f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
798f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &ap, DeviceMemory<float> *x,
799f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
800f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
801f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &ap,
803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &ap,
807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
808f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo,
809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &ap,
811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a system of linear equations whose coefficients are in a triangular
814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // packed matrix as below:
815f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a * x = b,
817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a' * x = b,
819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     conj(a') * x = b,
821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or
823f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // lower triangular matrix, supplied in packed form as ap.
  // NOTE(review): no separate b argument is taken; presumably x holds b on
  // entry and is overwritten with the solution, as in BLAS xTPSV -- confirm.
824f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
825f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
826f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &ap, DeviceMemory<float> *x,
827f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int incx) = 0;
828f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &ap,
831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
832f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &ap,
835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo,
837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &ap,
839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-vector product using a triangular matrix.
842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a * x,
844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- a' * x,
846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x <- conj(a') * x,
848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // a is an n-by-n unit, or non-unit, upper or lower triangular matrix; x is an
850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-element vector, updated in place.
851f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
852f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
853f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
854f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
855f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
856f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
857f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
858f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
859f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
860f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
861f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
862f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
863f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo,
864f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
865f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
866f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
867f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
868f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a system of linear equations whose coefficients are in a triangular
869f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix as below:
870f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
871f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a * x = b,
872f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
873f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     a' * x = b,
874f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
875f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     conj(a') * x = b,
876f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
877f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // b and x are n-element vectors; a is an n-by-n unit, or non-unit, upper or
878f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // lower triangular matrix.
  // NOTE(review): no separate b argument is taken; presumably x holds b on
  // entry and is overwritten with the solution, as in BLAS xTRSV -- confirm.
879f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
880f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
881f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
882f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *x, int incx) = 0;
883f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
884f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
885f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
886f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *x, int incx) = 0;
887f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
888f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
889f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
890f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *x, int incx) = 0;
891f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo,
892f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, blas::Diagonal diag, uint64 n,
893f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
894f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *x, int incx) = 0;
895f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
896f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product with general matrices:
897f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
898f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * op(a) * op(b) + beta * c,
899f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
900f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // op(X) is one of op(X) = X, or op(X) = X', or op(X) = conj(X'); alpha and
901f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // beta are scalars; a, b, and c are matrices; op(a) is an m-by-k matrix;
902f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // op(b) is a k-by-n matrix; c is an m-by-n matrix.
903523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  //
904523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  // Note: The half interface uses float precision internally; the version
905523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  // that uses half precision internally is not yet supported. There is no
906523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  // batched version of the half-precision interface.
  // The Eigen::half overload takes alpha and beta as float, not Eigen::half.
  //
  // NOTE(review): transa/transb presumably select op() for a and b, and
  // lda/ldb/ldc are presumably the leading dimensions of a, b, and c as in
  // reference BLAS xGEMM -- confirm against the platform implementation.
907523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
908523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
90901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          float alpha, const DeviceMemory<Eigen::half> &a,
91001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          int lda, const DeviceMemory<Eigen::half> &b, int ldb,
91101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          float beta, DeviceMemory<Eigen::half> *c,
91201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar                          int ldc) = 0;
913f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
914f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
915f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
916f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &b, int ldb, float beta,
917f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *c, int ldc) = 0;
918f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
919f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
920f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
921f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &b, int ldb, double beta,
922f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *c, int ldc) = 0;
923f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
924f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
925f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
926f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
927f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &b, int ldb,
928f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
929f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
930f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemm(Stream *stream, blas::Transpose transa,
931f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose transb, uint64 m, uint64 n, uint64 k,
932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &b, int ldb,
935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
93801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Gets a list of supported algorithms for DoBlasGemmWithAlgorithm.  Note that
93901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // any or all of these algorithms may still fail when actually used.
  //
  // NOTE(review): the original sentence was cut off after the words
  // `may still be`; the ending above is inferred from the
  // DoBlasGemmWithAlgorithm comment, which says some algorithms may fail --
  // confirm.  The list is presumably returned through *out_algorithms.
94001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool GetBlasGemmAlgorithms(
94101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::vector<AlgorithmType> *out_algorithms) = 0;
94201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
94301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Like DoBlasGemm, but accepts an algorithm and a compute type.
94401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  //
94501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // The compute type lets you say (e.g.) that the inputs and outputs are
94601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Eigen::halfs, but you want the internal computations to be done with
94701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // float32 precision.
94801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  //
94901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // Note the subtle difference in the version that accepts Eigen::half --
95001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // alpha and beta have type const Eigen::half&, not float.
  //
  // The int8 overload is mixed-type: a and b are int8 matrices, alpha and
  // beta are int, and c is an int32 matrix.
95101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  //
95201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // If output_profile_result is not null, a failure here does not put the
95301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // stream in a failure state.  Instead, success/failure is indicated by
95401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // output_profile_result->is_valid().  This lets you use this function for
95501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // choosing the best algorithm among many (some of which may fail) without
95601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  // creating a new Stream for each attempt.
95701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
95801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
959a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a, int lda,
960a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      const DeviceMemory<int8> &b, int ldb, int beta, DeviceMemory<int32> *c,
961a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      int ldc, ComputationType computation_type, AlgorithmType algorithm,
962a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      ProfileResult *output_profile_result) = 0;
963a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower  virtual bool DoBlasGemmWithAlgorithm(
964a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
96501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, const Eigen::half &alpha,
96601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &a, int lda,
96701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta,
96801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<Eigen::half> *c, int ldc, ComputationType computation_type,
96901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      AlgorithmType algorithm, ProfileResult *output_profile_result) = 0;
97001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
97101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
97201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, int lda,
97301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<float> &b, int ldb, float beta, DeviceMemory<float> *c,
97401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int ldc, ComputationType computation_type, AlgorithmType algorithm,
97501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ProfileResult *output_profile_result) = 0;
97601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
97701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
97801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, double alpha, const DeviceMemory<double> &a, int lda,
97901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<double> &b, int ldb, double beta,
98001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<double> *c, int ldc, ComputationType computation_type,
98101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      AlgorithmType algorithm, ProfileResult *output_profile_result) = 0;
98201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
98301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
98401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, std::complex<float> alpha,
98501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &a, int lda,
98601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &b, int ldb,
98701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc,
98801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ComputationType computation_type, AlgorithmType algorithm,
98901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ProfileResult *output_profile_result) = 0;
99001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  virtual bool DoBlasGemmWithAlgorithm(
99101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
99201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 n, uint64 k, std::complex<double> alpha,
99301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &a, int lda,
99401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &b, int ldb,
99501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<double> beta, DeviceMemory<std::complex<double>> *c, int ldc,
99601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ComputationType computation_type, AlgorithmType algorithm,
99701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      ProfileResult *output_profile_result) = 0;
99801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar
999f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a batch of matrix-matrix products with general matrices.
1000f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // This is a batched version of DoBlasGemm.
1001f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // The batched GEMM computes a matrix product for each input/output in a, b,
1002f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // and c, which are slices of batch_count pointers to DeviceMemory objects.
  //
  // NOTE(review): the role of scratch_allocator is not described here;
  // presumably it supplies temporary device memory needed by the batched
  // call -- confirm against the platform implementation.
1003f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1004f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1005f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, float alpha,
1006f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &a, int lda,
1007f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta,
1008f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &c, int ldc,
100905ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1010f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1011f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1012f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, double alpha,
1013f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &a, int lda,
1014f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta,
1015f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &c, int ldc,
101605ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1017f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1018f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1019f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, std::complex<float> alpha,
1020f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda,
1021f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb,
1022f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      std::complex<float> beta,
1023f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc,
102405ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1025f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasGemmBatched(
1026f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
1027f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 n, uint64 k, std::complex<double> alpha,
1028f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a, int lda,
1029f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b, int ldb,
1030f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      std::complex<double> beta,
1031f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, int ldc,
103205ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) = 0;
1033f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1034f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product where one input matrix is Hermitian:
1035f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1036f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * b + beta * c,
1037f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1038f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * b * a + beta * c,
1039f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1040f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is a Hermitian matrix; b and c are m-by-n
1041f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrices.
  //
  // NOTE(review): presumably `side` selects whether a multiplies b from the
  // left (first form) or from the right (second form), and `uplo` selects
  // which triangle of a is referenced, as in reference BLAS xHEMM -- confirm.
1042f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemm(Stream *stream, blas::Side side,
1043f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1044f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1045f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1046f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &b, int ldb,
1047f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
1048f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1049f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHemm(Stream *stream, blas::Side side,
1050f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1051f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1052f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1053f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &b, int ldb,
1054f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
1055f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1056f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1057f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a Hermitian rank-k update.
1058f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1059f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * conj(a') + beta * c,
1060f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1061f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * conj(a') * a + beta * c,
1062f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1063f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are real scalars; c is an n-by-n Hermitian matrix; a is an
1064f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-by-k matrix in the first case and a k-by-n matrix in the second case.
  // Note that alpha and beta are float/double while a and c hold the
  // corresponding std::complex element type.
1065f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo,
1066f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1067f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha,
1068f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1069f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<std::complex<float>> *c,
1070f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int ldc) = 0;
1071f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHerk(Stream *stream, blas::UpperLower uplo,
1072f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1073f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha,
1074f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1075f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<std::complex<double>> *c,
1076f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          int ldc) = 0;
1077f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1078f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a Hermitian rank-2k update.
1079f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1080f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * conj(b') + conj(alpha) * b * conj(a') + beta * c,
1081f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1082f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * conj(b') * a + conj(alpha) * conj(a') * b + beta * c,
1083f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1084f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a complex scalar and beta a real scalar; c is an n-by-n Hermitian
1085f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrix; a and b are n-by-k matrices in the first case and k-by-n matrices
  // in the second case.
1086f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo,
1087f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1088f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<float> alpha,
1089f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &a, int lda,
1090f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &b, int ldb,
1091f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           float beta, DeviceMemory<std::complex<float>> *c,
1092f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int ldc) = 0;
1093f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasHer2k(Stream *stream, blas::UpperLower uplo,
1094f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1095f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<double> alpha,
1096f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &a, int lda,
1097f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &b, int ldb,
1098f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           double beta, DeviceMemory<std::complex<double>> *c,
1099f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           int ldc) = 0;
1100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product where one input matrix is symmetric.
1102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * b + beta * c,
1104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1105f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * b * a + beta * c,
1106f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1107f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; a is a symmetric matrix; b and c are m-by-n
1108f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // matrices.
  //
  // NOTE(review): presumably `side` selects whether a multiplies b from the
  // left (first form) or from the right (second form), and `uplo` selects
  // which triangle of a is referenced, as in reference BLAS xSYMM -- confirm.
1109f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1110f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1111f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
1112f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &b, int ldb, float beta,
1113f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *c, int ldc) = 0;
1114f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1115f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1116f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
1117f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &b, int ldb, double beta,
1118f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *c, int ldc) = 0;
1119f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1120f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1121f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1122f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1123f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &b, int ldb,
1124f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
1125f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1126f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSymm(Stream *stream, blas::Side side,
1127f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, uint64 m, uint64 n,
1128f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1129f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1130f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &b, int ldb,
1131f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
1132f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1133f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1134f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a symmetric rank-k update.
1135f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1136f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * a' + beta * c,
1137f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1138f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a' * a + beta * c,
1139f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1140f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; c is an n-by-n symmetric matrix; a is an
1141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-by-k matrix in the first case and a k-by-n matrix in the second case.
1142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float alpha, const DeviceMemory<float> &a, int lda,
1145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          float beta, DeviceMemory<float> *c, int ldc) = 0;
1146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double alpha, const DeviceMemory<double> &a, int lda,
1149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          double beta, DeviceMemory<double> *c, int ldc) = 0;
1150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> beta,
1155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,
1157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Transpose trans, uint64 n, uint64 k,
1158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> beta,
1161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Performs a symmetric rank-2k update, overwriting c with the result:
1164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * a * b' + alpha * b * a' + beta * c,
1166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     c <- alpha * b' * a + alpha * a' * b + beta * c,
1168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha and beta are scalars; c is an n-by-n symmetric matrix; a and b are
1170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // n-by-k matrices in the first case and k-by-n matrices in the second case.
  //
  // Four overloads are declared, one per element type (float, double,
  // std::complex<float>, std::complex<double>); within an overload alpha, beta,
  // a, b and c all use that element type. a and b are passed as const
  // references (inputs only) and c as a pointer (in/out operand). Every
  // overload is pure virtual, so a concrete BlasSupport subclass must
  // implement all of them.
  //
  // NOTE(review): following the usual BLAS ?syr2k convention, trans presumably
  // selects between the two forms above, uplo presumably selects which
  // triangle of c is referenced, and lda/ldb/ldc are presumably the leading
  // dimensions of a, b and c. The bool result presumably reports whether the
  // operation was issued on stream successfully. None of this is stated in
  // this part of the header -- confirm against the implementations.
1171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           float alpha, const DeviceMemory<float> &a, int lda,
1174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<float> &b, int ldb, float beta,
1175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<float> *c, int ldc) = 0;
1176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           double alpha, const DeviceMemory<double> &a, int lda,
1179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<double> &b, int ldb, double beta,
1180f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<double> *c, int ldc) = 0;
1181f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1182f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1183f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<float> alpha,
1184f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &a, int lda,
1185f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<float>> &b, int ldb,
1186f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<float> beta,
1187f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<std::complex<float>> *c, int ldc) = 0;
1188f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,
1189f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           blas::Transpose trans, uint64 n, uint64 k,
1190f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<double> alpha,
1191f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &a, int lda,
1192f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           const DeviceMemory<std::complex<double>> &b, int ldb,
1193f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           std::complex<double> beta,
1194f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                           DeviceMemory<std::complex<double>> *c, int ldc) = 0;
1195f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1196f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Computes a matrix-matrix product where one input matrix is triangular,
  // overwriting b with the result:
1197f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1198f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     b <- alpha * op(a) * b,
1199f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1200f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     b <- alpha * b * op(a)
1201f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1202f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; b is an m-by-n matrix; a is a unit, or non-unit, upper
1203f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a', or
1204f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // op(a) = conj(a').
  //
  // Four overloads are declared, one per element type (float, double,
  // std::complex<float>, std::complex<double>); within an overload alpha, a
  // and b all use that element type. a is passed as a const reference (input
  // only) and b as a pointer (in/out operand). Every overload is pure virtual,
  // so a concrete BlasSupport subclass must implement all of them.
  //
  // NOTE(review): following the usual BLAS ?trmm convention, side presumably
  // selects between the left-multiply (first) and right-multiply (second)
  // forms, uplo and diag presumably describe a as upper/lower and
  // unit/non-unit triangular, transa presumably selects op(a), and lda/ldb are
  // presumably the leading dimensions of a and b. The bool result presumably
  // reports whether the operation was issued on stream successfully. None of
  // this is stated in this part of the header -- confirm against the
  // implementations.
1205f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1206f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1207f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, float alpha,
1208f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
1209f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *b, int ldb) = 0;
1210f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1211f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1212f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, double alpha,
1213f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
1214f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *b, int ldb) = 0;
1215f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1216f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1217f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1218f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1219f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1220f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *b, int ldb) = 0;
1221f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrmm(Stream *stream, blas::Side side,
1222f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1223f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1224f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1225f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1226f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *b, int ldb) = 0;
1227f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1228f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Solves a triangular matrix equation for the unknown matrix x:
1229f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1230f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     op(a) * x = alpha * b,
1231f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or
1232f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //     x * op(a) = alpha * b
1233f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //
1234f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // alpha is a scalar; x and b are m-by-n matrices; a is a unit, or non-unit,
1235f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // upper or lower triangular matrix; op(a) is one of op(a) = a, or op(a) = a',
1236f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // or op(a) = conj(a').
  //
  // Four overloads are declared, one per element type (float, double,
  // std::complex<float>, std::complex<double>); within an overload alpha, a
  // and b all use that element type. a is passed as a const reference (input
  // only) and b as a pointer. Every overload is pure virtual, so a concrete
  // BlasSupport subclass must implement all of them.
  //
  // NOTE(review): there is no separate parameter for x, and b is the only
  // writable matrix operand, so the solution x is presumably written over b
  // (the usual BLAS ?trsm convention). Likewise side presumably selects
  // between the first and second forms, uplo and diag presumably describe a
  // as upper/lower and unit/non-unit triangular, transa presumably selects
  // op(a), and lda/ldb are presumably the leading dimensions of a and b. The
  // bool result presumably reports whether the operation was issued on stream
  // successfully. None of this is stated in this part of the header --
  // confirm against the implementations.
1237f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1238f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1239f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, float alpha,
1240f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<float> &a, int lda,
1241f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<float> *b, int ldb) = 0;
1242f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1243f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1244f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n, double alpha,
1245f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<double> &a, int lda,
1246f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<double> *b, int ldb) = 0;
1247f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1248f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1249f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1250f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<float> alpha,
1251f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<float>> &a, int lda,
1252f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<float>> *b, int ldb) = 0;
1253f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  virtual bool DoBlasTrsm(Stream *stream, blas::Side side,
1254f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::UpperLower uplo, blas::Transpose transa,
1255f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          blas::Diagonal diag, uint64 m, uint64 n,
1256f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          std::complex<double> alpha,
1257f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          const DeviceMemory<std::complex<double>> &a, int lda,
1258f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                          DeviceMemory<std::complex<double>> *b, int ldb) = 0;
1259f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1260f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur protected:
  // Default constructor with an empty body. It is protected, so it can only
  // be invoked from within this class or from derived classes; the class
  // declares pure virtual methods, so it is only usable through a concrete
  // subclass anyway.
1261f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  BlasSupport() {}
1262f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1263f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur private:
  // NOTE(review): SE_DISALLOW_COPY_AND_ASSIGN is defined outside this part of
  // the file; judging by its name it presumably disables copy construction
  // and copy assignment for BlasSupport -- confirm against the macro
  // definition.
1264f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  SE_DISALLOW_COPY_AND_ASSIGN(BlasSupport);
1265f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur};
1266f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1267f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Macro used to quickly declare overrides for abstract virtuals in the
1268f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// BlasSupport base class.
126905ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower#define TENSORFLOW_STREAM_EXECUTOR_GPU_BLAS_SUPPORT_OVERRIDES                  \
1270f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1271f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1272f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1273f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1274f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1275f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1276f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1277f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1278f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1279f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAsum(Stream *stream, uint64 elem_count,                           \
1280f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1281f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1282f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count, float alpha,              \
1283f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1284f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1285f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count, double alpha,             \
1286f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1287f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1288f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count,                           \
1289f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1290f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1291f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1292f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasAxpy(Stream *stream, uint64 elem_count,                           \
1293f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1294f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1295f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1296f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1297f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1298f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1299f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1300f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1301f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1302f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1303f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1304f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1305f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasCopy(Stream *stream, uint64 elem_count,                           \
1306f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1307f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1308f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDot(Stream *stream, uint64 elem_count,                            \
1309f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1310f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &y, int incy,                       \
1311f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *result) override;                        \
1312f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDot(Stream *stream, uint64 elem_count,                            \
1313f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &x, int incx,                      \
1314f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &y, int incy,                      \
1315f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *result) override;                       \
1316f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotc(Stream *stream, uint64 elem_count,                           \
1317f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1318f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1319f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *result) override;         \
1320f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotc(Stream *stream, uint64 elem_count,                           \
1321f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1322f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1323f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *result) override;        \
1324f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotu(Stream *stream, uint64 elem_count,                           \
1325f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1326f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1327f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *result) override;         \
1328f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasDotu(Stream *stream, uint64 elem_count,                           \
1329f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1330f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1331f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *result) override;        \
1332f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count,                           \
1333f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx,                      \
1334f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1335f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count,                           \
1336f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx,                     \
1337f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1338f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count,                           \
1339f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1340f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *result) override;                       \
1341f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasNrm2(Stream *stream, uint64 elem_count,                           \
1342f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1343f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *result) override;                      \
1344f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<float> *x,    \
1345f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<float> *y, int incy, float c, float s) \
1346f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1347f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count, DeviceMemory<double> *x,   \
1348f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<double> *y, int incy, double c,        \
1349f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double s) override;                                           \
1350f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count,                            \
1351f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *x, int incx,               \
1352f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *y, int incy, float c,      \
1353f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 float s) override;                                            \
1354f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRot(Stream *stream, uint64 elem_count,                            \
1355f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<double>> *x, int incx,              \
1356f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<double>> *y, int incy, double c,    \
1357f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double s) override;                                           \
1358f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<float> *a,                      \
1359f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *b, DeviceMemory<float> *c,              \
1360f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *s) override;                            \
1361f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<double> *a,                     \
1362f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *b, DeviceMemory<double> *c,            \
1363f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *s) override;                           \
1364f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<float>> *a,        \
1365f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *b,                        \
1366f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c,                                      \
1367f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *s) override;              \
1368f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotg(Stream *stream, DeviceMemory<std::complex<double>> *a,       \
1369f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *b,                       \
1370f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c,                                     \
1371f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *s) override;             \
1372f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<float> *x,   \
1373f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<float> *y, int incy,                  \
1374f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &param) override;                  \
1375f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotm(Stream *stream, uint64 elem_count, DeviceMemory<double> *x,  \
1376f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<double> *y, int incy,                 \
1377f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &param) override;                 \
1378f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotmg(Stream *stream, DeviceMemory<float> *d1,                    \
1379f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<float> *d2, DeviceMemory<float> *x1,           \
1380f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &y1, DeviceMemory<float> *param)  \
1381f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1382f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasRotmg(Stream *stream, DeviceMemory<double> *d1,                   \
1383f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<double> *d2, DeviceMemory<double> *x1,         \
1384f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &y1,                             \
1385f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<double> *param) override;                      \
1386f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,              \
1387f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1388f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,             \
1389f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1390f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, float alpha,              \
1391f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1392f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count, double alpha,             \
1393f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1394f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count,                           \
1395f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1396f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1397f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasScal(Stream *stream, uint64 elem_count,                           \
1398f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1399f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1400f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<float> *x,   \
1401f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<float> *y, int incy) override;        \
1402f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count, DeviceMemory<double> *x,  \
1403f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx, DeviceMemory<double> *y, int incy) override;       \
1404f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count,                           \
1405f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx,              \
1406f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1407f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSwap(Stream *stream, uint64 elem_count,                           \
1408f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx,             \
1409f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1410f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count,                          \
1411f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &x, int incx,                     \
1412f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1413f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count,                          \
1414f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &x, int incx,                    \
1415f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1416f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count,                          \
1417f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &x, int incx,       \
1418f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1419f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamax(Stream *stream, uint64 elem_count,                          \
1420f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &x, int incx,      \
1421f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1422f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count,                          \
1423f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &x, int incx,                     \
1424f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1425f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count,                          \
1426f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &x, int incx,                    \
1427f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1428f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count,                          \
1429f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &x, int incx,       \
1430f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1431f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasIamin(Stream *stream, uint64 elem_count,                          \
1432f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &x, int incx,      \
1433f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<int> *result) override;                        \
1434f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1435f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, float alpha,                           \
1436f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1437f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1438f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1439f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1440f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, double alpha,                          \
1441f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1442f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1443f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1444f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1445f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, std::complex<float> alpha,             \
1446f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1447f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1448f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1449f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1450f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGbmv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1451f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 kl, uint64 ku, std::complex<double> alpha,            \
1452f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1453f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1454f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1455f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1456f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1457f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1458f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1459f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1460f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1461f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1462f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1463f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1464f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1465f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1466f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1467f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1468f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1469f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1470f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemv(Stream *stream, blas::Transpose trans, uint64 m, uint64 n,   \
1471f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1472f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1473f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1474f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1475f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1476f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGer(Stream *stream, uint64 m, uint64 n, float alpha,              \
1477f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1478f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &y, int incy,                       \
1479f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *a, int lda) override;                    \
1480f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGer(Stream *stream, uint64 m, uint64 n, double alpha,             \
1481f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &x, int incx,                      \
1482f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<double> &y, int incy,                      \
1483f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *a, int lda) override;                   \
1484f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGerc(Stream *stream, uint64 m, uint64 n,                          \
1485f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1486f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1487f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1488f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *a, int lda) override;     \
1489f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGerc(Stream *stream, uint64 m, uint64 n,                          \
1490f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1491f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1492f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1493f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *a, int lda) override;    \
1494f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGeru(Stream *stream, uint64 m, uint64 n,                          \
1495f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1496f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1497f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1498f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *a, int lda) override;     \
1499f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGeru(Stream *stream, uint64 m, uint64 n,                          \
1500f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1501f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1502f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1503f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *a, int lda) override;    \
1504f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1505f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1506f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1507f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1508f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1509f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1510f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1511f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1512f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1513f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1514f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1515f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1516f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1517f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1518f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1519f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1520f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1521f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1522f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1523f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1524f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1525f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1526f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1527f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1528f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1529f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<std::complex<float>> &x, int incx,         \
1530f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *a, int lda) override;      \
1531f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1532f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<std::complex<double>> &x,    \
1533f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<std::complex<double>> *a, int lda)     \
1534f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1535f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1536f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1537f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1538f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1539f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *a, int lda) override;     \
1540f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1541f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1542f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1543f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1544f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *a, int lda) override;    \
1545f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1546f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1547f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &ap,                 \
1548f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1549f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1550f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *y, int incy) override;    \
1551f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1552f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1553f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &ap,                \
1554f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1555f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1556f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *y, int incy) override;   \
1557f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1558f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<std::complex<float>> &x, int incx,         \
1559f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<std::complex<float>> *ap) override;              \
1560f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1561f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<std::complex<double>> &x,    \
1562f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 int incx, DeviceMemory<std::complex<double>> *ap) override;   \
1563f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1564f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1565f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &x, int incx,        \
1566f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &y, int incy,        \
1567f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *ap) override;             \
1568f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1569f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1570f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &x, int incx,       \
1571f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &y, int incy,       \
1572f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *ap) override;            \
1573f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1574f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1575f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1576f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1577f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSbmv(Stream *stream, blas::UpperLower uplo, uint64 n, uint64 k,   \
1578f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1579f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1580f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1581f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1582f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &ap,                  \
1583f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1584f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1585f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpmv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1586f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &ap,                \
1587f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1588f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1589f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1590f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1591f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *ap) override;                            \
1592f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1593f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<double> &x, int incx,        \
1594f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *ap) override;                           \
1595f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1596f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &x, int incx,         \
1597f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &y, int incy,                      \
1598f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *ap) override;                           \
1599f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSpr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1600f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &x, int incx,       \
1601f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &y, int incy,                     \
1602f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *ap) override;                          \
1603f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1604f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1605f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &x, int incx, float beta,          \
1606f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *y, int incy) override;                  \
1607f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymv(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1608f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1609f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &x, int incx, double beta,        \
1610f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *y, int incy) override;                 \
1611f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n, float alpha, \
1612f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 const DeviceMemory<float> &x, int incx,                       \
1613f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<float> *a, int lda) override;                    \
1614f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr(Stream *stream, blas::UpperLower uplo, uint64 n,              \
1615f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 double alpha, const DeviceMemory<double> &x, int incx,        \
1616f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                 DeviceMemory<double> *a, int lda) override;                   \
1617f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1618f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &x, int incx,         \
1619f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &y, int incy,                      \
1620f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *a, int lda) override;                   \
1621f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2(Stream *stream, blas::UpperLower uplo, uint64 n,             \
1622f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &x, int incx,       \
1623f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &y, int incy,                     \
1624f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *a, int lda) override;                  \
1625f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, /* float */           \
1626f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1627f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<float> &a, int lda,             \
1628f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1629f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, /* double */          \
1630f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1631f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<double> &a, int lda,            \
1632f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1633f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1634f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1635f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<float>> &a,        \
1636f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<float>> *x, int incx)     \
1637f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1638f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbmv(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1639f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1640f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<double>> &a,       \
1641f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<double>> *x, int incx)    \
1642f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1643f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, /* float */           \
1644f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1645f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<float> &a, int lda,             \
1646f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1647f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, /* double */          \
1648f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1649f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<double> &a, int lda,            \
1650f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1651f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1652f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1653f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<float>> &a,        \
1654f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<float>> *x, int incx)     \
1655f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1656f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTbsv(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1657f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1658f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 k, const DeviceMemory<std::complex<double>> &a,       \
1659f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<std::complex<double>> *x, int incx)    \
1660f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override;                                                                \
1661f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, /* float */           \
1662f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1663f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &ap, DeviceMemory<float> *x,       \
1664f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1665f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, /* double */          \
1666f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1667f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &ap, DeviceMemory<double> *x,     \
1668f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1669f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1670f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1671f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &ap,                 \
1672f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1673f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpmv(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1674f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1675f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &ap,                \
1676f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1677f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, /* float */           \
1678f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1679f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &ap, DeviceMemory<float> *x,       \
1680f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1681f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, /* double */          \
1682f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1683f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &ap, DeviceMemory<double> *x,     \
1684f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int incx) override;                                          \
1685f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1686f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1687f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &ap,                 \
1688f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1689f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTpsv(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1690f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1691f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &ap,                \
1692f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1693f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, /* float */           \
1694f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1695f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1696f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1697f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, /* double */          \
1698f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1699f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1700f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1701f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1702f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1703f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1704f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1705f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmv(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1706f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1707f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1708f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1709f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, /* float */           \
1710f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1711f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1712f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *x, int incx) override;                  \
1713f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, /* double */          \
1714f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1715f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1716f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *x, int incx) override;                 \
1717f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1718f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1719f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1720f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *x, int incx) override;    \
1721f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsv(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1722f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, blas::Diagonal diag, uint64 n,        \
1723f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1724f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *x, int incx) override;   \
1725f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa, /* Eigen::half */    \
1726f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1727523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  float alpha, const DeviceMemory<Eigen::half> &a, int lda,    \
1728523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  const DeviceMemory<Eigen::half> &b, int ldb, float beta,     \
1729523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  DeviceMemory<Eigen::half> *c, int ldc) override;             \
1730523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower  bool DoBlasGemm(Stream *stream, blas::Transpose transa, /* float */          \
1731523055469c8a61425e3b8f104be67787c2933ccbA. Unique TensorFlower                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1732f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float alpha, const DeviceMemory<float> &a, int lda,          \
1733f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &b, int ldb, float beta,           \
1734f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, int ldc) override;                   \
1735f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa, /* double */         \
1736f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1737f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double alpha, const DeviceMemory<double> &a, int lda,        \
1738f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &b, int ldb, double beta,         \
1739f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, int ldc) override;                  \
1740f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa, /* complex<float> */ \
1741f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1742f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1743f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1744f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &b, int ldb,         \
1745f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1746f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
1747f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemm(Stream *stream, blas::Transpose transa, /* complex<double> */ \
1748f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transb, uint64 m, uint64 n, uint64 k,        \
1749f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1750f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1751f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &b, int ldb,        \
1752f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1753f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
175401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool GetBlasGemmAlgorithms(std::vector<blas::AlgorithmType> *out_algorithms) \
175501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      override; /* presumably lists supported GEMM algorithms - confirm */     \
175601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm( /* int8 a/b, int c, int alpha/beta */          \
175701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1758a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      uint64 m, uint64 n, uint64 k, int alpha, const DeviceMemory<int8> &a,    \
1759a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      int lda, const DeviceMemory<int8> &b, int ldb, int beta,                 \
1760a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      DeviceMemory<int> *c, int ldc, blas::ComputationType computation_type,   \
1761a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      blas::AlgorithmType algorithm,                                           \
1762a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      blas::ProfileResult *output_profile_result) override;                    \
1763a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower  bool DoBlasGemmWithAlgorithm( /* Eigen::half; alpha/beta by const ref */     \
1764a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cdA. Unique TensorFlower      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
176501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, const Eigen::half &alpha,                  \
176601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &a, int lda,                             \
176701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<Eigen::half> &b, int ldb, const Eigen::half &beta,    \
176801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<Eigen::half> *c, int ldc,                                   \
176901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
177001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
177101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm( /* float */                                    \
177201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
177301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory<float> &a, \
177401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int lda, const DeviceMemory<float> &b, int ldb, float beta,              \
177501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      DeviceMemory<float> *c, int ldc, blas::ComputationType computation_type, \
177601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::AlgorithmType algorithm,                                           \
177701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
177801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm( /* double */                                   \
177901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
178001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, double alpha,                              \
178101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<double> &a, int lda, const DeviceMemory<double> &b,   \
178201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int ldb, double beta, DeviceMemory<double> *c, int ldc,                  \
178301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
178401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
178501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm( /* complex<float> */                           \
178601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
178701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, std::complex<float> alpha,                 \
178801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &a, int lda,                     \
178901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<float>> &b, int ldb,                     \
179001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<float> beta, DeviceMemory<std::complex<float>> *c, int ldc, \
179101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
179201194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
179301194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar  bool DoBlasGemmWithAlgorithm( /* complex<double> */                          \
179401194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
179501194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      uint64 m, uint64 n, uint64 k, std::complex<double> alpha,                \
179601194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &a, int lda,                    \
179701194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      const DeviceMemory<std::complex<double>> &b, int ldb,                    \
179801194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      std::complex<double> beta, DeviceMemory<std::complex<double>> *c,        \
179901194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      int ldc, blas::ComputationType computation_type,                         \
180001194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::AlgorithmType algorithm,                                           \
180101194694948eb883e99af597d9dbbf3fc9f5c9e2Justin Lebar      blas::ProfileResult *output_profile_result) override;                    \
1802f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched( /* float */                                          \
1803f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1804f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, float alpha,                               \
1805f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &a, int lda,               \
1806f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &b, int ldb, float beta,   \
1807f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<float> *> &c, int ldc,               \
180805ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) override;          \
1809f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched( /* double */                                         \
1810f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1811f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, double alpha,                              \
1812f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &a, int lda,              \
1813f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &b, int ldb, double beta, \
1814f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<double> *> &c, int ldc,              \
181505ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) override;          \
1816f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched( /* complex<float> */                                 \
1817f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1818f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, std::complex<float> alpha,                 \
1819f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &a, int lda, \
1820f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &b, int ldb, \
1821f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      std::complex<float> beta,                                                \
1822f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<float>> *> &c, int ldc, \
182305ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int batch_count, ScratchAllocator *scratch_allocator) override;          \
1824f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasGemmBatched( /* complex<double> */                                \
1825f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
1826f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 m, uint64 n, uint64 k, std::complex<double> alpha,                \
1827f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &a,         \
1828f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      int lda,                                                                 \
1829f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &b,         \
1830f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      int ldb, std::complex<double> beta,                                      \
1831f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c,         \
183205ea40f180e528dbfde36cd338a0b6ac3cca6dd9A. Unique TensorFlower      int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \
1833f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1834f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<float> alpha,               \
1835f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1836f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &b, int ldb,         \
1837f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta, /* beta is complex, like alpha */  \
1838f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
1839f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHemm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1840f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<double> alpha,              \
1841f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1842f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &b, int ldb,        \
1843f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta, /* beta is complex, like alpha */ \
1844f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
1845f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, /* complex<float> */  \
1846f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, float alpha,      \
1847f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1848f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  float beta, DeviceMemory<std::complex<float>> *c, int ldc)   \
1849f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override; /* alpha and beta are real (float) */                          \
1850f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHerk(Stream *stream, blas::UpperLower uplo, /* complex<double> */ \
1851f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, double alpha,     \
1852f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1853f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  double beta, DeviceMemory<std::complex<double>> *c, int ldc) \
1854f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      override; /* alpha and beta are real (double) */                         \
1855f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2k( /* complex<float>; alpha complex, beta float */            \
1856f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n,  \
1857f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 k, std::complex<float> alpha,                                     \
1858f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<float>> &a, int lda,                     \
1859f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<float>> &b, int ldb, float beta,         \
1860f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      DeviceMemory<std::complex<float>> *c, int ldc) override;                 \
1861f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasHer2k( /* complex<double>; alpha complex, beta double */          \
1862f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      Stream *stream, blas::UpperLower uplo, blas::Transpose trans, uint64 n,  \
1863f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      uint64 k, std::complex<double> alpha,                                    \
1864f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<double>> &a, int lda,                    \
1865f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      const DeviceMemory<std::complex<double>> &b, int ldb, double beta,       \
1866f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      DeviceMemory<std::complex<double>> *c, int ldc) override;                \
1867f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1868f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, float alpha,                             \
1869f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda,                       \
1870f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &b, int ldb, float beta,           \
1871f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, int ldc) override;                   \
1872f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1873f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, double alpha,                            \
1874f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda,                      \
1875f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &b, int ldb, double beta,         \
1876f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, int ldc) override;                  \
1877f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1878f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<float> alpha,               \
1879f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1880f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &b, int ldb,         \
1881f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1882f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
1883f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSymm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1884f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 m, uint64 n, std::complex<double> alpha,              \
1885f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1886f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &b, int ldb,        \
1887f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1888f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
1889f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
1890f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, float alpha,      \
1891f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<float> &a, int lda, float beta,           \
1892f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<float> *c, int ldc) override;                   \
1893f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
1894f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k, double alpha,     \
1895f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<double> &a, int lda, double beta,         \
1896f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<double> *c, int ldc) override;                  \
1897f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
1898f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k,                   \
1899f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> alpha,                                   \
1900f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1901f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<float> beta,                                    \
1902f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *c, int ldc) override;     \
1903f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyrk(Stream *stream, blas::UpperLower uplo,                       \
1904f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose trans, uint64 n, uint64 k,                   \
1905f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> alpha,                                  \
1906f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1907f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  std::complex<double> beta,                                   \
1908f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *c, int ldc) override;    \
1909f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
1910f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k, float alpha,     \
1911f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &a, int lda,                      \
1912f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<float> &b, int ldb, float beta,          \
1913f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<float> *c, int ldc) override;                  \
1914f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
1915f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k, double alpha,    \
1916f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &a, int lda,                     \
1917f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<double> &b, int ldb, double beta,        \
1918f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<double> *c, int ldc) override;                 \
1919f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
1920f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k,                  \
1921f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<float> alpha,                                  \
1922f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &a, int lda,        \
1923f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<float>> &b, int ldb,        \
1924f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<float> beta,                                   \
1925f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<std::complex<float>> *c, int ldc) override;    \
1926f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasSyr2k(Stream *stream, blas::UpperLower uplo,                      \
1927f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   blas::Transpose trans, uint64 n, uint64 k,                  \
1928f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<double> alpha,                                 \
1929f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &a, int lda,       \
1930f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   const DeviceMemory<std::complex<double>> &b, int ldb,       \
1931f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   std::complex<double> beta,                                  \
1932f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                   DeviceMemory<std::complex<double>> *c, int ldc) override;   \
1933f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1934f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1935f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, float alpha, const DeviceMemory<float> &a,         \
1936f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<float> *b, int ldb) override;          \
1937f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1938f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1939f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, double alpha, const DeviceMemory<double> &a,       \
1940f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<double> *b, int ldb) override;         \
1941f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1942f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1943f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<float> alpha,                         \
1944f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1945f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *b, int ldb) override;     \
1946f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrmm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1947f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1948f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<double> alpha,                        \
1949f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1950f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *b, int ldb) override;    \
1951f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1952f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1953f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, float alpha, const DeviceMemory<float> &a,         \
1954f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<float> *b, int ldb) override;          \
1955f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1956f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1957f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, double alpha, const DeviceMemory<double> &a,       \
1958f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  int lda, DeviceMemory<double> *b, int ldb) override;         \
1959f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1960f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1961f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<float> alpha,                         \
1962f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<float>> &a, int lda,         \
1963f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<float>> *b, int ldb) override;     \
1964f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  bool DoBlasTrsm(Stream *stream, blas::Side side, blas::UpperLower uplo,      \
1965f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  blas::Transpose transa, blas::Diagonal diag, uint64 m,       \
1966f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  uint64 n, std::complex<double> alpha,                        \
1967f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  const DeviceMemory<std::complex<double>> &a, int lda,        \
1968f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                  DeviceMemory<std::complex<double>> *b, int ldb) override;
1969f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1970f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace blas
1971f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace gputools
1972f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace perftools
1973f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
1974f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#endif  // TENSORFLOW_STREAM_EXECUTOR_BLAS_H_
1975