// rsCppStructs.h revision 62237219e567b9f972c86e7ca4e96f9b3d5ad4de
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ANDROID_RSCPPSTRUCTS_H
18#define ANDROID_RSCPPSTRUCTS_H
19
20#include "rsDefines.h"
21#include "util/RefBase.h"
22
23#include <pthread.h>
24
25
/**
 * Row alignment, in bytes, for CPU-side allocations.
 *
 * RenderScript guarantees that every row of an RS allocation is aligned to
 * this amount; every row of a user-backed allocation is required to be
 * aligned to it as well.
 */
#define RS_CPU_ALLOCATION_ALIGNMENT 16
31
32struct dispatchTable;
33
34namespace android {
35class Surface;
36
37namespace RSC {
38
39
/**
 * Signature of an error callback: receives an error number and a
 * NUL-terminated text description.
 */
typedef void (*ErrorHandlerFunc_t)(uint32_t errorNum, const char *errorText);

/**
 * Signature of a message callback: receives a message number, a pointer to
 * the message payload and the payload length.
 */
typedef void (*MessageHandlerFunc_t)(uint32_t msgNum, const void *msgData, size_t msgLen);

// Forward declarations of the classes used by this header.
class Allocation;
class BaseObj;
class Element;
class RS;
class Sampler;
class Script;
class ScriptC;
class Type;
51
/**
 * Error codes used by RenderScript.
 *
 * Once any status other than RS_SUCCESS has been returned, the RenderScript
 * context is considered dead and cannot perform any additional work.
 */
enum RSError {
    /// No error.
    RS_SUCCESS = 0,
    /// An invalid parameter was passed to a function.
    RS_ERROR_INVALID_PARAMETER = 1,
    /// The RenderScript driver returned an error; this is often indicative
    /// of a kernel that crashed.
    RS_ERROR_RUNTIME_ERROR = 2,
    /// An invalid Element was passed to a function.
    RS_ERROR_INVALID_ELEMENT = 3,
    RS_ERROR_MAX = 9999
};
66
67 /**
68  * Flags that can control RenderScript behavior on a per-context level.
69  */
70 enum RSInitFlags {
71     RS_INIT_SYNCHRONOUS = 1, ///< All RenderScript calls will be synchronous. May reduce latency.
72     RS_INIT_LOW_LATENCY = 2, ///< Prefer low latency devices over potentially higher throughput devices.
73     // Bitflag 4 is reserved for the context flag low power
74     RS_INIT_WAIT_FOR_ATTACH = 8,   ///< Kernel execution will hold to give time for a debugger to be attached
75     RS_INIT_MAX = 16
76 };
77
78
/** Two-component vector of int8_t values with public members x and y. */
class Byte2 {
 public:
  /** Sets both components to zero. */
  Byte2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  Byte2(int8_t initX, int8_t initY) : x(initX), y(initY) {}

  int8_t x;
  int8_t y;
};
87
/** Three-component vector of int8_t values with public members x, y and z. */
class Byte3 {
 public:
  /** Sets all components to zero. */
  Byte3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  Byte3(int8_t initX, int8_t initY, int8_t initZ)
      : x(initX), y(initY), z(initZ) {}

  int8_t x;
  int8_t y;
  int8_t z;
};
96
/** Four-component vector of int8_t values with public members x, y, z and w. */
class Byte4 {
 public:
  /** Sets all components to zero. */
  Byte4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  int8_t x;
  int8_t y;
  int8_t z;
  int8_t w;
};
105
/** Two-component vector of uint8_t values with public members x and y. */
class UByte2 {
 public:
  /** Sets both components to zero. */
  UByte2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  UByte2(uint8_t initX, uint8_t initY) : x(initX), y(initY) {}

  uint8_t x;
  uint8_t y;
};
114
/** Three-component vector of uint8_t values with public members x, y and z. */
class UByte3 {
 public:
  /** Sets all components to zero. */
  UByte3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  UByte3(uint8_t initX, uint8_t initY, uint8_t initZ)
      : x(initX), y(initY), z(initZ) {}

  uint8_t x;
  uint8_t y;
  uint8_t z;
};
123
/** Four-component vector of uint8_t values with public members x, y, z and w. */
class UByte4 {
 public:
  /** Sets all components to zero. */
  UByte4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  uint8_t x;
  uint8_t y;
  uint8_t z;
  uint8_t w;
};
132
/** Two-component vector of short values with public members x and y. */
class Short2 {
 public:
  /** Sets both components to zero. */
  Short2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  Short2(short initX, short initY) : x(initX), y(initY) {}

  short x;
  short y;
};
141
/** Three-component vector of short values with public members x, y and z. */
class Short3 {
 public:
  /** Sets all components to zero. */
  Short3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  Short3(short initX, short initY, short initZ)
      : x(initX), y(initY), z(initZ) {}

  short x;
  short y;
  short z;
};
150
/** Four-component vector of short values with public members x, y, z and w. */
class Short4 {
 public:
  /** Sets all components to zero. */
  Short4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  Short4(short initX, short initY, short initZ, short initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  short x;
  short y;
  short z;
  short w;
};
159
/** Two-component vector of uint16_t values with public members x and y. */
class UShort2 {
 public:
  /** Sets both components to zero. */
  UShort2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  UShort2(uint16_t initX, uint16_t initY) : x(initX), y(initY) {}

  uint16_t x;
  uint16_t y;
};
168
/** Three-component vector of uint16_t values with public members x, y and z. */
class UShort3 {
 public:
  /** Sets all components to zero. */
  UShort3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  UShort3(uint16_t initX, uint16_t initY, uint16_t initZ)
      : x(initX), y(initY), z(initZ) {}

  uint16_t x;
  uint16_t y;
  uint16_t z;
};
177
/** Four-component vector of uint16_t values with public members x, y, z and w. */
class UShort4 {
 public:
  /** Sets all components to zero. */
  UShort4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  uint16_t x;
  uint16_t y;
  uint16_t z;
  uint16_t w;
};
186
/** Two-component vector of int values with public members x and y. */
class Int2 {
 public:
  /** Sets both components to zero. */
  Int2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  Int2(int initX, int initY) : x(initX), y(initY) {}

  int x;
  int y;
};
195
/** Three-component vector of int values with public members x, y and z. */
class Int3 {
 public:
  /** Sets all components to zero. */
  Int3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  Int3(int initX, int initY, int initZ)
      : x(initX), y(initY), z(initZ) {}

  int x;
  int y;
  int z;
};
204
/** Four-component vector of int values with public members x, y, z and w. */
class Int4 {
 public:
  /** Sets all components to zero. */
  Int4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  Int4(int initX, int initY, int initZ, int initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  int x;
  int y;
  int z;
  int w;
};
213
/** Two-component vector of uint32_t values with public members x and y. */
class UInt2 {
 public:
  /** Sets both components to zero. */
  UInt2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  UInt2(uint32_t initX, uint32_t initY) : x(initX), y(initY) {}

  uint32_t x;
  uint32_t y;
};
222
/** Three-component vector of uint32_t values with public members x, y and z. */
class UInt3 {
 public:
  /** Sets all components to zero. */
  UInt3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  UInt3(uint32_t initX, uint32_t initY, uint32_t initZ)
      : x(initX), y(initY), z(initZ) {}

  uint32_t x;
  uint32_t y;
  uint32_t z;
};
231
/** Four-component vector of uint32_t values with public members x, y, z and w. */
class UInt4 {
 public:
  /** Sets all components to zero. */
  UInt4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  uint32_t x;
  uint32_t y;
  uint32_t z;
  uint32_t w;
};
240
/** Two-component vector of int64_t values with public members x and y. */
class Long2 {
 public:
  /** Sets both components to zero. */
  Long2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  Long2(int64_t initX, int64_t initY) : x(initX), y(initY) {}

  int64_t x;
  int64_t y;
};
249
/** Three-component vector of int64_t values with public members x, y and z. */
class Long3 {
 public:
  /** Sets all components to zero. */
  Long3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  Long3(int64_t initX, int64_t initY, int64_t initZ)
      : x(initX), y(initY), z(initZ) {}

  int64_t x;
  int64_t y;
  int64_t z;
};
258
/** Four-component vector of int64_t values with public members x, y, z and w. */
class Long4 {
 public:
  /** Sets all components to zero. */
  Long4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  int64_t x;
  int64_t y;
  int64_t z;
  int64_t w;
};
267
/** Two-component vector of uint64_t values with public members x and y. */
class ULong2 {
 public:
  /** Sets both components to zero. */
  ULong2() : x(0), y(0) {}

  /** Sets each component from the matching argument. */
  ULong2(uint64_t initX, uint64_t initY) : x(initX), y(initY) {}

  uint64_t x;
  uint64_t y;
};
276
/** Three-component vector of uint64_t values with public members x, y and z. */
class ULong3 {
 public:
  /** Sets all components to zero. */
  ULong3() : x(0), y(0), z(0) {}

  /** Sets each component from the matching argument. */
  ULong3(uint64_t initX, uint64_t initY, uint64_t initZ)
      : x(initX), y(initY), z(initZ) {}

  uint64_t x;
  uint64_t y;
  uint64_t z;
};
285
/** Four-component vector of uint64_t values with public members x, y, z and w. */
class ULong4 {
 public:
  /** Sets all components to zero. */
  ULong4() : x(0), y(0), z(0), w(0) {}

  /** Sets each component from the matching argument. */
  ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  uint64_t x;
  uint64_t y;
  uint64_t z;
  uint64_t w;
};
294
/** Two-component vector of float values with public members x and y. */
class Float2 {
 public:
  /** Sets both components to zero. */
  Float2() : x(0.f), y(0.f) {}

  /** Sets each component from the matching argument. */
  Float2(float initX, float initY) : x(initX), y(initY) {}

  float x;
  float y;
};
303
/** Three-component vector of float values with public members x, y and z. */
class Float3 {
 public:
  /** Sets all components to zero. */
  Float3() : x(0.f), y(0.f), z(0.f) {}

  /** Sets each component from the matching argument. */
  Float3(float initX, float initY, float initZ)
      : x(initX), y(initY), z(initZ) {}

  float x;
  float y;
  float z;
};
312
/** Four-component vector of float values with public members x, y, z and w. */
class Float4 {
 public:
  /** Sets all components to zero. */
  Float4() : x(0.f), y(0.f), z(0.f), w(0.f) {}

  /** Sets each component from the matching argument. */
  Float4(float initX, float initY, float initZ, float initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  float x;
  float y;
  float z;
  float w;
};
321
/** Two-component vector of double values with public members x and y. */
class Double2 {
 public:
  /** Sets both components to zero. */
  Double2() : x(0.0), y(0.0) {}

  /** Sets each component from the matching argument. */
  Double2(double initX, double initY) : x(initX), y(initY) {}

  double x;
  double y;
};
330
/** Three-component vector of double values with public members x, y and z. */
class Double3 {
 public:
  /** Sets all components to zero. */
  Double3() : x(0.0), y(0.0), z(0.0) {}

  /** Sets each component from the matching argument. */
  Double3(double initX, double initY, double initZ)
      : x(initX), y(initY), z(initZ) {}

  double x;
  double y;
  double z;
};
339
/** Four-component vector of double values with public members x, y, z and w. */
class Double4 {
 public:
  /** Sets all components to zero. */
  Double4() : x(0.0), y(0.0), z(0.0), w(0.0) {}

  /** Sets each component from the matching argument. */
  Double4(double initX, double initY, double initZ, double initW)
      : x(initX), y(initY), z(initZ), w(initW) {}

  double x;
  double y;
  double z;
  double w;
};
348
349 /**
350  * The RenderScript context. This class controls initialization, resource management, and teardown.
351  */
352 class RS : public android::RSC::LightRefBase<RS> {
353
354 public:
355    RS();
356    virtual ~RS();
357
358    /**
359     * Initializes a RenderScript context. A context must be initialized before it can be used.
360     * @param[in] name Directory name to be used by this context. This should be equivalent to
361     * Context.getCacheDir().
362     * @param[in] flags Optional flags for this context.
363     * @return true on success
364     */
365    bool init(const char * name, uint32_t flags = 0);
366
367    /**
368     * Initializes a RenderScript context. A context must be initialized before it can be used.
369     * @param[in] name Directory name to be used by this context. This should be equivalent to
370     * Context.getCacheDir().
371     * @param[in] flags Flags for this context.
372     * @param[in] targetApi Target RS API level.
373     * @return true on success
374     */
375    bool init(const char * name, uint32_t flags, int targetApi);
376
377    /**
378     * Sets the error handler function for this context. This error handler is
379     * called whenever an error is set.
380     *
381     * @param[in] func Error handler function
382     */
383    void setErrorHandler(ErrorHandlerFunc_t func);
384
385    /**
386     * Returns the current error handler function for this context.
387     *
388     * @return pointer to current error handler function or NULL if not set
389     */
390    ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; }
391
392    /**
393     * Sets the message handler function for this context. This message handler
394     * is called whenever a message is sent from a RenderScript kernel.
395     *
396     *  @param[in] func Message handler function
397     */
398    void setMessageHandler(MessageHandlerFunc_t func);
399
400    /**
401     * Returns the current message handler function for this context.
402     *
403     * @return pointer to current message handler function or NULL if not set
404     */
405    MessageHandlerFunc_t getMessageHandler() { return mMessageFunc; }
406
407    /**
408     * Returns current status for the context.
409     *
410     * @return current error
411     */
412    RSError getError();
413
414    /**
415     * Waits for any currently running asynchronous operations to finish. This
416     * should only be used for performance testing and timing.
417     */
418    void finish();
419
420    RsContext getContext() { return mContext; }
421    void throwError(RSError error, const char *errMsg);
422
423    static dispatchTable* dispatch;
424
425 private:
426    static bool usingNative;
427    static bool initDispatch(int targetApi);
428
429    static void * threadProc(void *);
430
431    static bool gInitialized;
432    static pthread_mutex_t gInitMutex;
433
434    pthread_t mMessageThreadId;
435    pid_t mNativeMessageThreadId;
436    bool mMessageRun;
437
438    RsContext mContext;
439    RSError mCurrentError;
440
441    ErrorHandlerFunc_t mErrorFunc;
442    MessageHandlerFunc_t mMessageFunc;
443    bool mInit;
444
445    char mCacheDir[PATH_MAX+1];
446    uint32_t mCacheDirLen;
447
448    struct {
449        sp<const Element> U8;
450        sp<const Element> U8_2;
451        sp<const Element> U8_3;
452        sp<const Element> U8_4;
453        sp<const Element> I8;
454        sp<const Element> I8_2;
455        sp<const Element> I8_3;
456        sp<const Element> I8_4;
457        sp<const Element> U16;
458        sp<const Element> U16_2;
459        sp<const Element> U16_3;
460        sp<const Element> U16_4;
461        sp<const Element> I16;
462        sp<const Element> I16_2;
463        sp<const Element> I16_3;
464        sp<const Element> I16_4;
465        sp<const Element> U32;
466        sp<const Element> U32_2;
467        sp<const Element> U32_3;
468        sp<const Element> U32_4;
469        sp<const Element> I32;
470        sp<const Element> I32_2;
471        sp<const Element> I32_3;
472        sp<const Element> I32_4;
473        sp<const Element> U64;
474        sp<const Element> U64_2;
475        sp<const Element> U64_3;
476        sp<const Element> U64_4;
477        sp<const Element> I64;
478        sp<const Element> I64_2;
479        sp<const Element> I64_3;
480        sp<const Element> I64_4;
481        sp<const Element> F16;
482        sp<const Element> F16_2;
483        sp<const Element> F16_3;
484        sp<const Element> F16_4;
485        sp<const Element> F32;
486        sp<const Element> F32_2;
487        sp<const Element> F32_3;
488        sp<const Element> F32_4;
489        sp<const Element> F64;
490        sp<const Element> F64_2;
491        sp<const Element> F64_3;
492        sp<const Element> F64_4;
493        sp<const Element> BOOLEAN;
494
495        sp<const Element> ELEMENT;
496        sp<const Element> TYPE;
497        sp<const Element> ALLOCATION;
498        sp<const Element> SAMPLER;
499        sp<const Element> SCRIPT;
500        sp<const Element> MESH;
501        sp<const Element> PROGRAM_FRAGMENT;
502        sp<const Element> PROGRAM_VERTEX;
503        sp<const Element> PROGRAM_RASTER;
504        sp<const Element> PROGRAM_STORE;
505
506        sp<const Element> A_8;
507        sp<const Element> RGB_565;
508        sp<const Element> RGB_888;
509        sp<const Element> RGBA_5551;
510        sp<const Element> RGBA_4444;
511        sp<const Element> RGBA_8888;
512
513        sp<const Element> YUV;
514
515        sp<const Element> MATRIX_4X4;
516        sp<const Element> MATRIX_3X3;
517        sp<const Element> MATRIX_2X2;
518    } mElements;
519
520    struct {
521        sp<const Sampler> CLAMP_NEAREST;
522        sp<const Sampler> CLAMP_LINEAR;
523        sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR;
524        sp<const Sampler> WRAP_NEAREST;
525        sp<const Sampler> WRAP_LINEAR;
526        sp<const Sampler> WRAP_LINEAR_MIP_LINEAR;
527        sp<const Sampler> MIRRORED_REPEAT_NEAREST;
528        sp<const Sampler> MIRRORED_REPEAT_LINEAR;
529        sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR;
530    } mSamplers;
531    friend class Sampler;
532    friend class Element;
533    friend class ScriptC;
534};
535
536 /**
537  * Base class for all RenderScript objects. Not for direct use by developers.
538  */
539class BaseObj : public android::RSC::LightRefBase<BaseObj> {
540public:
541    void * getID() const;
542    virtual ~BaseObj();
543    virtual void updateFromNative();
544    virtual bool equals(const sp<const BaseObj>& obj);
545
546protected:
547    void *mID;
548    RS* mRS;
549    const char * mName;
550
551    BaseObj(void *id, sp<RS> rs);
552    void checkValid();
553
554    static void * getObjID(const sp<const BaseObj>& o);
555
556};
557
558 /**
559  * This class provides the primary method through which data is passed to and
560  * from RenderScript kernels. An Allocation provides the backing store for a
561  * given Type.
562  *
563  * An Allocation also contains a set of usage flags that denote how the
564  * Allocation could be used. For example, an Allocation may have usage flags
565  * specifying that it can be used from a script as well as input to a
566  * Sampler. A developer must synchronize across these different usages using
567  * syncAll(int) in order to ensure that different users of the Allocation have
568  * a consistent view of memory. For example, in the case where an Allocation is
569  * used as the output of one kernel and as Sampler input in a later kernel, a
570  * developer must call syncAll(RS_ALLOCATION_USAGE_SCRIPT) prior to launching the
571  * second kernel to ensure correctness.
572  */
573class Allocation : public BaseObj {
574protected:
575    sp<const Type> mType;
576    uint32_t mUsage;
577    sp<Allocation> mAdaptedAllocation;
578
579    bool mConstrainedLOD;
580    bool mConstrainedFace;
581    bool mConstrainedY;
582    bool mConstrainedZ;
583    bool mReadAllowed;
584    bool mWriteAllowed;
585    bool mAutoPadding;
586    uint32_t mSelectedY;
587    uint32_t mSelectedZ;
588    uint32_t mSelectedLOD;
589    RsAllocationCubemapFace mSelectedFace;
590
591    uint32_t mCurrentDimX;
592    uint32_t mCurrentDimY;
593    uint32_t mCurrentDimZ;
594    uint32_t mCurrentCount;
595
596    void * getIDSafe() const;
597    void updateCacheInfo(const sp<const Type>& t);
598
599    Allocation(void *id, sp<RS> rs, sp<const Type> t, uint32_t usage);
600
601    void validateIsInt64();
602    void validateIsInt32();
603    void validateIsInt16();
604    void validateIsInt8();
605    void validateIsFloat32();
606    void validateIsFloat64();
607    void validateIsObject();
608
609    virtual void updateFromNative();
610
611    void validate2DRange(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h);
612    void validate3DRange(uint32_t xoff, uint32_t yoff, uint32_t zoff,
613                         uint32_t w, uint32_t h, uint32_t d);
614
615public:
616
617    /**
618     * Return Type for the allocation.
619     * @return pointer to underlying Type
620     */
621    sp<const Type> getType() const {
622        return mType;
623    }
624
625    /**
626     * Enable/Disable AutoPadding for Vec3 elements.
627     *
628     * @param useAutoPadding True: enable AutoPadding; flase: disable AutoPadding
629     *
630     */
631    void setAutoPadding(bool useAutoPadding) {
632        mAutoPadding = useAutoPadding;
633    }
634
635    /**
636     * Propagate changes from one usage of the Allocation to other usages of the Allocation.
637     * @param[in] srcLocation source location with changes to propagate elsewhere
638     */
639    void syncAll(RsAllocationUsageType srcLocation);
640
641    /**
642     * Send a buffer to the output stream.  The contents of the Allocation will
643     * be undefined after this operation. This operation is only valid if
644     * USAGE_IO_OUTPUT is set on the Allocation.
645     */
646    void ioSendOutput();
647
648    /**
649     * Receive the latest input into the Allocation. This operation
650     * is only valid if USAGE_IO_INPUT is set on the Allocation.
651     */
652    void ioGetInput();
653
654#ifndef RS_COMPATIBILITY_LIB
655    /**
656     * Returns the handle to a raw buffer that is being managed by the screen
657     * compositor. This operation is only valid for Allocations with USAGE_IO_INPUT.
658     * @return Surface associated with allocation
659     */
660    sp<Surface> getSurface();
661
662    /**
663     * Associate a Surface with this Allocation. This
664     * operation is only valid for Allocations with USAGE_IO_OUTPUT.
665     * @param[in] s Surface to associate with allocation
666     */
667    void setSurface(const sp<Surface>& s);
668#endif
669
670    /**
671     * Generate a mipmap chain. This is only valid if the Type of the Allocation
672     * includes mipmaps. This function will generate a complete set of mipmaps
673     * from the top level LOD and place them into the script memory space. If
674     * the Allocation is also using other memory spaces, a call to
675     * syncAll(Allocation.USAGE_SCRIPT) is required.
676     */
677    void generateMipmaps();
678
679    /**
680     * Copy an array into part of this Allocation.
681     * @param[in] off offset of first Element to be overwritten
682     * @param[in] count number of Elements to copy
683     * @param[in] data array from which to copy
684     */
685    void copy1DRangeFrom(uint32_t off, size_t count, const void *data);
686
687    /**
688     * Copy part of an Allocation into part of this Allocation.
689     * @param[in] off offset of first Element to be overwritten
690     * @param[in] count number of Elements to copy
691     * @param[in] data Allocation from which to copy
692     * @param[in] dataOff offset of first Element in data to copy
693     */
694    void copy1DRangeFrom(uint32_t off, size_t count, const sp<const Allocation>& data, uint32_t dataOff);
695
696    /**
697     * Copy an array into part of this Allocation.
698     * @param[in] off offset of first Element to be overwritten
699     * @param[in] count number of Elements to copy
700     * @param[in] data array from which to copy
701     */
702    void copy1DRangeTo(uint32_t off, size_t count, void *data);
703
704    /**
705     * Copy entire array to an Allocation.
706     * @param[in] data array from which to copy
707     */
708    void copy1DFrom(const void* data);
709
710    /**
711     * Copy entire Allocation to an array.
712     * @param[in] data destination array
713     */
714    void copy1DTo(void* data);
715
716    /**
717     * Copy from an array into a rectangular region in this Allocation. The
718     * array is assumed to be tightly packed.
719     * @param[in] xoff X offset of region to update in this Allocation
720     * @param[in] yoff Y offset of region to update in this Allocation
721     * @param[in] w Width of region to update
722     * @param[in] h Height of region to update
723     * @param[in] data Array from which to copy
724     */
725    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
726                         const void *data);
727
728    /**
729     * Copy from this Allocation into a rectangular region in an array. The
730     * array is assumed to be tightly packed.
731     * @param[in] xoff X offset of region to copy from this Allocation
732     * @param[in] yoff Y offset of region to copy from this Allocation
733     * @param[in] w Width of region to update
734     * @param[in] h Height of region to update
735     * @param[in] data destination array
736     */
737    void copy2DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
738                       void *data);
739
740    /**
741     * Copy from an Allocation into a rectangular region in this Allocation.
742     * @param[in] xoff X offset of region to update in this Allocation
743     * @param[in] yoff Y offset of region to update in this Allocation
744     * @param[in] w Width of region to update
745     * @param[in] h Height of region to update
746     * @param[in] data Allocation from which to copy
747     * @param[in] dataXoff X offset of region to copy from in data
748     * @param[in] dataYoff Y offset of region to copy from in data
749     */
750    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
751                         const sp<const Allocation>& data, uint32_t dataXoff, uint32_t dataYoff);
752
753    /**
754     * Copy from a strided array into a rectangular region in this Allocation.
755     * @param[in] xoff X offset of region to update in this Allocation
756     * @param[in] yoff Y offset of region to update in this Allocation
757     * @param[in] w Width of region to update
758     * @param[in] h Height of region to update
759     * @param[in] data array from which to copy
760     * @param[in] stride stride of data in bytes
761     */
762    void copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
763                           const void *data, size_t stride);
764
765    /**
766     * Copy from a strided array into this Allocation.
767     * @param[in] data array from which to copy
768     * @param[in] stride stride of data in bytes
769     */
770    void copy2DStridedFrom(const void *data, size_t stride);
771
772    /**
773     * Copy from a rectangular region in this Allocation into a strided array.
774     * @param[in] xoff X offset of region to update in this Allocation
775     * @param[in] yoff Y offset of region to update in this Allocation
776     * @param[in] w Width of region to update
777     * @param[in] h Height of region to update
778     * @param[in] data destination array
779     * @param[in] stride stride of data in bytes
780     */
781    void copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
782                         void *data, size_t stride);
783
784    /**
785     * Copy this Allocation into a strided array.
786     * @param[in] data destination array
787     * @param[in] stride stride of data in bytes
788     */
789    void copy2DStridedTo(void *data, size_t stride);
790
791
792    /**
793     * Copy from an array into a 3D region in this Allocation. The
794     * array is assumed to be tightly packed.
795     * @param[in] xoff X offset of region to update in this Allocation
796     * @param[in] yoff Y offset of region to update in this Allocation
797     * @param[in] zoff Z offset of region to update in this Allocation
798     * @param[in] w Width of region to update
799     * @param[in] h Height of region to update
800     * @param[in] d Depth of region to update
801     * @param[in] data Array from which to copy
802     */
803    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
804                         uint32_t h, uint32_t d, const void* data);
805
806    /**
807     * Copy from an Allocation into a 3D region in this Allocation.
808     * @param[in] xoff X offset of region to update in this Allocation
809     * @param[in] yoff Y offset of region to update in this Allocation
810     * @param[in] zoff Z offset of region to update in this Allocation
811     * @param[in] w Width of region to update
812     * @param[in] h Height of region to update
813     * @param[in] d Depth of region to update
814     * @param[in] data Allocation from which to copy
815     * @param[in] dataXoff X offset of region in data to copy from
816     * @param[in] dataYoff Y offset of region in data to copy from
817     * @param[in] dataZoff Z offset of region in data to copy from
818     */
819    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff,
820                         uint32_t w, uint32_t h, uint32_t d,
821                         const sp<const Allocation>& data,
822                         uint32_t dataXoff, uint32_t dataYoff, uint32_t dataZoff);
823
824    /**
825     * Copy a 3D region in this Allocation into an array. The
826     * array is assumed to be tightly packed.
827     * @param[in] xoff X offset of region to update in this Allocation
828     * @param[in] yoff Y offset of region to update in this Allocation
829     * @param[in] zoff Z offset of region to update in this Allocation
830     * @param[in] w Width of region to update
831     * @param[in] h Height of region to update
832     * @param[in] d Depth of region to update
833     * @param[in] data Array from which to copy
834     */
835    void copy3DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
836                         uint32_t h, uint32_t d, void* data);
837
838    /**
839     * Creates an Allocation for use by scripts with a given Type.
840     * @param[in] rs Context to which the Allocation will belong
841     * @param[in] type Type of the Allocation
842     * @param[in] mipmaps desired mipmap behavior for the Allocation
843     * @param[in] usage usage for the Allocation
844     * @return new Allocation
845     */
846    static sp<Allocation> createTyped(const sp<RS>& rs, const sp<const Type>& type,
847                                   RsAllocationMipmapControl mipmaps, uint32_t usage);
848
849    /**
850     * Creates an Allocation for use by scripts with a given Type and a backing pointer. For use
851     * with RS_ALLOCATION_USAGE_SHARED.
852     * @param[in] rs Context to which the Allocation will belong
853     * @param[in] type Type of the Allocation
854     * @param[in] mipmaps desired mipmap behavior for the Allocation
855     * @param[in] usage usage for the Allocation
856     * @param[in] pointer existing backing store to use for this Allocation if possible
857     * @return new Allocation
858     */
859    static sp<Allocation> createTyped(const sp<RS>& rs, const sp<const Type>& type,
860                                   RsAllocationMipmapControl mipmaps, uint32_t usage, void * pointer);
861
862    /**
863     * Creates an Allocation for use by scripts with a given Type with no mipmaps.
864     * @param[in] rs Context to which the Allocation will belong
865     * @param[in] type Type of the Allocation
866     * @param[in] usage usage for the Allocation
867     * @return new Allocation
868     */
869    static sp<Allocation> createTyped(const sp<RS>& rs, const sp<const Type>& type,
870                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
871    /**
872     * Creates an Allocation with a specified number of given elements.
873     * @param[in] rs Context to which the Allocation will belong
874     * @param[in] e Element used in the Allocation
875     * @param[in] count Number of elements of the Allocation
876     * @param[in] usage usage for the Allocation
877     * @return new Allocation
878     */
879    static sp<Allocation> createSized(const sp<RS>& rs, const sp<const Element>& e, size_t count,
880                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
881
882    /**
883     * Creates a 2D Allocation with a specified number of given elements.
884     * @param[in] rs Context to which the Allocation will belong
885     * @param[in] e Element used in the Allocation
886     * @param[in] x Width in Elements of the Allocation
887     * @param[in] y Height of the Allocation
888     * @param[in] usage usage for the Allocation
889     * @return new Allocation
890     */
891    static sp<Allocation> createSized2D(const sp<RS>& rs, const sp<const Element>& e,
892                                        size_t x, size_t y,
893                                        uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
894
895
896    /**
897     * Get the backing pointer for a USAGE_SHARED allocation.
898     * @param[in] stride optional parameter. when non-NULL, will contain
899     *   stride in bytes of a 2D Allocation
900     * @return pointer to data
901     */
902    void * getPointer(size_t *stride = NULL);
903};
904
 /**
  * An Element represents one item within an Allocation. An Element is roughly
  * equivalent to a C type in a RenderScript kernel. Elements may be basic
  * or complex. Some basic elements are:
  *
  * - A single float value (equivalent to a float in a kernel)
  * - A four-element float vector (equivalent to a float4 in a kernel)
  * - An unsigned 32-bit integer (equivalent to an unsigned int in a kernel)
  * - A single signed 8-bit integer (equivalent to a char in a kernel)
  *
  * Basic Elements are composed of an Element.DataType and an
  * Element.DataKind. The DataType encodes C type information of an Element,
  * while the DataKind encodes how that Element should be interpreted by a
  * Sampler. Note that Allocation objects with DataKind USER cannot be used as
  * input for a Sampler. In general, Allocation objects that are intended for
  * use with a Sampler should use bitmap-derived Elements such as
  * Element::RGBA_8888.
  */
923
924
925class Element : public BaseObj {
926public:
927    bool isComplex();
928
929    /**
930     * Elements could be simple, such as an int or a float, or a structure with
931     * multiple sub-elements, such as a collection of floats, float2,
932     * float4. This function returns zero for simple elements or the number of
933     * sub-elements otherwise.
934     * @return number of sub-elements
935     */
936    size_t getSubElementCount() {
937        return mVisibleElementMapSize;
938    }
939
940    /**
941     * For complex Elements, this returns the sub-element at a given index.
942     * @param[in] index index of sub-element
943     * @return sub-element
944     */
945    sp<const Element> getSubElement(uint32_t index);
946
947    /**
948     * For complex Elements, this returns the name of the sub-element at a given
949     * index.
950     * @param[in] index index of sub-element
951     * @return name of sub-element
952     */
953    const char * getSubElementName(uint32_t index);
954
955    /**
956     * For complex Elements, this returns the size of the sub-element at a given
957     * index.
958     * @param[in] index index of sub-element
959     * @return size of sub-element
960     */
961    size_t getSubElementArraySize(uint32_t index);
962
963    /**
964     * Returns the location of a sub-element within a complex Element.
965     * @param[in] index index of sub-element
966     * @return offset in bytes
967     */
968    uint32_t getSubElementOffsetBytes(uint32_t index);
969
970    /**
971     * Returns the data type used for the Element.
972     * @return data type
973     */
974    RsDataType getDataType() const {
975        return mType;
976    }
977
978    /**
979     * Returns the data kind used for the Element.
980     * @return data kind
981     */
982    RsDataKind getDataKind() const {
983        return mKind;
984    }
985
986    /**
987     * Returns the size in bytes of the Element.
988     * @return size in bytes
989     */
990    size_t getSizeBytes() const {
991        return mSizeBytes;
992    }
993
994    /**
995     * Returns the number of vector components for this Element.
996     * @return number of vector components
997     */
998    uint32_t getVectorSize() const {
999        return mVectorSize;
1000    }
1001
1002    /**
1003     * Utility function for returning an Element containing a single bool.
1004     * @param[in] rs RenderScript context
1005     * @return Element
1006     */
1007    static sp<const Element> BOOLEAN(const sp<RS> &rs);
1008    /**
1009     * Utility function for returning an Element containing a single unsigned char.
1010     * @param[in] rs RenderScript context
1011     * @return Element
1012     */
1013    static sp<const Element> U8(const sp<RS> &rs);
1014    /**
1015     * Utility function for returning an Element containing a single signed char.
1016     * @param[in] rs RenderScript context
1017     * @return Element
1018     */
1019    static sp<const Element> I8(const sp<RS> &rs);
1020    /**
1021     * Utility function for returning an Element containing a single unsigned short.
1022     * @param[in] rs RenderScript context
1023     * @return Element
1024     */
1025    static sp<const Element> U16(const sp<RS> &rs);
1026    /**
1027     * Utility function for returning an Element containing a single signed short.
1028     * @param[in] rs RenderScript context
1029     * @return Element
1030     */
1031    static sp<const Element> I16(const sp<RS> &rs);
1032    /**
1033     * Utility function for returning an Element containing a single unsigned int.
1034     * @param[in] rs RenderScript context
1035     * @return Element
1036     */
1037    static sp<const Element> U32(const sp<RS> &rs);
1038    /**
1039     * Utility function for returning an Element containing a single signed int.
1040     * @param[in] rs RenderScript context
1041     * @return Element
1042     */
1043    static sp<const Element> I32(const sp<RS> &rs);
1044    /**
1045     * Utility function for returning an Element containing a single unsigned long long.
1046     * @param[in] rs RenderScript context
1047     * @return Element
1048     */
1049    static sp<const Element> U64(const sp<RS> &rs);
1050    /**
1051     * Utility function for returning an Element containing a single signed long long.
1052     * @param[in] rs RenderScript context
1053     * @return Element
1054     */
1055    static sp<const Element> I64(const sp<RS> &rs);
1056    /**
1057     * Utility function for returning an Element containing a single half.
1058     * @param[in] rs RenderScript context
1059     * @return Element
1060     */
1061    static sp<const Element> F16(const sp<RS> &rs);
1062    /**
1063     * Utility function for returning an Element containing a single float.
1064     * @param[in] rs RenderScript context
1065     * @return Element
1066     */
1067    static sp<const Element> F32(const sp<RS> &rs);
1068    /**
1069     * Utility function for returning an Element containing a single double.
1070     * @param[in] rs RenderScript context
1071     * @return Element
1072     */
1073    static sp<const Element> F64(const sp<RS> &rs);
1074    /**
1075     * Utility function for returning an Element containing a single Element.
1076     * @param[in] rs RenderScript context
1077     * @return Element
1078     */
1079    static sp<const Element> ELEMENT(const sp<RS> &rs);
1080    /**
1081     * Utility function for returning an Element containing a single Type.
1082     * @param[in] rs RenderScript context
1083     * @return Element
1084     */
1085    static sp<const Element> TYPE(const sp<RS> &rs);
1086    /**
1087     * Utility function for returning an Element containing a single Allocation.
1088     * @param[in] rs RenderScript context
1089     * @return Element
1090     */
1091    static sp<const Element> ALLOCATION(const sp<RS> &rs);
1092    /**
1093     * Utility function for returning an Element containing a single Sampler.
1094     * @param[in] rs RenderScript context
1095     * @return Element
1096     */
1097    static sp<const Element> SAMPLER(const sp<RS> &rs);
1098    /**
1099     * Utility function for returning an Element containing a single Script.
1100     * @param[in] rs RenderScript context
1101     * @return Element
1102     */
1103    static sp<const Element> SCRIPT(const sp<RS> &rs);
1104    /**
1105     * Utility function for returning an Element containing an ALPHA_8 pixel.
1106     * @param[in] rs RenderScript context
1107     * @return Element
1108     */
1109    static sp<const Element> A_8(const sp<RS> &rs);
1110    /**
1111     * Utility function for returning an Element containing an RGB_565 pixel.
1112     * @param[in] rs RenderScript context
1113     * @return Element
1114     */
1115    static sp<const Element> RGB_565(const sp<RS> &rs);
1116    /**
1117     * Utility function for returning an Element containing an RGB_888 pixel.
1118     * @param[in] rs RenderScript context
1119     * @return Element
1120     */
1121    static sp<const Element> RGB_888(const sp<RS> &rs);
1122    /**
1123     * Utility function for returning an Element containing an RGBA_5551 pixel.
1124     * @param[in] rs RenderScript context
1125     * @return Element
1126     */
1127    static sp<const Element> RGBA_5551(const sp<RS> &rs);
1128    /**
1129     * Utility function for returning an Element containing an RGBA_4444 pixel.
1130     * @param[in] rs RenderScript context
1131     * @return Element
1132     */
1133    static sp<const Element> RGBA_4444(const sp<RS> &rs);
1134    /**
1135     * Utility function for returning an Element containing an RGBA_8888 pixel.
1136     * @param[in] rs RenderScript context
1137     * @return Element
1138     */
1139    static sp<const Element> RGBA_8888(const sp<RS> &rs);
1140
1141    /**
1142     * Utility function for returning an Element containing a half2.
1143     * @param[in] rs RenderScript context
1144     * @return Element
1145     */
1146    static sp<const Element> F16_2(const sp<RS> &rs);
1147    /**
1148     * Utility function for returning an Element containing a half3.
1149     * @param[in] rs RenderScript context
1150     * @return Element
1151     */
1152    static sp<const Element> F16_3(const sp<RS> &rs);
1153    /**
1154     * Utility function for returning an Element containing a half4.
1155     * @param[in] rs RenderScript context
1156     * @return Element
1157     */
1158    static sp<const Element> F16_4(const sp<RS> &rs);
1159
1160    /**
1161     * Utility function for returning an Element containing a float2.
1162     * @param[in] rs RenderScript context
1163     * @return Element
1164     */
1165    static sp<const Element> F32_2(const sp<RS> &rs);
1166    /**
1167     * Utility function for returning an Element containing a float3.
1168     * @param[in] rs RenderScript context
1169     * @return Element
1170     */
1171    static sp<const Element> F32_3(const sp<RS> &rs);
1172    /**
1173     * Utility function for returning an Element containing a float4.
1174     * @param[in] rs RenderScript context
1175     * @return Element
1176     */
1177    static sp<const Element> F32_4(const sp<RS> &rs);
1178    /**
1179     * Utility function for returning an Element containing a double2.
1180     * @param[in] rs RenderScript context
1181     * @return Element
1182     */
1183    static sp<const Element> F64_2(const sp<RS> &rs);
1184    /**
1185     * Utility function for returning an Element containing a double3.
1186     * @param[in] rs RenderScript context
1187     * @return Element
1188     */
1189    static sp<const Element> F64_3(const sp<RS> &rs);
1190    /**
1191     * Utility function for returning an Element containing a double4.
1192     * @param[in] rs RenderScript context
1193     * @return Element
1194     */
1195    static sp<const Element> F64_4(const sp<RS> &rs);
1196    /**
1197     * Utility function for returning an Element containing a uchar2.
1198     * @param[in] rs RenderScript context
1199     * @return Element
1200     */
1201    static sp<const Element> U8_2(const sp<RS> &rs);
1202    /**
1203     * Utility function for returning an Element containing a uchar3.
1204     * @param[in] rs RenderScript context
1205     * @return Element
1206     */
1207    static sp<const Element> U8_3(const sp<RS> &rs);
1208    /**
1209     * Utility function for returning an Element containing a uchar4.
1210     * @param[in] rs RenderScript context
1211     * @return Element
1212     */
1213    static sp<const Element> U8_4(const sp<RS> &rs);
1214    /**
1215     * Utility function for returning an Element containing a char2.
1216     * @param[in] rs RenderScript context
1217     * @return Element
1218     */
1219    static sp<const Element> I8_2(const sp<RS> &rs);
1220    /**
1221     * Utility function for returning an Element containing a char3.
1222     * @param[in] rs RenderScript context
1223     * @return Element
1224     */
1225    static sp<const Element> I8_3(const sp<RS> &rs);
1226    /**
1227     * Utility function for returning an Element containing a char4.
1228     * @param[in] rs RenderScript context
1229     * @return Element
1230     */
1231    static sp<const Element> I8_4(const sp<RS> &rs);
1232    /**
1233     * Utility function for returning an Element containing a ushort2.
1234     * @param[in] rs RenderScript context
1235     * @return Element
1236     */
1237    static sp<const Element> U16_2(const sp<RS> &rs);
1238    /**
1239     * Utility function for returning an Element containing a ushort3.
1240     * @param[in] rs RenderScript context
1241     * @return Element
1242     */
1243    static sp<const Element> U16_3(const sp<RS> &rs);
1244    /**
1245     * Utility function for returning an Element containing a ushort4.
1246     * @param[in] rs RenderScript context
1247     * @return Element
1248     */
1249    static sp<const Element> U16_4(const sp<RS> &rs);
1250    /**
1251     * Utility function for returning an Element containing a short2.
1252     * @param[in] rs RenderScript context
1253     * @return Element
1254     */
1255    static sp<const Element> I16_2(const sp<RS> &rs);
1256    /**
1257     * Utility function for returning an Element containing a short3.
1258     * @param[in] rs RenderScript context
1259     * @return Element
1260     */
1261    static sp<const Element> I16_3(const sp<RS> &rs);
1262    /**
1263     * Utility function for returning an Element containing a short4.
1264     * @param[in] rs RenderScript context
1265     * @return Element
1266     */
1267    static sp<const Element> I16_4(const sp<RS> &rs);
1268    /**
1269     * Utility function for returning an Element containing a uint2.
1270     * @param[in] rs RenderScript context
1271     * @return Element
1272     */
1273    static sp<const Element> U32_2(const sp<RS> &rs);
1274    /**
1275     * Utility function for returning an Element containing a uint3.
1276     * @param[in] rs RenderScript context
1277     * @return Element
1278     */
1279    static sp<const Element> U32_3(const sp<RS> &rs);
1280    /**
1281     * Utility function for returning an Element containing a uint4.
1282     * @param[in] rs RenderScript context
1283     * @return Element
1284     */
1285    static sp<const Element> U32_4(const sp<RS> &rs);
1286    /**
1287     * Utility function for returning an Element containing an int2.
1288     * @param[in] rs RenderScript context
1289     * @return Element
1290     */
1291    static sp<const Element> I32_2(const sp<RS> &rs);
1292    /**
1293     * Utility function for returning an Element containing an int3.
1294     * @param[in] rs RenderScript context
1295     * @return Element
1296     */
1297    static sp<const Element> I32_3(const sp<RS> &rs);
1298    /**
1299     * Utility function for returning an Element containing an int4.
1300     * @param[in] rs RenderScript context
1301     * @return Element
1302     */
1303    static sp<const Element> I32_4(const sp<RS> &rs);
1304    /**
1305     * Utility function for returning an Element containing a ulong2.
1306     * @param[in] rs RenderScript context
1307     * @return Element
1308     */
1309    static sp<const Element> U64_2(const sp<RS> &rs);
1310    /**
1311     * Utility function for returning an Element containing a ulong3.
1312     * @param[in] rs RenderScript context
1313     * @return Element
1314     */
1315    static sp<const Element> U64_3(const sp<RS> &rs);
1316    /**
1317     * Utility function for returning an Element containing a ulong4.
1318     * @param[in] rs RenderScript context
1319     * @return Element
1320     */
1321    static sp<const Element> U64_4(const sp<RS> &rs);
1322    /**
1323     * Utility function for returning an Element containing a long2.
1324     * @param[in] rs RenderScript context
1325     * @return Element
1326     */
1327    static sp<const Element> I64_2(const sp<RS> &rs);
1328    /**
1329     * Utility function for returning an Element containing a long3.
1330     * @param[in] rs RenderScript context
1331     * @return Element
1332     */
1333    static sp<const Element> I64_3(const sp<RS> &rs);
1334    /**
1335     * Utility function for returning an Element containing a long4.
1336     * @param[in] rs RenderScript context
1337     * @return Element
1338     */
1339    static sp<const Element> I64_4(const sp<RS> &rs);
1340    /**
1341     * Utility function for returning an Element containing a YUV pixel.
1342     * @param[in] rs RenderScript context
1343     * @return Element
1344     */
1345    static sp<const Element> YUV(const sp<RS> &rs);
1346    /**
1347     * Utility function for returning an Element containing an rs_matrix_4x4.
1348     * @param[in] rs RenderScript context
1349     * @return Element
1350     */
1351    static sp<const Element> MATRIX_4X4(const sp<RS> &rs);
1352    /**
1353     * Utility function for returning an Element containing an rs_matrix_3x3.
1354     * @param[in] rs RenderScript context
1355     * @return Element
1356     */
1357    static sp<const Element> MATRIX_3X3(const sp<RS> &rs);
1358    /**
1359     * Utility function for returning an Element containing an rs_matrix_2x2.
1360     * @param[in] rs RenderScript context
1361     * @return Element
1362     */
1363    static sp<const Element> MATRIX_2X2(const sp<RS> &rs);
1364
1365    void updateFromNative();
1366
1367    /**
1368     * Create an Element with a given DataType.
1369     * @param[in] rs RenderScript context
1370     * @param[in] dt data type
1371     * @return Element
1372     */
1373    static sp<const Element> createUser(const sp<RS>& rs, RsDataType dt);
1374    /**
1375     * Create a vector Element with the given DataType
1376     * @param[in] rs RenderScript
1377     * @param[in] dt DataType
1378     * @param[in] size vector size
1379     * @return Element
1380     */
1381    static sp<const Element> createVector(const sp<RS>& rs, RsDataType dt, uint32_t size);
1382    /**
1383     * Create an Element with a given DataType and DataKind.
1384     * @param[in] rs RenderScript context
1385     * @param[in] dt DataType
1386     * @param[in] dk DataKind
1387     * @return Element
1388     */
1389    static sp<const Element> createPixel(const sp<RS>& rs, RsDataType dt, RsDataKind dk);
1390
1391    /**
1392     * Returns true if the Element can interoperate with this Element.
1393     * @param[in] e Element to compare
1394     * @return true if Elements can interoperate
1395     */
1396    bool isCompatible(const sp<const Element>&e) const;
1397
1398    /**
1399     * Builder class for producing complex elements with matching field and name
1400     * pairs. The builder starts empty. The order in which elements are added is
1401     * retained for the layout in memory.
1402     */
1403    class Builder {
1404    private:
1405        RS* mRS;
1406        size_t mElementsCount;
1407        size_t mElementsVecSize;
1408        sp<const Element> * mElements;
1409        char ** mElementNames;
1410        size_t * mElementNameLengths;
1411        uint32_t * mArraySizes;
1412        bool mSkipPadding;
1413
1414    public:
1415        explicit Builder(sp<RS> rs);
1416        ~Builder();
1417        void add(const sp<const Element>& e, const char * name, uint32_t arraySize = 1);
1418        sp<const Element> create();
1419    };
1420
1421protected:
1422    friend class Type;
1423    Element(void *id, sp<RS> rs,
1424            sp<const Element> * elements,
1425            size_t elementCount,
1426            const char ** elementNames,
1427            size_t * elementNameLengths,
1428            uint32_t * arraySizes);
1429    Element(void *id, sp<RS> rs, RsDataType dt, RsDataKind dk, bool norm, uint32_t size);
1430    Element(void *id, sp<RS> rs);
1431    explicit Element(sp<RS> rs);
1432    virtual ~Element();
1433
1434private:
1435    void updateVisibleSubElements();
1436
1437    size_t mElementsCount;
1438    size_t mVisibleElementMapSize;
1439
1440    sp<const Element> * mElements;
1441    char ** mElementNames;
1442    size_t * mElementNameLengths;
1443    uint32_t * mArraySizes;
1444    uint32_t * mVisibleElementMap;
1445    uint32_t * mOffsetInBytes;
1446
1447    RsDataType mType;
1448    RsDataKind mKind;
1449    bool mNormalized;
1450    size_t mSizeBytes;
1451    size_t mVectorSize;
1452};
1453
1454class FieldPacker {
1455protected:
1456    unsigned char* mData;
1457    size_t mPos;
1458    size_t mLen;
1459
1460public:
1461    explicit FieldPacker(size_t len)
1462        : mPos(0), mLen(len) {
1463            mData = new unsigned char[len];
1464        }
1465
1466    virtual ~FieldPacker() {
1467        delete [] mData;
1468    }
1469
1470    void align(size_t v) {
1471        if ((v & (v - 1)) != 0) {
1472            //            ALOGE("Non-power-of-two alignment: %zu", v);
1473            return;
1474        }
1475
1476        while ((mPos & (v - 1)) != 0) {
1477            mData[mPos++] = 0;
1478        }
1479    }
1480
1481    void reset() {
1482        mPos = 0;
1483    }
1484
1485    void reset(size_t i) {
1486        if (i >= mLen) {
1487            //            ALOGE("Out of bounds: i (%zu) >= len (%zu)", i, mLen);
1488            return;
1489        }
1490        mPos = i;
1491    }
1492
1493    void skip(size_t i) {
1494        size_t res = mPos + i;
1495        if (res > mLen) {
1496            //            ALOGE("Exceeded buffer length: i (%zu) > len (%zu)", i, mLen);
1497            return;
1498        }
1499        mPos = res;
1500    }
1501
1502    void* getData() const {
1503        return mData;
1504    }
1505
1506    size_t getLength() const {
1507        return mLen;
1508    }
1509
1510    template <typename T>
1511        void add(T t) {
1512        align(sizeof(t));
1513        if (mPos + sizeof(t) <= mLen) {
1514            memcpy(&mData[mPos], &t, sizeof(t));
1515            mPos += sizeof(t);
1516        }
1517    }
1518
1519    /*
1520      void add(rs_matrix4x4 m) {
1521      for (size_t i = 0; i < 16; i++) {
1522      add(m.m[i]);
1523      }
1524      }
1525
1526      void add(rs_matrix3x3 m) {
1527      for (size_t i = 0; i < 9; i++) {
1528      add(m.m[i]);
1529      }
1530      }
1531
1532      void add(rs_matrix2x2 m) {
1533      for (size_t i = 0; i < 4; i++) {
1534      add(m.m[i]);
1535      }
1536      }
1537    */
1538
1539    void add(const sp<BaseObj>& obj) {
1540        if (obj != NULL) {
1541            add((uint32_t) (uintptr_t) obj->getID());
1542        } else {
1543            add((uint32_t) 0);
1544        }
1545    }
1546};
1547
1548/**
1549 * A Type describes the Element and dimensions used for an Allocation or a
1550 * parallel operation.
1551 *
1552 * A Type always includes an Element and an X dimension. A Type may be
1553 * multidimensional, up to three dimensions. A nonzero value in the Y or Z
1554 * dimensions indicates that the dimension is present. Note that a Type with
1555 * only a given X dimension and a Type with the same X dimension but Y = 1 are
1556 * not equivalent.
1557 *
1558 * A Type also supports inclusion of level of detail (LOD) or cube map
1559 * faces. LOD and cube map faces are booleans to indicate present or not
1560 * present.
1561 *
1562 * A Type also supports YUV format information to support an Allocation in a YUV
1563 * format. The YUV formats supported are RS_YUV_YV12 and RS_YUV_NV21.
1564 */
1565class Type : public BaseObj {
1566protected:
1567    friend class Allocation;
1568
1569    uint32_t mDimX;
1570    uint32_t mDimY;
1571    uint32_t mDimZ;
1572    RsYuvFormat mYuvFormat;
1573    bool mDimMipmaps;
1574    bool mDimFaces;
1575    size_t mElementCount;
1576    sp<const Element> mElement;
1577
1578    Type(void *id, sp<RS> rs);
1579
1580    void calcElementCount();
1581    virtual void updateFromNative();
1582
1583public:
1584
1585    /**
1586     * Returns the YUV format.
1587     * @return YUV format of the Allocation
1588     */
1589    RsYuvFormat getYuvFormat() const {
1590        return mYuvFormat;
1591    }
1592
1593    /**
1594     * Returns the Element of the Allocation.
1595     * @return YUV format of the Allocation
1596     */
1597    sp<const Element> getElement() const {
1598        return mElement;
1599    }
1600
1601    /**
1602     * Returns the X dimension of the Allocation.
1603     * @return X dimension of the allocation
1604     */
1605    uint32_t getX() const {
1606        return mDimX;
1607    }
1608
1609    /**
1610     * Returns the Y dimension of the Allocation.
1611     * @return Y dimension of the allocation
1612     */
1613    uint32_t getY() const {
1614        return mDimY;
1615    }
1616
1617    /**
1618     * Returns the Z dimension of the Allocation.
1619     * @return Z dimension of the allocation
1620     */
1621    uint32_t getZ() const {
1622        return mDimZ;
1623    }
1624
1625    /**
1626     * Returns true if the Allocation has mipmaps.
1627     * @return true if the Allocation has mipmaps
1628     */
1629    bool hasMipmaps() const {
1630        return mDimMipmaps;
1631    }
1632
1633    /**
1634     * Returns true if the Allocation is a cube map
1635     * @return true if the Allocation is a cube map
1636     */
1637    bool hasFaces() const {
1638        return mDimFaces;
1639    }
1640
1641    /**
1642     * Returns number of accessible Elements in the Allocation
1643     * @return number of accessible Elements in the Allocation
1644     */
1645    size_t getCount() const {
1646        return mElementCount;
1647    }
1648
1649    /**
1650     * Returns size in bytes of all Elements in the Allocation
1651     * @return size in bytes of all Elements in the Allocation
1652     */
1653    size_t getSizeBytes() const {
1654        return mElementCount * mElement->getSizeBytes();
1655    }
1656
1657    /**
1658     * Creates a new Type with the given Element and dimensions.
1659     * @param[in] rs RenderScript context
1660     * @param[in] e Element
1661     * @param[in] dimX X dimension
1662     * @param[in] dimY Y dimension
1663     * @param[in] dimZ Z dimension
1664     * @return new Type
1665     */
1666    static sp<const Type> create(const sp<RS>& rs, const sp<const Element>& e, uint32_t dimX, uint32_t dimY, uint32_t dimZ);
1667
1668    class Builder {
1669    protected:
1670        RS* mRS;
1671        uint32_t mDimX;
1672        uint32_t mDimY;
1673        uint32_t mDimZ;
1674        RsYuvFormat mYuvFormat;
1675        bool mDimMipmaps;
1676        bool mDimFaces;
1677        sp<const Element> mElement;
1678
1679    public:
1680        Builder(sp<RS> rs, sp<const Element> e);
1681
1682        void setX(uint32_t value);
1683        void setY(uint32_t value);
1684        void setZ(uint32_t value);
1685        void setYuvFormat(RsYuvFormat format);
1686        void setMipmaps(bool value);
1687        void setFaces(bool value);
1688        sp<const Type> create();
1689    };
1690
1691};
1692
1693/**
1694 * The parent class for all executable Scripts. This should not be used by applications.
1695 */
1696class Script : public BaseObj {
1697private:
1698
1699protected:
1700    Script(void *id, sp<RS> rs);
1701    void forEach(uint32_t slot, const sp<const Allocation>& in, const sp<const Allocation>& out,
1702            const void *v, size_t) const;
1703    void bindAllocation(const sp<Allocation>& va, uint32_t slot) const;
1704    void setVar(uint32_t index, const void *, size_t len) const;
1705    void setVar(uint32_t index, const sp<const BaseObj>& o) const;
1706    void invoke(uint32_t slot, const void *v, size_t len) const;
1707
1708
1709    void invoke(uint32_t slot) const {
1710        invoke(slot, NULL, 0);
1711    }
1712    void setVar(uint32_t index, float v) const {
1713        setVar(index, &v, sizeof(v));
1714    }
1715    void setVar(uint32_t index, double v) const {
1716        setVar(index, &v, sizeof(v));
1717    }
1718    void setVar(uint32_t index, int32_t v) const {
1719        setVar(index, &v, sizeof(v));
1720    }
1721    void setVar(uint32_t index, uint32_t v) const {
1722        setVar(index, &v, sizeof(v));
1723    }
1724    void setVar(uint32_t index, int64_t v) const {
1725        setVar(index, &v, sizeof(v));
1726    }
1727    void setVar(uint32_t index, bool v) const {
1728        setVar(index, &v, sizeof(v));
1729    }
1730
1731public:
1732    class FieldBase {
1733    protected:
1734        sp<const Element> mElement;
1735        sp<Allocation> mAllocation;
1736
1737        void init(const sp<RS>& rs, uint32_t dimx, uint32_t usages = 0);
1738
1739    public:
1740        sp<const Element> getElement() {
1741            return mElement;
1742        }
1743
1744        sp<const Type> getType() {
1745            return mAllocation->getType();
1746        }
1747
1748        sp<const Allocation> getAllocation() {
1749            return mAllocation;
1750        }
1751
1752        //void updateAllocation();
1753    };
1754};
1755
1756/**
1757 * The parent class for all user-defined scripts. This is intended to be used by auto-generated code only.
1758 */
1759class ScriptC : public Script {
1760protected:
1761    ScriptC(sp<RS> rs,
1762            const void *codeTxt, size_t codeLength,
1763            const char *cachedName, size_t cachedNameLength,
1764            const char *cacheDir, size_t cacheDirLength);
1765
1766};
1767
1768/**
1769 * The parent class for all script intrinsics. Intrinsics provide highly optimized implementations of
1770 * basic functions. This is not intended to be used directly.
1771 */
1772class ScriptIntrinsic : public Script {
1773 protected:
1774    sp<const Element> mElement;
1775    ScriptIntrinsic(sp<RS> rs, int id, sp<const Element> e);
1776    virtual ~ScriptIntrinsic();
1777};
1778
/**
 * Intrinsic for converting RGB to RGBA by using a 3D lookup table. The incoming
 * r,g,b values are used as normalized x,y,z coordinates into a 3D
 * allocation. The 8 nearest values are sampled and linearly interpolated. The
 * result is placed in the output.
 */
1785class ScriptIntrinsic3DLUT : public ScriptIntrinsic {
1786 private:
1787    ScriptIntrinsic3DLUT(sp<RS> rs, sp<const Element> e);
1788 public:
1789    /**
1790     * Supported Element types are U8_4. Default lookup table is identity.
1791     * @param[in] rs RenderScript context
1792     * @param[in] e Element
1793     * @return new ScriptIntrinsic
1794     */
1795    static sp<ScriptIntrinsic3DLUT> create(const sp<RS>& rs, const sp<const Element>& e);
1796
1797    /**
1798     * Launch the intrinsic.
1799     * @param[in] ain input Allocation
1800     * @param[in] aout output Allocation
1801     */
1802    void forEach(const sp<Allocation>& ain, const sp<Allocation>& aout);
1803
1804    /**
1805     * Sets the lookup table. The lookup table must use the same Element as the
1806     * intrinsic.
1807     * @param[in] lut new lookup table
1808     */
1809    void setLUT(const sp<Allocation>& lut);
1810};
1811
1812
1813/**
1814 * Intrinsic kernel provides high performance RenderScript APIs to BLAS.
1815 *
1816 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
1817 * building blocks for performing basic vector and matrix operations.
1818 *
1819 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
1820 *
1821 **/
1822class ScriptIntrinsicBLAS : public ScriptIntrinsic {
1823 private:
1824    ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e);
1825 public:
1826    /**
1827     * Create an intrinsic to access BLAS subroutines.
1828     *
1829     * @param rs The RenderScript context
1830     * @return ScriptIntrinsicBLAS
1831     */
1832    static sp<ScriptIntrinsicBLAS> create(const sp<RS>& rs);
1833
1834    /**
1835     * SGEMV performs one of the matrix-vector operations
1836     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1837     *
1838     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
1839     *
1840     * @param TransA The type of transpose applied to matrix A.
1841     * @param alpha The scalar alpha.
1842     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
1843     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1844     * @param incX The increment for the elements of vector x, must be larger than zero.
1845     * @param beta The scalar beta.
1846     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
1847     * @param incY The increment for the elements of vector y, must be larger than zero.
1848     */
1849    void SGEMV(RsBlasTranspose TransA,
1850               float alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
1851               float beta, const sp<Allocation>& Y, int incY);
1852
1853    /**
1854     * DGEMV performs one of the matrix-vector operations
1855     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1856     *
1857     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
1858     *
1859     * @param TransA The type of transpose applied to matrix A.
1860     * @param alpha The scalar alpha.
1861     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
1862     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
1863     * @param incX The increment between elements of vector x; must be greater than zero.
1864     * @param beta The scalar beta.
1865     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F64}.
1866     * @param incY The increment between elements of vector y; must be greater than zero.
1867     */
1868    void DGEMV(RsBlasTranspose TransA,
1869               double alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
1870               double beta, const sp<Allocation>& Y, int incY);
1871
1872    /**
1873     * CGEMV performs one of the matrix-vector operations
1874     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1875     *
1876     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
1877     *
1878     * @param TransA The type of transpose applied to matrix A.
1879     * @param alpha The scalar alpha.
1880     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
1881     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
1882     * @param incX The increment between elements of vector x; must be greater than zero.
1883     * @param beta The scalar beta.
1884     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F32_2}.
1885     * @param incY The increment between elements of vector y; must be greater than zero.
1886     */
1887    void CGEMV(RsBlasTranspose TransA,
1888               Float2 alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
1889               Float2 beta, const sp<Allocation>& Y, int incY);
1890
1891    /**
1892     * ZGEMV performs one of the matrix-vector operations
1893     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1894     *
1895     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
1896     *
1897     * @param TransA The type of transpose applied to matrix A.
1898     * @param alpha The scalar alpha.
1899     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
1900     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
1901     * @param incX The increment between elements of vector x; must be greater than zero.
1902     * @param beta The scalar beta.
1903     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F64_2}.
1904     * @param incY The increment between elements of vector y; must be greater than zero.
1905     */
1906    void ZGEMV(RsBlasTranspose TransA,
1907               Double2 alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
1908               Double2 beta, const sp<Allocation>& Y, int incY);
1909
1910    /**
1911     * SGBMV performs one of the matrix-vector operations
1912     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1913     *
1914     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
1915     *
1916     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1917     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1918     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1919     *           for i in range(0, m):
1920     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1921     *                  b[i, j-i+kl] = a[i, j]
1922     *
1923     * @param TransA The type of transpose applied to matrix A.
1924     * @param KL The number of sub-diagonals of the matrix A.
1925     * @param KU The number of super-diagonals of the matrix A.
1926     * @param alpha The scalar alpha.
1927     * @param A The input allocation containing the band matrix A; supported element type: {Element#F32}.
1928     * @param X The input allocation containing vector x; supported element type: {Element#F32}.
1929     * @param incX The increment between elements of vector x; must be greater than zero.
1930     * @param beta The scalar beta.
1931     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F32}.
1932     * @param incY The increment between elements of vector y; must be greater than zero.
1933     */
1934    void SGBMV(RsBlasTranspose TransA,
1935               int KL, int KU, float alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
1936               float beta, const sp<Allocation>& Y, int incY);
1937
1938    /**
1939     * DGBMV performs one of the matrix-vector operations
1940     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1941     *
1942     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
1943     *
1944     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1945     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1946     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1947     *           for i in range(0, m):
1948     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1949     *                  b[i, j-i+kl] = a[i, j]
1950     *
1951     * @param TransA The type of transpose applied to matrix A.
1952     * @param KL The number of sub-diagonals of the matrix A.
1953     * @param KU The number of super-diagonals of the matrix A.
1954     * @param alpha The scalar alpha.
1955     * @param A The input allocation containing the band matrix A; supported element type: {Element#F64}.
1956     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
1957     * @param incX The increment between elements of vector x; must be greater than zero.
1958     * @param beta The scalar beta.
1959     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F64}.
1960     * @param incY The increment between elements of vector y; must be greater than zero.
1961     */
1962    void DGBMV(RsBlasTranspose TransA,
1963               int KL, int KU, double alpha, const sp<Allocation>& A, const sp<Allocation>& X,
1964               int incX, double beta, const sp<Allocation>& Y, int incY);
1965
1966    /**
1967     * CGBMV performs one of the matrix-vector operations
1968     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1969     *
1970     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
1971     *
1972     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1973     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1974     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1975     *           for i in range(0, m):
1976     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1977     *                  b[i, j-i+kl] = a[i, j]
1978     *
1979     * @param TransA The type of transpose applied to matrix A.
1980     * @param KL The number of sub-diagonals of the matrix A.
1981     * @param KU The number of super-diagonals of the matrix A.
1982     * @param alpha The scalar alpha.
1983     * @param A The input allocation containing the band matrix A; supported element type: {Element#F32_2}.
1984     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
1985     * @param incX The increment between elements of vector x; must be greater than zero.
1986     * @param beta The scalar beta.
1987     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F32_2}.
1988     * @param incY The increment between elements of vector y; must be greater than zero.
1989     */
1990    void CGBMV(RsBlasTranspose TransA,
1991               int KL, int KU, Float2 alpha, const sp<Allocation>& A, const sp<Allocation>& X,
1992               int incX, Float2 beta, const sp<Allocation>& Y, int incY);
1993
1994    /**
1995     * ZGBMV performs one of the matrix-vector operations
1996     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1997     *
1998     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
1999     *
2000     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
2001     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
2002     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
2003     *           for i in range(0, m):
2004     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
2005     *                  b[i, j-i+kl] = a[i, j]
2006     *
2007     * @param TransA The type of transpose applied to matrix A.
2008     * @param KL The number of sub-diagonals of the matrix A.
2009     * @param KU The number of super-diagonals of the matrix A.
2010     * @param alpha The scalar alpha.
2011     * @param A The input allocation containing the band matrix A; supported element type: {Element#F64_2}.
2012     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
2013     * @param incX The increment between elements of vector x; must be greater than zero.
2014     * @param beta The scalar beta.
2015     * @param Y The allocation containing vector y, which the operation above both reads and updates; supported element type: {Element#F64_2}.
2016     * @param incY The increment between elements of vector y; must be greater than zero.
2017     */
2018    void ZGBMV(RsBlasTranspose TransA,
2019               int KL, int KU, Double2 alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
2020               Double2 beta, const sp<Allocation>& Y, int incY);
2021
2022    /**
2023     * STRMV performs one of the matrix-vector operations
2024     * x := A*x   or   x := A**T*x
2025     *
2026     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
2027     *
2028     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2029     * @param TransA The type of transpose applied to matrix A.
2030     * @param Diag Specifies whether or not A is unit triangular.
2031     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2032     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F32}.
2033     * @param incX The increment between elements of vector x; must be greater than zero.
2034     */
2035    void STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2036               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2037
2038    /**
2039     * DTRMV performs one of the matrix-vector operations
2040     * x := A*x   or   x := A**T*x
2041     *
2042     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
2043     *
2044     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2045     * @param TransA The type of transpose applied to matrix A.
2046     * @param Diag Specifies whether or not A is unit triangular.
2047     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2048     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F64}.
2049     * @param incX The increment between elements of vector x; must be greater than zero.
2050     */
2051    void DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2052               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2053
2054    /**
2055     * CTRMV performs one of the matrix-vector operations
2056     * x := A*x   or   x := A**T*x   or   x := A**H*x
2057     *
2058     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
2059     *
2060     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2061     * @param TransA The type of transpose applied to matrix A.
2062     * @param Diag Specifies whether or not A is unit triangular.
2063     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2064     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F32_2}.
2065     * @param incX The increment between elements of vector x; must be greater than zero.
2066     */
2067    void CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2068               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2069
2070    /**
2071     * ZTRMV performs one of the matrix-vector operations
2072     * x := A*x   or   x := A**T*x   or   x := A**H*x
2073     *
2074     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
2075     *
2076     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2077     * @param TransA The type of transpose applied to matrix A.
2078     * @param Diag Specifies whether or not A is unit triangular.
2079     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2080     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F64_2}.
2081     * @param incX The increment between elements of vector x; must be greater than zero.
2082     */
2083    void ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2084               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2085
2086    /**
2087     * STBMV performs one of the matrix-vector operations
2088     * x := A*x   or   x := A**T*x
2089     *
2090     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
2091     *
2092     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2093     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2094     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2095     *           for i in range(0, n):
2096     *              for j in range(i, min(i+k+1, n)):
2097     *                  b[i, j-i] = a[i, j]
2098     *
2099     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2100     * @param TransA The type of transpose applied to matrix A.
2101     * @param Diag Specifies whether or not A is unit triangular.
2102     * @param K The number of off-diagonals of the matrix A.
2103     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2104     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F32}.
2105     * @param incX The increment between elements of vector x; must be greater than zero.
2106     */
2107    void STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2108               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2109
2110    /**
2111     * DTBMV performs one of the matrix-vector operations
2112     * x := A*x   or   x := A**T*x
2113     *
2114     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
2115     *
2116     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2117     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2118     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2119     *           for i in range(0, n):
2120     *              for j in range(i, min(i+k+1, n)):
2121     *                  b[i, j-i] = a[i, j]
2122     *
2123     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2124     * @param TransA The type of transpose applied to matrix A.
2125     * @param Diag Specifies whether or not A is unit triangular.
2126     * @param K The number of off-diagonals of the matrix A.
2127     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2128     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F64}.
2129     * @param incX The increment between elements of vector x; must be greater than zero.
2130     */
2131    void DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2132               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2133
2134    /**
2135     * CTBMV performs one of the matrix-vector operations
2136     * x := A*x   or   x := A**T*x   or   x := A**H*x
2137     *
2138     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
2139     *
2140     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2141     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2142     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2143     *           for i in range(0, n):
2144     *              for j in range(i, min(i+k+1, n)):
2145     *                  b[i, j-i] = a[i, j]
2146     *
2147     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2148     * @param TransA The type of transpose applied to matrix A.
2149     * @param Diag Specifies whether or not A is unit triangular.
2150     * @param K The number of off-diagonals of the matrix A.
2151     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2152     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F32_2}.
2153     * @param incX The increment between elements of vector x; must be greater than zero.
2154     */
2155    void CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2156               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2157
2158    /**
2159     * ZTBMV performs one of the matrix-vector operations
2160     * x := A*x   or   x := A**T*x   or   x := A**H*x
2161     *
2162     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
2163     *
2164     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2165     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2166     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2167     *           for i in range(0, n):
2168     *              for j in range(i, min(i+k+1, n)):
2169     *                  b[i, j-i] = a[i, j]
2170     *
2171     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2172     * @param TransA The type of transpose applied to matrix A.
2173     * @param Diag Specifies whether or not A is unit triangular.
2174     * @param K The number of off-diagonals of the matrix A.
2175     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2176     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F64_2}.
2177     * @param incX The increment between elements of vector x; must be greater than zero.
2178     */
2179    void ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2180               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2181
2182    /**
2183     * STPMV performs one of the matrix-vector operations
2184     * x := A*x   or   x := A**T*x
2185     *
2186     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
2187     *
2188     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2189     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2190     *       'a' to packed matrix 'b'.
2191     *           k = 0
2192     *           for i in range(0, n):
2193     *              for j in range(i, n):
2194     *                  b[k++] = a[i, j]
2195     *
2196     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2197     * @param TransA The type of transpose applied to matrix A.
2198     * @param Diag Specifies whether or not A is unit triangular.
2199     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F32}.
2200     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F32}.
2201     * @param incX The increment between elements of vector x; must be greater than zero.
2202     */
2203    void STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2204               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2205
2206    /**
2207     * DTPMV performs one of the matrix-vector operations
2208     * x := A*x   or   x := A**T*x
2209     *
2210     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
2211     *
2212     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2213     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2214     *       'a' to packed matrix 'b'.
2215     *           k = 0
2216     *           for i in range(0, n):
2217     *              for j in range(i, n):
2218     *                  b[k++] = a[i, j]
2219     *
2220     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2221     * @param TransA The type of transpose applied to matrix A.
2222     * @param Diag Specifies whether or not A is unit triangular.
2223     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64}.
2224     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F64}.
2225     * @param incX The increment between elements of vector x; must be greater than zero.
2226     */
2227    void DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2228               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2229
2230    /**
2231     * CTPMV performs one of the matrix-vector operations
2232     * x := A*x   or   x := A**T*x   or   x := A**H*x
2233     *
2234     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
2235     *
2236     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2237     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2238     *       'a' to packed matrix 'b'.
2239     *           k = 0
2240     *           for i in range(0, n):
2241     *              for j in range(i, n):
2242     *                  b[k++] = a[i, j]
2243     *
2244     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2245     * @param TransA The type of transpose applied to matrix A.
2246     * @param Diag Specifies whether or not A is unit triangular.
2247     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F32_2}.
2248     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F32_2}.
2249     * @param incX The increment between elements of vector x; must be greater than zero.
2250     */
2251    void CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2252               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2253
2254    /**
2255     * ZTPMV performs one of the matrix-vector operations
2256     * x := A*x   or   x := A**T*x   or   x := A**H*x
2257     *
2258     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
2259     *
2260     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2261     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2262     *       'a' to packed matrix 'b'.
2263     *           k = 0
2264     *           for i in range(0, n):
2265     *              for j in range(i, n):
2266     *                  b[k++] = a[i, j]
2267     *
2268     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2269     * @param TransA The type of transpose applied to matrix A.
2270     * @param Diag Specifies whether or not A is unit triangular.
2271     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64_2}.
2272     * @param X The allocation containing vector x, which the operation above both reads and updates; supported element type: {Element#F64_2}.
2273     * @param incX The increment between elements of vector x; must be greater than zero.
2274     */
2275    void ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2276               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2277
2278    /**
2279     * STRSV solves one of the systems of equations
2280     * A*x = b   or   A**T*x = b
2281     *
2282     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
2283     *
2284     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2285     * @param TransA The type of transpose applied to matrix A.
2286     * @param Diag Specifies whether or not A is unit triangular.
2287     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2288     * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2289     * @param incX The increment between elements of vector x; must be greater than zero.
2290     */
2291    void STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2292               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2293
2294    /**
2295     * DTRSV solves one of the systems of equations
2296     * A*x = b   or   A**T*x = b
2297     *
2298     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
2299     *
2300     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2301     * @param TransA The type of transpose applied to matrix A.
2302     * @param Diag Specifies whether or not A is unit triangular.
2303     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2304     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2305     * @param incX The increment between elements of vector x; must be greater than zero.
2306     */
2307    void DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2308               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2309
2310    /**
2311     * CTRSV solves one of the systems of equations
2312     * A*x = b   or   A**T*x = b   or   A**H*x = b
2313     *
2314     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
2315     *
2316     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2317     * @param TransA The type of transpose applied to matrix A.
2318     * @param Diag Specifies whether or not A is unit triangular.
2319     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2320     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2321     * @param incX The increment between elements of vector x; must be greater than zero.
2322     */
2323    void CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2324               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2325
2326    /**
2327     * ZTRSV solves one of the systems of equations
2328     * A*x = b   or   A**T*x = b   or   A**H*x = b
2329     *
2330     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
2331     *
2332     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2333     * @param TransA The type of transpose applied to matrix A.
2334     * @param Diag Specifies whether or not A is unit triangular.
2335     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2336     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
2337     * @param incX The increment between elements of vector x; must be greater than zero.
2338     */
2339    void ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2340               const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2341
2342    /**
2343     * STBSV solves one of the systems of equations
2344     * A*x = b   or   A**T*x = b
2345     *
2346     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
2347     *
2348     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2349     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2350     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2351     *           for i in range(0, n):
2352     *              for j in range(i, min(i+k+1, n)):
2353     *                  b[i, j-i] = a[i, j]
2354     *
2355     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2356     * @param TransA The type of transpose applied to matrix A.
2357     * @param Diag Specifies whether or not A is unit triangular.
2358     * @param K The number of off-diagonals of the matrix A.
2359     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2360     * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2361     * @param incX The increment between elements of vector x; must be greater than zero.
2362     */
2363    void STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2364               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2365
2366    /**
2367     * DTBSV solves one of the systems of equations
2368     * A*x = b   or   A**T*x = b
2369     *
2370     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
2371     *
2372     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2373     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2374     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2375     *           for i in range(0, n):
2376     *              for j in range(i, min(i+k+1, n)):
2377     *                  b[i, j-i] = a[i, j]
2378     *
2379     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2380     * @param TransA The type of transpose applied to matrix A.
2381     * @param Diag Specifies whether or not A is unit triangular.
2382     * @param K The number of off-diagonals of the matrix A.
2383     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2384     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2385     * @param incX The increment between elements of vector x; must be greater than zero.
2386     */
2387    void DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2388               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2389
2390    /**
2391     * CTBSV solves one of the systems of equations
2392     * A*x = b   or   A**T*x = b   or   A**H*x = b
2393     *
2394     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
2395     *
2396     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2397     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2398     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2399     *           for i in range(0, n):
2400     *              for j in range(i, min(i+k+1, n)):
2401     *                  b[i, j-i] = a[i, j]
2402     *
2403     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2404     * @param TransA The type of transpose applied to matrix A.
2405     * @param Diag Specifies whether or not A is unit triangular.
2406     * @param K The number of off-diagonals of the matrix A.
2407     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2408     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2409     * @param incX The increment between elements of vector x; must be greater than zero.
2410     */
2411    void CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2412               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2413
2414    /**
2415     * ZTBSV solves one of the systems of equations
2416     * A*x = b   or   A**T*x = b   or   A**H*x = b
2417     *
2418     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
2419     *
2420     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2421     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2422     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2423     *           for i in range(0, n):
2424     *              for j in range(i, min(i+k+1, n)):
2425     *                  b[i, j-i] = a[i, j]
2426     *
2427     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2428     * @param TransA The type of transpose applied to matrix A.
2429     * @param Diag Specifies whether or not A is unit triangular.
2430     * @param K The number of off-diagonals of the matrix A.
2431     * @param A The input allocation contains matrix A in the row-based band layout described above, supported elements type: {Element#F64_2}.
2432     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2433     * @param incX The increment for the elements of vector x, must be larger than zero.
2434     */
2435    void ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2436               int K, const sp<Allocation>& A, const sp<Allocation>& X, int incX);
2437
2438    /**
2439     * STPSV solves one of the systems of equations
2440     * A*x = b   or   A**T*x = b
2441     *
2442     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
2443     *
2444     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2445     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2446     *       'a' to packed matrix 'b'.
2447     *           k = 0
2448     *           for i in range(0, n):
2449     *              for j in range(i, n):
2450     *                  b[k++] = a[i, j]
2451     *
2452     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2453     * @param TransA The type of transpose applied to matrix A.
2454     * @param Diag Specifies whether or not A is unit triangular.
2455     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2456     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2457     * @param incX The increment for the elements of vector x, must be larger than zero.
2458     */
2459    void STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2460               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2461
2462    /**
2463     * DTPSV solves one of the systems of equations
2464     * A*x = b   or   A**T*x = b
2465     *
2466     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
2467     *
2468     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2469     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2470     *       'a' to packed matrix 'b'.
2471     *           k = 0
2472     *           for i in range(0, n):
2473     *              for j in range(i, n):
2474     *                  b[k++] = a[i, j]
2475     *
2476     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2477     * @param TransA The type of transpose applied to matrix A.
2478     * @param Diag Specifies whether or not A is unit triangular.
2479     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2480     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2481     * @param incX The increment for the elements of vector x, must be larger than zero.
2482     */
2483    void DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2484               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2485
2486    /**
2487     * CTPSV solves one of the systems of equations
2488     * A*x = b   or   A**T*x = b   or   A**H*x = b
2489     *
2490     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
2491     *
2492     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2493     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2494     *       'a' to packed matrix 'b'.
2495     *           k = 0
2496     *           for i in range(0, n):
2497     *              for j in range(i, n):
2498     *                  b[k++] = a[i, j]
2499     *
2500     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2501     * @param TransA The type of transpose applied to matrix A.
2502     * @param Diag Specifies whether or not A is unit triangular.
2503     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2504     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2505     * @param incX The increment for the elements of vector x, must be larger than zero.
2506     */
2507    void CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2508               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2509
2510    /**
2511     * ZTPSV solves one of the systems of equations
2512     * A*x = b   or   A**T*x = b   or   A**H*x = b
2513     *
2514     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
2515     *
2516     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2517     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2518     *       'a' to packed matrix 'b'.
2519     *           k = 0
2520     *           for i in range(0, n):
2521     *              for j in range(i, n):
2522     *                  b[k++] = a[i, j]
2523     *
2524     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2525     * @param TransA The type of transpose applied to matrix A.
2526     * @param Diag Specifies whether or not A is unit triangular.
2527     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}.
2528     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2529     * @param incX The increment for the elements of vector x, must be larger than zero.
2530     */
2531    void ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2532               const sp<Allocation>& Ap, const sp<Allocation>& X, int incX);
2533
2534    /**
2535     * SSYMV performs the matrix-vector operation
2536     * y := alpha*A*x + beta*y
2537     *
2538     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
2539     *
2540     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2541     * @param alpha The scalar alpha.
2542     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2543     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2544     * @param incX The increment for the elements of vector x, must be larger than zero.
2545     * @param beta The scalar beta.
2546     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F32}.
2547     * @param incY The increment for the elements of vector y, must be larger than zero.
2548     */
2549    void SSYMV(RsBlasUplo Uplo, float alpha, const sp<Allocation>& A, const sp<Allocation>& X,
2550               int incX, float beta, const sp<Allocation>& Y, int incY);
2551
2552    /**
2553     * SSBMV performs the matrix-vector operation
2554     * y := alpha*A*x + beta*y
2555     *
2556     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
2557     *
2558     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2559     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2560     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2561     *           for i in range(0, n):
2562     *              for j in range(i, min(i+k+1, n)):
2563     *                  b[i, j-i] = a[i, j]
2564     *
2565     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2566     * @param K The number of off-diagonals of the matrix A.
2567     * @param alpha The scalar alpha.
2568     * @param A The input allocation contains matrix A in the row-based band layout described above, supported elements type: {Element#F32}.
2569     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2570     * @param incX The increment for the elements of vector x, must be larger than zero.
2571     * @param beta The scalar beta.
2572     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F32}.
2573     * @param incY The increment for the elements of vector y, must be larger than zero.
2574     */
2575    void SSBMV(RsBlasUplo Uplo, int K, float alpha, const sp<Allocation>& A, const sp<Allocation>& X,
2576               int incX, float beta, const sp<Allocation>& Y, int incY);
2577
2578    /**
2579     * SSPMV performs the matrix-vector operation
2580     * y := alpha*A*x + beta*y
2581     *
2582     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
2583     *
2584     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2585     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2586     *       'a' to packed matrix 'b'.
2587     *           k = 0
2588     *           for i in range(0, n):
2589     *              for j in range(i, n):
2590     *                  b[k++] = a[i, j]
2591     *
2592     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2593     * @param alpha The scalar alpha.
2594     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2595     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2596     * @param incX The increment for the elements of vector x, must be larger than zero.
2597     * @param beta The scalar beta.
2598     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F32}.
2599     * @param incY The increment for the elements of vector y, must be larger than zero.
2600     */
2601    void SSPMV(RsBlasUplo Uplo, float alpha, const sp<Allocation>& Ap, const sp<Allocation>& X,
2602               int incX, float beta, const sp<Allocation>& Y, int incY);
2603
2604    /**
2605     * SGER performs the rank 1 operation
2606     * A := alpha*x*y**T + A
2607     *
2608     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
2609     *
2610     * @param alpha The scalar alpha.
2611     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2612     * @param incX The increment for the elements of vector x, must be larger than zero.
2613     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2614     * @param incY The increment for the elements of vector y, must be larger than zero.
2615     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F32}.
2616     */
2617    void SGER(float alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
2618
2619    /**
2620     * SSYR performs the rank 1 operation
2621     * A := alpha*x*x**T + A
2622     *
2623     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
2624     *
2625     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2626     * @param alpha The scalar alpha.
2627     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2628     * @param incX The increment for the elements of vector x, must be larger than zero.
2629     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F32}.
2630     */
2631    void SSYR(RsBlasUplo Uplo, float alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& A);
2632
2633    /**
2634     * SSPR performs the rank 1 operation
2635     * A := alpha*x*x**T + A
2636     *
2637     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
2638     *
2639     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2640     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2641     *       'a' to packed matrix 'b'.
2642     *           k = 0
2643     *           for i in range(0, n):
2644     *              for j in range(i, n):
2645     *                  b[k++] = a[i, j]
2646     *
2647     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2648     * @param alpha The scalar alpha.
2649     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2650     * @param incX The increment for the elements of vector x, must be larger than zero.
2651     * @param Ap The allocation contains packed matrix A, which is read and then updated with the result, supported elements type: {Element#F32}.
2652     */
2653    void SSPR(RsBlasUplo Uplo, float alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& Ap);
2654
2655    /**
2656     * SSYR2 performs the symmetric rank 2 operation
2657     * A := alpha*x*y**T + alpha*y*x**T + A
2658     *
2659     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
2660     *
2661     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2662     * @param alpha The scalar alpha.
2663     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2664     * @param incX The increment for the elements of vector x, must be larger than zero.
2665     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2666     * @param incY The increment for the elements of vector y, must be larger than zero.
2667     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F32}.
2668     */
2669    void SSYR2(RsBlasUplo Uplo, float alpha, const sp<Allocation>& X, int incX,
2670               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
2671
2672    /**
2673     * SSPR2 performs the symmetric rank 2 operation
2674     * A := alpha*x*y**T + alpha*y*x**T + A
2675     *
2676     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
2677     *
2678     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2679     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2680     *       'a' to packed matrix 'b'.
2681     *           k = 0
2682     *           for i in range(0, n):
2683     *              for j in range(i, n):
2684     *                  b[k++] = a[i, j]
2685     *
2686     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2687     * @param alpha The scalar alpha.
2688     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2689     * @param incX The increment for the elements of vector x, must be larger than zero.
2690     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2691     * @param incY The increment for the elements of vector y, must be larger than zero.
2692     * @param Ap The allocation contains packed matrix A, which is read and then updated with the result, supported elements type: {Element#F32}.
2693     */
2694    void SSPR2(RsBlasUplo Uplo, float alpha, const sp<Allocation>& X, int incX,
2695               const sp<Allocation>& Y, int incY, const sp<Allocation>& Ap);
2696
2697    /**
2698     * DSYMV performs the matrix-vector operation
2699     * y := alpha*A*x + beta*y
2700     *
2701     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
2702     *
2703     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2704     * @param alpha The scalar alpha.
2705     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2706     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2707     * @param incX The increment for the elements of vector x, must be larger than zero.
2708     * @param beta The scalar beta.
2709     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F64}.
2710     * @param incY The increment for the elements of vector y, must be larger than zero.
2711     */
2712    void DSYMV(RsBlasUplo Uplo, double alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
2713               double beta, const sp<Allocation>& Y, int incY);
2714
2715    /**
2716     * DSBMV performs the matrix-vector operation
2717     * y := alpha*A*x + beta*y
2718     *
2719     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
2720     *
2721     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2722     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2723     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2724     *           for i in range(0, n):
2725     *              for j in range(i, min(i+k+1, n)):
2726     *                  b[i, j-i] = a[i, j]
2727     *
2728     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2729     * @param K The number of off-diagonals of the matrix A.
2730     * @param alpha The scalar alpha.
2731     * @param A The input allocation contains matrix A in the row-based band layout described above, supported elements type: {Element#F64}.
2732     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2733     * @param incX The increment for the elements of vector x, must be larger than zero.
2734     * @param beta The scalar beta.
2735     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F64}.
2736     * @param incY The increment for the elements of vector y, must be larger than zero.
2737     */
2738    void DSBMV(RsBlasUplo Uplo, int K, double alpha, const sp<Allocation>& A, const sp<Allocation>& X, int incX,
2739               double beta, const sp<Allocation>& Y, int incY);
2740
2741    /**
2742     * DSPMV performs the matrix-vector operation
2743     * y := alpha*A*x + beta*y
2744     *
2745     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2746     *
2747     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2748     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2749     *       'a' to packed matrix 'b'.
2750     *           k = 0
2751     *           for i in range(0, n):
2752     *              for j in range(i, n):
2753     *                  b[k++] = a[i, j]
2754     *
2755     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2756     * @param alpha The scalar alpha.
2757     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2758     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2759     * @param incX The increment for the elements of vector x, must be larger than zero.
2760     * @param beta The scalar beta.
2761     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F64}.
2762     * @param incY The increment for the elements of vector y, must be larger than zero.
2763     */
2764    void DSPMV(RsBlasUplo Uplo, double alpha, const sp<Allocation>& Ap, const sp<Allocation>& X, int incX,
2765               double beta, const sp<Allocation>& Y, int incY);
2766
2767    /**
2768     * DGER performs the rank 1 operation
2769     * A := alpha*x*y**T + A
2770     *
2771     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2772     *
2773     * @param alpha The scalar alpha.
2774     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2775     * @param incX The increment for the elements of vector x, must be larger than zero.
2776     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2777     * @param incY The increment for the elements of vector y, must be larger than zero.
2778     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F64}.
2779     */
2780    void DGER(double alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
2781
2782    /**
2783     * DSYR performs the rank 1 operation
2784     * A := alpha*x*x**T + A
2785     *
2786     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2787     *
2788     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2789     * @param alpha The scalar alpha.
2790     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2791     * @param incX The increment for the elements of vector x, must be larger than zero.
2792     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F64}.
2793     */
2794    void DSYR(RsBlasUplo Uplo, double alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& A);
2795
2796    /**
2797     * DSPR performs the rank 1 operation
2798     * A := alpha*x*x**T + A
2799     *
2800     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2801     *
2802     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2803     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2804     *       'a' to packed matrix 'b'.
2805     *           k = 0
2806     *           for i in range(0, n):
2807     *              for j in range(i, n):
2808     *                  b[k++] = a[i, j]
2809     *
2810     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2811     * @param alpha The scalar alpha.
2812     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2813     * @param incX The increment for the elements of vector x, must be larger than zero.
2814     * @param Ap The allocation contains packed matrix A, which is read and then updated with the result, supported elements type: {Element#F64}.
2815     */
2816    void DSPR(RsBlasUplo Uplo, double alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& Ap);
2817
2818    /**
2819     * DSYR2 performs the symmetric rank 2 operation
2820     * A := alpha*x*y**T + alpha*y*x**T + A
2821     *
2822     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2823     *
2824     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2825     * @param alpha The scalar alpha.
2826     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2827     * @param incX The increment for the elements of vector x, must be larger than zero.
2828     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2829     * @param incY The increment for the elements of vector y, must be larger than zero.
2830     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F64}.
2831     */
2832    void DSYR2(RsBlasUplo Uplo, double alpha, const sp<Allocation>& X, int incX,
2833               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
2834
2835    /**
2836     * DSPR2 performs the symmetric rank 2 operation
2837     * A := alpha*x*y**T + alpha*y*x**T + A
2838     *
2839     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2840     *
2841     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2842     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2843     *       'a' to packed matrix 'b'.
2844     *           k = 0
2845     *           for i in range(0, n):
2846     *              for j in range(i, n):
2847     *                  b[k++] = a[i, j]
2848     *
2849     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2850     * @param alpha The scalar alpha.
2851     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2852     * @param incX The increment for the elements of vector x, must be larger than zero.
2853     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2854     * @param incY The increment for the elements of vector y, must be larger than zero.
2855     * @param Ap The allocation contains packed matrix A, which is read and then updated with the result, supported elements type: {Element#F64}.
2856     */
2857    void DSPR2(RsBlasUplo Uplo, double alpha, const sp<Allocation>& X, int incX,
2858               const sp<Allocation>& Y, int incY, const sp<Allocation>& Ap);
2859
2860    /**
2861     * CHEMV performs the matrix-vector operation
2862     * y := alpha*A*x + beta*y
2863     *
2864     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2865     *
2866     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2867     * @param alpha The scalar alpha.
2868     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2869     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2870     * @param incX The increment for the elements of vector x, must be larger than zero.
2871     * @param beta The scalar beta.
2872     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F32_2}.
2873     * @param incY The increment for the elements of vector y, must be larger than zero.
2874     */
2875    void CHEMV(RsBlasUplo Uplo, Float2 alpha, const sp<Allocation>& A, const sp<Allocation>& X,
2876               int incX, Float2 beta, const sp<Allocation>& Y, int incY);
2877
2878    /**
2879     * CHBMV performs the matrix-vector operation
2880     * y := alpha*A*x + beta*y
2881     *
2882     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2883     *
2884     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2885     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2886     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2887     *           for i in range(0, n):
2888     *              for j in range(i, min(i+k+1, n)):
2889     *                  b[i, j-i] = a[i, j]
2890     *
2891     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2892     * @param K The number of off-diagonals of the matrix A.
2893     * @param alpha The scalar alpha.
2894     * @param A The input allocation contains matrix A in the row-based band layout described above, supported elements type: {Element#F32_2}.
2895     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2896     * @param incX The increment for the elements of vector x, must be larger than zero.
2897     * @param beta The scalar beta.
2898     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F32_2}.
2899     * @param incY The increment for the elements of vector y, must be larger than zero.
2900     */
2901    void CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, const sp<Allocation>& A, const sp<Allocation>& X,
2902               int incX, Float2 beta, const sp<Allocation>& Y, int incY);
2903
2904    /**
2905     * CHPMV performs the matrix-vector operation
2906     * y := alpha*A*x + beta*y
2907     *
2908     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2909     *
2910     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2911     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2912     *       'a' to packed matrix 'b'.
2913     *           k = 0
2914     *           for i in range(0, n):
2915     *              for j in range(i, n):
2916     *                  b[k++] = a[i, j]
2917     *
2918     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2919     * @param alpha The scalar alpha.
2920     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2921     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2922     * @param incX The increment for the elements of vector x, must be larger than zero.
2923     * @param beta The scalar beta.
2924     * @param Y The allocation contains vector y, which is read and then overwritten with the result, supported elements type: {Element#F32_2}.
2925     * @param incY The increment for the elements of vector y, must be larger than zero.
2926     */
2927    void CHPMV(RsBlasUplo Uplo, Float2 alpha, const sp<Allocation>& Ap, const sp<Allocation>& X,
2928               int incX, Float2 beta, const sp<Allocation>& Y, int incY);
2929
2930    /**
2931     * CGERU performs the rank 1 operation
2932     * A := alpha*x*y**T + A
2933     *
2934     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2935     *
2936     * @param alpha The scalar alpha.
2937     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2938     * @param incX The increment for the elements of vector x, must be larger than zero.
2939     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2940     * @param incY The increment for the elements of vector y, must be larger than zero.
2941     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F32_2}.
2942     */
2943    void CGERU(Float2 alpha, const sp<Allocation>& X, int incX,
2944               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
2945
2946    /**
2947     * CGERC performs the rank 1 operation
2948     * A := alpha*x*y**H + A
2949     *
2950     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2951     *
2952     * @param alpha The scalar alpha.
2953     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2954     * @param incX The increment for the elements of vector x, must be larger than zero.
2955     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2956     * @param incY The increment for the elements of vector y, must be larger than zero.
2957     * @param A The allocation contains matrix A, which is read and then updated with the result, supported elements type: {Element#F32_2}.
2958     */
2959    void CGERC(Float2 alpha, const sp<Allocation>& X, int incX,
2960               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
2961
2962    /**
2963     * CHER performs the rank 1 operation
2964     * A := alpha*x*x**H + A
2965     *
2966     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2967     *
2968     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2969     * @param alpha The scalar alpha (a real float, not a Float2).
2970     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2971     * @param incX The increment for the elements of vector x, must be larger than zero.
2972     * @param A The input/output allocation containing matrix A (updated per the formula above); supported element type: {Element#F32_2}.
2973     */
2974    void CHER(RsBlasUplo Uplo, float alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& A);
2975
2976    /**
2977     * CHPR performs the rank 1 operation
2978     * A := alpha*x*x**H + A
2979     *
2980     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2981     *
2982     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2983     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2984     *       'a' to packed matrix 'b'.
2985     *           k = 0
2986     *           for i in range(0, n):
2987     *              for j in range(i, n):
2988     *                  b[k++] = a[i, j]
2989     *
2990     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2991     * @param alpha The scalar alpha (a real float, not a Float2).
2992     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2993     * @param incX The increment for the elements of vector x, must be larger than zero.
2994     * @param Ap The input/output allocation containing packed matrix A (updated per the formula above); supported element type: {Element#F32_2}.
2995     */
2996    void CHPR(RsBlasUplo Uplo, float alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& Ap);
2997
2998    /**
2999     * CHER2 performs the Hermitian rank 2 operation
3000     * A := alpha*x*y**H + alpha*y*x**H + A   (NOTE(review): netlib uses conjg(alpha) in the 2nd term; confirm)
3001     *
3002     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
3003     *
3004     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3005     * @param alpha The scalar alpha.
3006     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
3007     * @param incX The increment for the elements of vector x, must be larger than zero.
3008     * @param Y The input allocation containing vector y; supported element type: {Element#F32_2}.
3009     * @param incY The increment for the elements of vector y, must be larger than zero.
3010     * @param A The input/output allocation containing matrix A (updated per the formula above); supported element type: {Element#F32_2}.
3011     */
3012    void CHER2(RsBlasUplo Uplo, Float2 alpha, const sp<Allocation>& X, int incX,
3013               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
3014
3015    /**
3016     * CHPR2 performs the Hermitian rank 2 operation
3017     * A := alpha*x*y**H + alpha*y*x**H + A   (NOTE(review): netlib uses conjg(alpha) in the 2nd term; confirm)
3018     *
3019     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
3020     *
3021     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3022     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3023     *       'a' to packed matrix 'b'.
3024     *           k = 0
3025     *           for i in range(0, n):
3026     *              for j in range(i, n):
3027     *                  b[k++] = a[i, j]
3028     *
3029     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3030     * @param alpha The scalar alpha.
3031     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
3032     * @param incX The increment for the elements of vector x, must be larger than zero.
3033     * @param Y The input allocation containing vector y; supported element type: {Element#F32_2}.
3034     * @param incY The increment for the elements of vector y, must be larger than zero.
3035     * @param Ap The input/output allocation containing packed matrix A (updated per the formula above); supported element type: {Element#F32_2}.
3036     */
3037    void CHPR2(RsBlasUplo Uplo, Float2 alpha, const sp<Allocation>& X, int incX,
3038               const sp<Allocation>& Y, int incY, const sp<Allocation>& Ap);
3039
3040    /**
3041     * ZHEMV performs the matrix-vector operation
3042     * y := alpha*A*x + beta*y
3043     *
3044     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
3045     *
3046     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3047     * @param alpha The scalar alpha.
3048     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3049     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3050     * @param incX The increment for the elements of vector x, must be larger than zero.
3051     * @param beta The scalar beta.
3052     * @param Y The input/output allocation containing vector y (updated per the formula above); supported element type: {Element#F64_2}.
3053     * @param incY The increment for the elements of vector y, must be larger than zero.
3054     */
3055    void ZHEMV(RsBlasUplo Uplo, Double2 alpha, const sp<Allocation>& A, const sp<Allocation>& X,
3056               int incX, Double2 beta, const sp<Allocation>& Y, int incY);
3057
3058    /**
3059     * ZHBMV performs the matrix-vector operation
3060     * y := alpha*A*x + beta*y
3061     *
3062     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
3063     *
3064     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
3065     *       but only the region N*(K+1) will be referenced. The following subroutine is an
3066     *       example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
3067     *           for i in range(0, n):
3068     *              for j in range(i, min(i+k+1, n)):
3069     *                  b[i, j-i] = a[i, j]
3070     *
3071     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
3072     * @param K The number of off-diagonals of the matrix A.
3073     * @param alpha The scalar alpha.
3074     * @param A The input allocation containing band matrix A; supported element type: {Element#F64_2}.
3075     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3076     * @param incX The increment for the elements of vector x, must be larger than zero.
3077     * @param beta The scalar beta.
3078     * @param Y The input/output allocation containing vector y (updated per the formula above); supported element type: {Element#F64_2}.
3079     * @param incY The increment for the elements of vector y, must be larger than zero.
3080     */
3081    void ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, const sp<Allocation>& A, const sp<Allocation>& X,
3082               int incX, Double2 beta, const sp<Allocation>& Y, int incY);
3083
3084    /**
3085     * ZHPMV performs the matrix-vector operation
3086     * y := alpha*A*x + beta*y
3087     *
3088     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
3089     *
3090     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3091     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3092     *       'a' to packed matrix 'b'.
3093     *           k = 0
3094     *           for i in range(0, n):
3095     *              for j in range(i, n):
3096     *                  b[k++] = a[i, j]
3097     *
3098     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
3099     * @param alpha The scalar alpha.
3100     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64_2}.
3101     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3102     * @param incX The increment for the elements of vector x, must be larger than zero.
3103     * @param beta The scalar beta.
3104     * @param Y The input/output allocation containing vector y (updated per the formula above); supported element type: {Element#F64_2}.
3105     * @param incY The increment for the elements of vector y, must be larger than zero.
3106     */
3107    void ZHPMV(RsBlasUplo Uplo, Double2 alpha, const sp<Allocation>& Ap, const sp<Allocation>& X,
3108               int incX, Double2 beta, const sp<Allocation>& Y, int incY);
3109
3110    /**
3111     * ZGERU performs the rank 1 operation
3112     * A := alpha*x*y**T + A
3113     *
3114     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
3115     *
3116     * @param alpha The scalar alpha.
3117     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3118     * @param incX The increment for the elements of vector x, must be larger than zero.
3119     * @param Y The input allocation containing vector y; supported element type: {Element#F64_2}.
3120     * @param incY The increment for the elements of vector y, must be larger than zero.
3121     * @param A The input/output allocation containing matrix A (updated per the formula above); supported element type: {Element#F64_2}.
3122     */
3123    void ZGERU(Double2 alpha, const sp<Allocation>& X, int incX,
3124               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
3125
3126    /**
3127     * ZGERC performs the rank 1 operation
3128     * A := alpha*x*y**H + A
3129     *
3130     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
3131     *
3132     * @param alpha The scalar alpha.
3133     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3134     * @param incX The increment for the elements of vector x, must be larger than zero.
3135     * @param Y The input allocation containing vector y; supported element type: {Element#F64_2}.
3136     * @param incY The increment for the elements of vector y, must be larger than zero.
3137     * @param A The input/output allocation containing matrix A (updated per the formula above); supported element type: {Element#F64_2}.
3138     */
3139    void ZGERC(Double2 alpha, const sp<Allocation>& X, int incX,
3140               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
3141
3142    /**
3143     * ZHER performs the rank 1 operation
3144     * A := alpha*x*x**H + A
3145     *
3146     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
3147     *
3148     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3149     * @param alpha The scalar alpha (a real double, not a Double2).
3150     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3151     * @param incX The increment for the elements of vector x, must be larger than zero.
3152     * @param A The input/output allocation containing matrix A (updated per the formula above); supported element type: {Element#F64_2}.
3153     */
3154    void ZHER(RsBlasUplo Uplo, double alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& A);
3155
3156    /**
3157     * ZHPR performs the rank 1 operation
3158     * A := alpha*x*x**H + A
3159     *
3160     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
3161     *
3162     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3163     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3164     *       'a' to packed matrix 'b'.
3165     *           k = 0
3166     *           for i in range(0, n):
3167     *              for j in range(i, n):
3168     *                  b[k++] = a[i, j]
3169     *
3170     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3171     * @param alpha The scalar alpha (a real double, not a Double2).
3172     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3173     * @param incX The increment for the elements of vector x, must be larger than zero.
3174     * @param Ap The input/output allocation containing packed matrix A (updated per the formula above); supported element type: {Element#F64_2}.
3175     */
3176    void ZHPR(RsBlasUplo Uplo, double alpha, const sp<Allocation>& X, int incX, const sp<Allocation>& Ap);
3177
3178    /**
3179     * ZHER2 performs the Hermitian rank 2 operation
3180     * A := alpha*x*y**H + alpha*y*x**H + A   (NOTE(review): netlib uses conjg(alpha) in the 2nd term; confirm)
3181     *
3182     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
3183     *
3184     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3185     * @param alpha The scalar alpha.
3186     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3187     * @param incX The increment for the elements of vector x, must be larger than zero.
3188     * @param Y The input allocation containing vector y; supported element type: {Element#F64_2}.
3189     * @param incY The increment for the elements of vector y, must be larger than zero.
3190     * @param A The input/output allocation containing matrix A (updated per the formula above); supported element type: {Element#F64_2}.
3191     */
3192    void ZHER2(RsBlasUplo Uplo, Double2 alpha, const sp<Allocation>& X, int incX,
3193               const sp<Allocation>& Y, int incY, const sp<Allocation>& A);
3194
3195    /**
3196     * ZHPR2 performs the Hermitian rank 2 operation
3197     * A := alpha*x*y**H + alpha*y*x**H + A   (NOTE(review): netlib uses conjg(alpha) in the 2nd term; confirm)
3198     *
3199     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
3200     *
3201     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3202     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3203     *       'a' to packed matrix 'b'.
3204     *           k = 0
3205     *           for i in range(0, n):
3206     *              for j in range(i, n):
3207     *                  b[k++] = a[i, j]
3208     *
3209     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3210     * @param alpha The scalar alpha.
3211     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
3212     * @param incX The increment for the elements of vector x, must be larger than zero.
3213     * @param Y The input allocation containing vector y; supported element type: {Element#F64_2}.
3214     * @param incY The increment for the elements of vector y, must be larger than zero.
3215     * @param Ap The input/output allocation containing packed matrix A (updated per the formula above); supported element type: {Element#F64_2}.
3216     */
3217    void ZHPR2(RsBlasUplo Uplo, Double2 alpha, const sp<Allocation>& X, int incX,
3218               const sp<Allocation>& Y, int incY, const sp<Allocation>& Ap);
3219
3220    /**
3221     * SGEMM performs one of the matrix-matrix operations
3222     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3223     *
3224     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
3225     *
3226     * @param TransA The type of transpose applied to matrix A.
3227     * @param TransB The type of transpose applied to matrix B.
3228     * @param alpha The scalar alpha.
3229     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3230     * @param B The input allocation containing matrix B; supported element type: {Element#F32}.
3231     * @param beta The scalar beta.
3232     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32}.
3233     */
3234    void SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, const sp<Allocation>& A,
3235                      const sp<Allocation>& B, float beta, const sp<Allocation>& C);
3236
3237
3238    /**
3239     * DGEMM performs one of the matrix-matrix operations
3240     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3241     *
3242     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
3243     *
3244     * @param TransA The type of transpose applied to matrix A.
3245     * @param TransB The type of transpose applied to matrix B.
3246     * @param alpha The scalar alpha.
3247     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3248     * @param B The input allocation containing matrix B; supported element type: {Element#F64}.
3249     * @param beta The scalar beta.
3250     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64}.
3251     */
3252    void DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, const sp<Allocation>& A,
3253                      const sp<Allocation>& B, double beta, const sp<Allocation>& C);
3254
3255    /**
3256     * CGEMM performs one of the matrix-matrix operations
3257     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3258     *
3259     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3260     *
3261     * @param TransA The type of transpose applied to matrix A.
3262     * @param TransB The type of transpose applied to matrix B.
3263     * @param alpha The scalar alpha.
3264     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3265     * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3266     * @param beta The scalar beta.
3267     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32_2}.
3268     */
3269    void CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, const sp<Allocation>& A,
3270                      const sp<Allocation>& B, Float2 beta, const sp<Allocation>& C);
3271
3272    /**
3273     * ZGEMM performs one of the matrix-matrix operations
3274     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3275     *
3276     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3277     *
3278     * @param TransA The type of transpose applied to matrix A.
3279     * @param TransB The type of transpose applied to matrix B.
3280     * @param alpha The scalar alpha.
3281     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3282     * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3283     * @param beta The scalar beta.
3284     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64_2}.
3285     */
3286    void ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, const sp<Allocation>& A,
3287                      const sp<Allocation>& B, Double2 beta, const sp<Allocation>& C);
3288
3289    /**
3290     * SSYMM performs one of the matrix-matrix operations
3291     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3292     *
3293     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3294     *
3295     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3296     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3297     * @param alpha The scalar alpha.
3298     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3299     * @param B The input allocation containing matrix B; supported element type: {Element#F32}.
3300     * @param beta The scalar beta.
3301     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32}.
3302     */
3303    void SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, const sp<Allocation>& A,
3304                      const sp<Allocation>& B, float beta, const sp<Allocation>& C);
3305
3306    /**
3307     * DSYMM performs one of the matrix-matrix operations
3308     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3309     *
3310     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3311     *
3312     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3313     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3314     * @param alpha The scalar alpha.
3315     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3316     * @param B The input allocation containing matrix B; supported element type: {Element#F64}.
3317     * @param beta The scalar beta.
3318     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64}.
3319     */
3320    void DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, const sp<Allocation>& A,
3321                      const sp<Allocation>& B, double beta, const sp<Allocation>& C);
3322
3323    /**
3324     * CSYMM performs one of the matrix-matrix operations
3325     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3326     *
3327     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3328     *
3329     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3330     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3331     * @param alpha The scalar alpha.
3332     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3333     * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3334     * @param beta The scalar beta.
3335     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32_2}.
3336     */
3337    void CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, const sp<Allocation>& A,
3338                      const sp<Allocation>& B, Float2 beta, const sp<Allocation>& C);
3339
3340    /**
3341     * ZSYMM performs one of the matrix-matrix operations
3342     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3343     *
3344     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3345     *
3346     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3347     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3348     * @param alpha The scalar alpha.
3349     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3350     * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3351     * @param beta The scalar beta.
3352     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64_2}.
3353     */
3354    void ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, const sp<Allocation>& A,
3355                      const sp<Allocation>& B, Double2 beta, const sp<Allocation>& C);
3356
3357    /**
3358     * SSYRK performs one of the symmetric rank k operations
3359     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3360     *
3361     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3362     *
3363     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3364     * @param Trans The type of transpose applied to the operation.
3365     * @param alpha The scalar alpha.
3366     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3367     * @param beta The scalar beta.
3368     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32}.
3369     */
3370    void SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3371               const sp<Allocation>& A, float beta, const sp<Allocation>& C);
3372
3373    /**
3374     * DSYRK performs one of the symmetric rank k operations
3375     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3376     *
3377     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3378     *
3379     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3380     * @param Trans The type of transpose applied to the operation.
3381     * @param alpha The scalar alpha.
3382     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3383     * @param beta The scalar beta.
3384     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64}.
3385     */
3386    void DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3387               const sp<Allocation>& A, double beta, const sp<Allocation>& C);
3388
3389    /**
3390     * CSYRK performs one of the symmetric rank k operations
3391     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3392     *
3393     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3394     *
3395     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3396     * @param Trans The type of transpose applied to the operation.
3397     * @param alpha The scalar alpha.
3398     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3399     * @param beta The scalar beta.
3400     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32_2}.
3401     */
3402    void CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3403               const sp<Allocation>& A, Float2 beta, const sp<Allocation>& C);
3404
3405    /**
3406     * ZSYRK performs one of the symmetric rank k operations
3407     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3408     *
3409     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3410     *
3411     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3412     * @param Trans The type of transpose applied to the operation.
3413     * @param alpha The scalar alpha.
3414     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3415     * @param beta The scalar beta.
3416     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64_2}.
3417     */
3418    void ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3419               const sp<Allocation>& A, Double2 beta, const sp<Allocation>& C);
3420
3421    /**
3422     * SSYR2K performs one of the symmetric rank 2k operations
3423     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3424     *
3425     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3426     *
3427     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3428     * @param Trans The type of transpose applied to the operation.
3429     * @param alpha The scalar alpha.
3430     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3431     * @param B The input allocation containing matrix B; supported element type: {Element#F32}.
3432     * @param beta The scalar beta.
3433     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32}.
3434     */
3435    void SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3436                const sp<Allocation>& A, const sp<Allocation>& B, float beta, const sp<Allocation>& C);
3437
3438    /**
3439     * DSYR2K performs one of the symmetric rank 2k operations
3440     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3441     *
3442     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3443     *
3444     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3445     * @param Trans The type of transpose applied to the operation.
3446     * @param alpha The scalar alpha.
3447     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3448     * @param B The input allocation containing matrix B; supported element type: {Element#F64}.
3449     * @param beta The scalar beta.
3450     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64}.
3451     */
3452    void DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3453                const sp<Allocation>& A, const sp<Allocation>& B, double beta, const sp<Allocation>& C);
3454
3455    /**
3456     * CSYR2K performs one of the symmetric rank 2k operations
3457     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3458     *
3459     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3460     *
3461     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3462     * @param Trans The type of transpose applied to the operation.
3463     * @param alpha The scalar alpha.
3464     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3465     * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3466     * @param beta The scalar beta.
3467     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F32_2}.
3468     */
3469    void CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3470                const sp<Allocation>& A, const sp<Allocation>& B, Float2 beta, const sp<Allocation>& C);
3471
3472    /**
3473     * ZSYR2K performs one of the symmetric rank 2k operations
3474     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3475     *
3476     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3477     *
3478     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3479     * @param Trans The type of transpose applied to the operation.
3480     * @param alpha The scalar alpha.
3481     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3482     * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3483     * @param beta The scalar beta.
3484     * @param C The input/output allocation containing matrix C (updated per the formula above); supported element type: {Element#F64_2}.
3485     */
3486    void ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3487                const sp<Allocation>& A, const sp<Allocation>& B, Double2 beta, const sp<Allocation>& C);
3488
3489    /**
3490     * STRMM performs one of the matrix-matrix operations
3491     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3492     * op(A) is one of  op(A) = A  or  op(A) = A**T
3493     *
3494     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3495     *
3496     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3497     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3498     * @param TransA The type of transpose applied to matrix A.
3499     * @param Diag Specifies whether or not A is unit triangular.
3500     * @param alpha The scalar alpha.
3501     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3502     * @param B The input/output allocation containing matrix B (updated per the formula above); supported element type: {Element#F32}.
3503     */
3504    void STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA,
3505               RsBlasDiag Diag, float alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3506
3507    /**
3508     * DTRMM performs one of the matrix-matrix operations
3509     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3510     * op(A) is one of  op(A) = A  or  op(A) = A**T
3511     *
3512     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3513     *
3514     * @param Side Specifies whether the triangular matrix A appears on the left or right of B (selects between the two forms above).
3515     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3516     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3517     * @param Diag Specifies whether or not A is unit triangular.
3518     * @param alpha The scalar alpha.
3519     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
3520     * @param B The allocation containing matrix B (read and, per the formula above, overwritten with the result), supported element type: {Element#F64}.
3521     */
3522    void DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3523               double alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3524
3525    /**
3526     * CTRMM performs one of the matrix-matrix operations
3527     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3528     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3529     *
3530     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3531     *
3532     * @param Side Specifies whether the triangular matrix A appears on the left or right of B (selects between the two forms above).
3533     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3534     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3535     * @param Diag Specifies whether or not A is unit triangular.
3536     * @param alpha The scalar alpha, passed as a Float2.
3537     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3538     * @param B The allocation containing matrix B (read and, per the formula above, overwritten with the result), supported element type: {Element#F32_2}.
3539     */
3540    void CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3541               Float2 alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3542
3543    /**
3544     * ZTRMM performs one of the matrix-matrix operations
3545     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3546     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3547     *
3548     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3549     *
3550     * @param Side Specifies whether the triangular matrix A appears on the left or right of B (selects between the two forms above).
3551     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3552     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3553     * @param Diag Specifies whether or not A is unit triangular.
3554     * @param alpha The scalar alpha, passed as a Double2.
3555     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3556     * @param B The allocation containing matrix B (read and, per the formula above, overwritten with the result), supported element type: {Element#F64_2}.
3557     */
3558    void ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3559               Double2 alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3560
3561    /**
3562     * STRSM solves one of the matrix equations
3563     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3564     * op(A) is one of  op(A) = A  or  op(A) = A**T
3565     *
3566     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3567     *
3568     * @param Side Specifies whether the triangular matrix A appears on the left or right of X (selects between the two forms above).
3569     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3570     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3571     * @param Diag Specifies whether or not A is unit triangular.
3572     * @param alpha The scalar alpha.
3573     * @param A The input allocation containing matrix A, supported element type: {Element#F32}.
3574     * @param B The allocation containing matrix B, supported element type: {Element#F32}. In the referenced BLAS routine the solution X overwrites B.
3575     */
3576    void STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3577               float alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3578
3579    /**
3580     * DTRSM solves one of the matrix equations
3581     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3582     * op(A) is one of  op(A) = A  or  op(A) = A**T
3583     *
3584     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3585     *
3586     * @param Side Specifies whether the triangular matrix A appears on the left or right of X (selects between the two forms above).
3587     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3588     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3589     * @param Diag Specifies whether or not A is unit triangular.
3590     * @param alpha The scalar alpha.
3591     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
3592     * @param B The allocation containing matrix B, supported element type: {Element#F64}. In the referenced BLAS routine the solution X overwrites B.
3593     */
3594    void DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3595               double alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3596
3597    /**
3598     * CTRSM solves one of the matrix equations
3599     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3600     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3601     *
3602     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3603     *
3604     * @param Side Specifies whether the triangular matrix A appears on the left or right of X (selects between the two forms above).
3605     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3606     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3607     * @param Diag Specifies whether or not A is unit triangular.
3608     * @param alpha The scalar alpha, passed as a Float2.
3609     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3610     * @param B The allocation containing matrix B, supported element type: {Element#F32_2}. In the referenced BLAS routine the solution X overwrites B.
3611     */
3612    void CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3613               Float2 alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3614
3615    /**
3616     * ZTRSM solves one of the matrix equations
3617     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3618     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3619     *
3620     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3621     *
3622     * @param Side Specifies whether the triangular matrix A appears on the left or right of X (selects between the two forms above).
3623     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3624     * @param TransA The type of transpose applied to matrix A, i.e. the choice of op(A).
3625     * @param Diag Specifies whether or not A is unit triangular.
3626     * @param alpha The scalar alpha, passed as a Double2.
3627     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3628     * @param B The allocation containing matrix B, supported element type: {Element#F64_2}. In the referenced BLAS routine the solution X overwrites B.
3629     */
3630    void ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3631               Double2 alpha, const sp<Allocation>& A, const sp<Allocation>& B);
3632
3633    /**
3634     * CHEMM performs one of the matrix-matrix operations
3635     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3636     *
3637     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3638     *
3639     * @param Side Specifies whether the Hermitian matrix A appears on the left or right (selects between the two forms above).
3640     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3641     * @param alpha The scalar alpha, passed as a Float2.
3642     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3643     * @param B The input allocation containing matrix B, supported element type: {Element#F32_2}.
3644     * @param beta The scalar beta, passed as a Float2.
3645     * @param C The allocation containing matrix C (read and, per the formula above, overwritten with the result), supported element type: {Element#F32_2}.
3646     */
3647    void CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, const sp<Allocation>& A,
3648               const sp<Allocation>& B, Float2 beta, const sp<Allocation>& C);
3649
3650    /**
3651     * ZHEMM performs one of the matrix-matrix operations
3652     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3653     *
3654     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3655     *
3656     * @param Side Specifies whether the Hermitian matrix A appears on the left or right (selects between the two forms above).
3657     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3658     * @param alpha The scalar alpha, passed as a Double2.
3659     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3660     * @param B The input allocation containing matrix B, supported element type: {Element#F64_2}.
3661     * @param beta The scalar beta, passed as a Double2.
3662     * @param C The allocation containing matrix C (read and, per the formula above, overwritten with the result), supported element type: {Element#F64_2}.
3663     */
3664    void ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, const sp<Allocation>& A,
3665               const sp<Allocation>& B, Double2 beta, const sp<Allocation>& C);
3666
3667    /**
3668     * CHERK performs one of the Hermitian rank k operations
3669     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3670     *
3671     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3672     *
3673     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3674     * @param Trans The type of transpose applied to the operation (selects between the two forms above).
3675     * @param alpha The scalar alpha (real-valued, passed as a float).
3676     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3677     * @param beta The scalar beta (real-valued, passed as a float).
3678     * @param C The allocation containing matrix C (read and, per the formula above, overwritten with the result), supported element type: {Element#F32_2}.
3679     */
3680    void CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, const sp<Allocation>& A,
3681               float beta, const sp<Allocation>& C);
3682
3683    /**
3684     * ZHERK performs one of the Hermitian rank k operations
3685     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3686     *
3687     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3688     *
3689     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3690     * @param Trans The type of transpose applied to the operation (selects between the two forms above).
3691     * @param alpha The scalar alpha (real-valued, passed as a double).
3692     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3693     * @param beta The scalar beta (real-valued, passed as a double).
3694     * @param C The allocation containing matrix C (read and, per the formula above, overwritten with the result), supported element type: {Element#F64_2}.
3695     */
3696    void ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, const sp<Allocation>& A,
3697               double beta, const sp<Allocation>& C);
3698
3699    /**
3700     * CHER2K performs one of the Hermitian rank 2k operations
3701     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3702     *
3703     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3704     *
3705     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3706     * @param Trans The type of transpose applied to the operation (selects between the two forms above).
3707     * @param alpha The scalar alpha, passed as a Float2.
3708     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3709     * @param B The input allocation containing matrix B, supported element type: {Element#F32_2}.
3710     * @param beta The scalar beta (real-valued, passed as a float).
3711     * @param C The allocation containing matrix C (read and, per the formula above, overwritten with the result), supported element type: {Element#F32_2}.
3712     */
3713    void CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, const sp<Allocation>& A,
3714                const sp<Allocation>& B, float beta, const sp<Allocation>& C);
3715
3716    /**
3717     * ZHER2K performs one of the Hermitian rank 2k operations
3718     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3719     *
3720     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3721     *
3722     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3723     * @param Trans The type of transpose applied to the operation (selects between the two forms above).
3724     * @param alpha The scalar alpha, passed as a Double2.
3725     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3726     * @param B The input allocation containing matrix B, supported element type: {Element#F64_2}.
3727     * @param beta The scalar beta (real-valued, passed as a double).
3728     * @param C The allocation containing matrix C (read and, per the formula above, overwritten with the result), supported element type: {Element#F64_2}.
3729     */
3730    void ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, const sp<Allocation>& A,
3731                const sp<Allocation>& B, double beta, const sp<Allocation>& C);
3732
3733    /**
3734     * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3735     * Calculations are done in 1.10.21 fixed-point format for the final output,
3736     * just before there's a shift down to drop the fractional parts. The output
3737     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3738     * gives some headroom to avoid wrapping around on small overflows.
3739     *
3740     * @param A The input allocation containing matrix A, supported element type: {Element#U8}.
3741     * @param a_offset The offset for all values in matrix A, e.g. A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3742     * @param B The input allocation containing matrix B, supported element type: {Element#U8}.
3743     * @param b_offset The offset for all values in matrix B, e.g. B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3744     * @param C The allocation containing matrix C, which per the formula above receives the result; supported element type: {Element#U8}.
3745     * @param c_offset The offset for all values in matrix C.
3746     * @param c_mult The multiplier for all values in matrix C, e.g. C[i,j] = (C[i,j] + c_offset) * c_mult.
3747     **/
3748    void BNNM(const sp<Allocation>& A, int a_offset, const sp<Allocation>& B, int b_offset, const sp<Allocation>& C,
3749              int c_offset, int c_mult);
3750};
3751
3752/**
3753 * Intrinsic kernel for blending two Allocations. In the per-method formulas, src presumably denotes the 'in' Allocation and dst the 'out' Allocation (TODO confirm).
3754 */
3755class ScriptIntrinsicBlend : public ScriptIntrinsic {
3756 private:
3757    ScriptIntrinsicBlend(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
3758 public:
3759    /**
3760     * Creates a new blend intrinsic. Supported Element types are U8_4.
3761     * @param[in] rs RenderScript context
3762     * @param[in] e Element
3763     * @return new ScriptIntrinsicBlend
3764     */
3765    static sp<ScriptIntrinsicBlend> create(const sp<RS>& rs, const sp<const Element>& e);
3766    /**
3767     * Sets dst = {0, 0, 0, 0}
3768     * @param[in] in input Allocation
3769     * @param[out] out output Allocation
3770     */
3771    void forEachClear(const sp<Allocation>& in, const sp<Allocation>& out);
3772    /**
3773     * Sets dst = src
3774     * @param[in] in input Allocation
3775     * @param[out] out output Allocation
3776     */
3777    void forEachSrc(const sp<Allocation>& in, const sp<Allocation>& out);
3778    /**
3779     * Sets dst = dst (NOP)
3780     * @param[in] in input Allocation
3781     * @param[in,out] out output Allocation
3782     */
3783    void forEachDst(const sp<Allocation>& in, const sp<Allocation>& out);
3784    /**
3785     * Sets dst = src + dst * (1.0 - src.a)
3786     * @param[in] in input Allocation
3787     * @param[in,out] out output Allocation
3788     */
3789    void forEachSrcOver(const sp<Allocation>& in, const sp<Allocation>& out);
3790    /**
3791     * Sets dst = dst + src * (1.0 - dst.a)
3792     * @param[in] in input Allocation
3793     * @param[in,out] out output Allocation
3794     */
3795    void forEachDstOver(const sp<Allocation>& in, const sp<Allocation>& out);
3796    /**
3797     * Sets dst = src * dst.a
3798     * @param[in] in input Allocation
3799     * @param[in,out] out output Allocation
3800     */
3801    void forEachSrcIn(const sp<Allocation>& in, const sp<Allocation>& out);
3802    /**
3803     * Sets dst = dst * src.a
3804     * @param[in] in input Allocation
3805     * @param[in,out] out output Allocation
3806     */
3807    void forEachDstIn(const sp<Allocation>& in, const sp<Allocation>& out);
3808    /**
3809     * Sets dst = src * (1.0 - dst.a)
3810     * @param[in] in input Allocation
3811     * @param[in,out] out output Allocation
3812     */
3813    void forEachSrcOut(const sp<Allocation>& in, const sp<Allocation>& out);
3814    /**
3815     * Sets dst = dst * (1.0 - src.a)
3816     * @param[in] in input Allocation
3817     * @param[in,out] out output Allocation
3818     */
3819    void forEachDstOut(const sp<Allocation>& in, const sp<Allocation>& out);
3820    /**
3821     * Sets dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
3822     * @param[in] in input Allocation
3823     * @param[in,out] out output Allocation
3824     */
3825    void forEachSrcAtop(const sp<Allocation>& in, const sp<Allocation>& out);
3826    /**
3827     * Sets dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
3828     * @param[in] in input Allocation
3829     * @param[in,out] out output Allocation
3830     */
3831    void forEachDstAtop(const sp<Allocation>& in, const sp<Allocation>& out);
3832    /**
3833     * Sets dst = {src.r ^ dst.r, src.g ^ dst.g, src.b ^ dst.b, src.a ^ dst.a}
3834     * @param[in] in input Allocation
3835     * @param[in,out] out output Allocation
3836     */
3837    void forEachXor(const sp<Allocation>& in, const sp<Allocation>& out);
3838    /**
3839     * Sets dst = src * dst
3840     * @param[in] in input Allocation
3841     * @param[in,out] out output Allocation
3842     */
3843    void forEachMultiply(const sp<Allocation>& in, const sp<Allocation>& out);
3844    /**
3845     * Sets dst = min(src + dst, 1.0)
3846     * @param[in] in input Allocation
3847     * @param[in,out] out output Allocation
3848     */
3849    void forEachAdd(const sp<Allocation>& in, const sp<Allocation>& out);
3850    /**
3851     * Sets dst = max(dst - src, 0.0)
3852     * @param[in] in input Allocation
3853     * @param[in,out] out output Allocation
3854     */
3855    void forEachSubtract(const sp<Allocation>& in, const sp<Allocation>& out);
3856};
3857
3858/**
3859 * Intrinsic Gaussian blur filter. Applies a Gaussian blur of the specified
3860 * radius to all elements of an Allocation.
3861 */
3862class ScriptIntrinsicBlur : public ScriptIntrinsic {
3863 private:
3864    ScriptIntrinsicBlur(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
3865 public:
3866    /**
3867     * Creates a new blur intrinsic. Supported Element types are U8 and U8_4.
3868     * @param[in] rs RenderScript context
3869     * @param[in] e Element
3870     * @return new ScriptIntrinsicBlur
3871     */
3872    static sp<ScriptIntrinsicBlur> create(const sp<RS>& rs, const sp<const Element>& e);
3873    /**
3874     * Sets the input of the blur.
3875     * @param[in] in input Allocation
3876     */
3877    void setInput(const sp<Allocation>& in);
3878    /**
3879     * Runs the intrinsic.
3880     * @param[out] out output Allocation
3881     */
3882    void forEach(const sp<Allocation>& out);
3883    /**
3884     * Sets the radius of the blur. The supported range is 0 < radius <= 25.
3885     * @param[in] radius radius of the blur
3886     */
3887    void setRadius(float radius);
3888};
3889
3890/**
3891 * Intrinsic for applying a color matrix to allocations. This has the
3892 * same effect as loading each element and converting it to a
3893 * F32_N, multiplying the result by the 4x4 color matrix
3894 * as performed by rsMatrixMultiply() and writing it to the output
3895 * after conversion back to U8_N or F32_N.
3896 */
3897class ScriptIntrinsicColorMatrix : public ScriptIntrinsic {
3898 private:
3899    ScriptIntrinsicColorMatrix(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
3900 public:
3901    /**
3902     * Creates a new color matrix intrinsic.
3903     * @param[in] rs RenderScript context
3904     * @return new ScriptIntrinsicColorMatrix
3905     */
3906    static sp<ScriptIntrinsicColorMatrix> create(const sp<RS>& rs);
3907    /**
3908     * Applies the color matrix. Supported types are U8 and F32 with
3909     * vector lengths between 1 and 4.
3910     * @param[in] in input Allocation
3911     * @param[out] out output Allocation
3912     */
3913    void forEach(const sp<Allocation>& in, const sp<Allocation>& out);
3914    /**
3915     * Sets the value to be added after the color matrix has been
3916     * applied. The default value is {0, 0, 0, 0}.
3917     * @param[in] add pointer to float[4] of values
3918     */
3919    void setAdd(float* add);
3920
3921    /**
3922     * Sets the color matrix which will be applied to each cell of the
3923     * image. The alpha channel will be copied.
3924     *
3925     * @param[in] m pointer to float[9] of values
3926     */
3927    void setColorMatrix3(float* m);
3928    /**
3929     * Sets the color matrix which will be applied to each cell of the
3930     * image.
3931     *
3932     * @param[in] m pointer to float[16] of values
3933     */
3934    void setColorMatrix4(float* m);
3935    /**
3936     * Sets a color matrix to convert from RGB to luminance. The alpha
3937     * channel will be copied.
3938     */
3939    void setGreyscale();
3940    /**
3941     * Sets the matrix to convert from RGB to YUV with a direct copy of
3942     * the 4th channel.
3943     */
3944    void setRGBtoYUV();
3945    /**
3946     * Sets the matrix to convert from YUV to RGB with a direct copy of
3947     * the 4th channel.
3948     */
3949    void setYUVtoRGB();
3950};
3951
3952/**
3953 * Intrinsic for applying a 3x3 convolution to an allocation.
3954 */
3955class ScriptIntrinsicConvolve3x3 : public ScriptIntrinsic {
3956 private:
3957    ScriptIntrinsicConvolve3x3(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
3958 public:
3959    /**
3960     * Creates a new 3x3 convolution intrinsic. Supported types are U8 and F32 with vector
3961     * lengths between 1 and 4. The default convolution kernel is the identity.
3962     * @param[in] rs RenderScript context
3963     * @param[in] e Element
3964     * @return new ScriptIntrinsicConvolve3x3
3965     */
3966    static sp<ScriptIntrinsicConvolve3x3> create(const sp<RS>& rs, const sp<const Element>& e);
3967    /**
3968     * Sets the input for the intrinsic.
3969     * @param[in] in input Allocation
3970     */
3971    void setInput(const sp<Allocation>& in);
3972    /**
3973     * Launches the intrinsic.
3974     * @param[out] out output Allocation
3975     */
3976    void forEach(const sp<Allocation>& out);
3977    /**
3978     * Sets the convolution kernel.
3979     * @param[in] v pointer to float[9] of values
3980     */
3981    void setCoefficients(float* v);
3982};
3983
3984/**
3985 * Intrinsic for applying a 5x5 convolution to an allocation.
3986 */
3987class ScriptIntrinsicConvolve5x5 : public ScriptIntrinsic {
3988 private:
3989    ScriptIntrinsicConvolve5x5(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
3990 public:
3991    /**
3992     * Creates a new 5x5 convolution intrinsic. Supported types are U8 and F32 with vector
3993     * lengths between 1 and 4. The default convolution kernel is the identity.
3994     * @param[in] rs RenderScript context
3995     * @param[in] e Element
3996     * @return new ScriptIntrinsicConvolve5x5
3997     */
3998    static sp<ScriptIntrinsicConvolve5x5> create(const sp<RS>& rs, const sp<const Element>& e);
3999    /**
4000     * Sets the input for the intrinsic.
4001     * @param[in] in input Allocation
4002     */
4003    void setInput(const sp<Allocation>& in);
4004    /**
4005     * Launches the intrinsic.
4006     * @param[out] out output Allocation
4007     */
4008    void forEach(const sp<Allocation>& out);
4009    /**
4010     * Sets the convolution kernel.
4011     * @param[in] v pointer to float[25] of values
4012     */
4013    void setCoefficients(float* v);
4014};
4015
4016/**
4017 * Intrinsic for computing a histogram.
4018 */
4019class ScriptIntrinsicHistogram : public ScriptIntrinsic {
4020 private:
4021    ScriptIntrinsicHistogram(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
4022    sp<Allocation> mOut;  // presumably the output allocation given to setOutput() -- TODO confirm
4023 public:
4024    /**
4025     * Create an intrinsic for calculating the histogram of a uchar
4026     * or uchar4 image.
4027     *
4028     * Supported element types are U8_4, U8_3, U8_2, and U8.
4029     *
4030     * @param[in] rs The RenderScript context
4031     * @param[in] e Element type for inputs
4032     *
4033     * @return ScriptIntrinsicHistogram
4034     */
4035    static sp<ScriptIntrinsicHistogram> create(const sp<RS>& rs, const sp<const Element>& e);
4036    /**
4037     * Set the output of the histogram.  32 bit integer types are
4038     * supported.
4039     *
4040     * @param[in] aout The output allocation
4041     */
4042    void setOutput(const sp<Allocation>& aout);
4043    /**
4044     * Set the coefficients used for the dot product calculation. The
4045     * default is {0.299f, 0.587f, 0.114f, 0.f}.
4046     *
4047     * Coefficients must be >= 0 and sum to 1.0 or less.
4048     *
4049     * @param[in] r Red coefficient
4050     * @param[in] g Green coefficient
4051     * @param[in] b Blue coefficient
4052     * @param[in] a Alpha coefficient
4053     */
4054    void setDotCoefficients(float r, float g, float b, float a);
4055    /**
4056     * Process an input buffer and place the histogram into the output
4057     * allocation. The output allocation may be a narrower vector size
4058     * than the input. In this case the vector size of the output is
4059     * used to determine how many of the input channels are used in
4060     * the computation. This is useful if you have an RGBA input
4061     * buffer but only want the histogram for RGB.
4062     *
4063     * 1D and 2D input allocations are supported.
4064     *
4065     * @param[in] ain The input image
4066     */
4067    void forEach(const sp<Allocation>& ain);
4068    /**
4069     * Process an input buffer and place the histogram into the output
4070     * allocation. The dot product of the input channels and the
4071     * coefficients from 'setDotCoefficients' is used to calculate
4072     * the output values.
4073     *
4074     * 1D and 2D input allocations are supported.
4075     *
4076     * @param[in] ain The input image
4077     */
4078    void forEach_dot(const sp<Allocation>& ain);
4079};
4080
4081/**
4082 * Intrinsic for applying a per-channel lookup table. Each channel of
4083 * the input has an independent lookup table. The tables are 256
4084 * entries in size and can cover the full value range of U8_4.
4085 **/
4086class ScriptIntrinsicLUT : public ScriptIntrinsic {
4087 private:
4088    sp<Allocation> LUT;  // presumably the Allocation holding the lookup tables -- TODO confirm
4089    bool mDirty;  // presumably set when mCache has changes not yet pushed to LUT -- TODO confirm
4090    unsigned char mCache[1024];  // 1024 bytes; presumably 4 channels x 256 entries -- TODO confirm
4091    void setTable(unsigned int offset, unsigned char base, unsigned int length, unsigned char* lutValues);  // NOTE(review): likely the shared helper behind setRed/setGreen/setBlue/setAlpha -- confirm
4092    ScriptIntrinsicLUT(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
4093
4094 public:
4095    /**
4096     * Supported element types are U8_4.
4097     *
4098     * The default tables are identity.
4099     *
4100     * @param[in] rs The RenderScript context
4101     * @param[in] e Element type for inputs and outputs
4102     *
4103     * @return ScriptIntrinsicLUT
4104     */
4105    static sp<ScriptIntrinsicLUT> create(const sp<RS>& rs, const sp<const Element>& e);
4106    /**
4107     * Invoke the kernel and apply the lookup to each cell of ain and
4108     * copy to aout.
4109     *
4110     * @param[in] ain Input allocation
4111     * @param[out] aout Output allocation
4112     */
4113    void forEach(const sp<Allocation>& ain, const sp<Allocation>& aout);
4114    /**
4115     * Sets entries in the LUT for the red channel.
4116     * @param[in] base base of region to update
4117     * @param[in] length length of region to update
4118     * @param[in] lutValues LUT values to use
4119     */
4120    void setRed(unsigned char base, unsigned int length, unsigned char* lutValues);
4121    /**
4122     * Sets entries in the LUT for the green channel.
4123     * @param[in] base base of region to update
4124     * @param[in] length length of region to update
4125     * @param[in] lutValues LUT values to use
4126     */
4127    void setGreen(unsigned char base, unsigned int length, unsigned char* lutValues);
4128    /**
4129     * Sets entries in the LUT for the blue channel.
4130     * @param[in] base base of region to update
4131     * @param[in] length length of region to update
4132     * @param[in] lutValues LUT values to use
4133     */
4134    void setBlue(unsigned char base, unsigned int length, unsigned char* lutValues);
4135    /**
4136     * Sets entries in the LUT for the alpha channel.
4137     * @param[in] base base of region to update
4138     * @param[in] length length of region to update
4139     * @param[in] lutValues LUT values to use
4140     */
4141    void setAlpha(unsigned char base, unsigned int length, unsigned char* lutValues);
4142    virtual ~ScriptIntrinsicLUT();
4143};
4144
4145/**
4146 * Intrinsic for performing a resize of a 2D allocation.
4147 */
4148class ScriptIntrinsicResize : public ScriptIntrinsic {
4149 private:
4150    sp<Allocation> mInput;  // presumably the Allocation given to setInput() -- TODO confirm
4151    ScriptIntrinsicResize(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
4152 public:
4153    /**
4154     * Creates a new resize intrinsic. Supported Element types are U8_4.
4155     *
4156     * @param[in] rs RenderScript context
4157     * @return new ScriptIntrinsicResize
4158     */
4159    static sp<ScriptIntrinsicResize> create(const sp<RS>& rs);
4160
4161    /**
4162     * Resize-copies the input allocation to the output specified. The
4163     * Allocation is rescaled if necessary using bi-cubic
4164     * interpolation. The input is not a parameter here; it is presumably the Allocation given to setInput().
4165     *
4166     * @param[out] aout output Allocation
4167     */
4168    void forEach_bicubic(const sp<Allocation>& aout);
4169
4170    /**
4171     * Set the input of the resize.
4172     * @param[in] ain input Allocation
4173     */
4174    void setInput(const sp<Allocation>& ain);
4175};
4176
4177/**
4178 * Intrinsic for converting an Android YUV buffer to RGB.
4179 *
4180 * The input allocation should be supplied in a supported YUV format
4181 * as a YUV element Allocation. The output is RGBA; the alpha channel
4182 * will be set to 255.
4183 */
4184class ScriptIntrinsicYuvToRGB : public ScriptIntrinsic {
4185 private:
4186    ScriptIntrinsicYuvToRGB(sp<RS> rs, sp<const Element> e);  // private constructor; see create()
4187 public:
4188    /**
4189     * Create an intrinsic for converting YUV to RGB.
4190     *
4191     * Supported element types are U8_4.
4192     *
4193     * @param[in] rs The RenderScript context
4194     * @param[in] e Element type for output
4195     *
4196     * @return ScriptIntrinsicYuvToRGB
4197     */
4198    static sp<ScriptIntrinsicYuvToRGB> create(const sp<RS>& rs, const sp<const Element>& e);
4199    /**
4200     * Set the input YUV allocation.
4201     *
4202     * @param[in] in The input allocation.
4203     */
4204    void setInput(const sp<Allocation>& in);
4205
4206    /**
4207     * Convert the image to RGB.
4208     *
4209     * @param[out] out Output allocation. Must match creation element
4210     *                 type.
4211     */
4212    void forEach(const sp<Allocation>& out);
4213
4214};
4215
4216/**
4217 * Sampler object that defines how Allocations can be read as textures
4218 * within a kernel. Samplers are used in conjunction with the rsSample
4219 * runtime function to return values from normalized coordinates.
4220 *
4221 * Any Allocation used with a Sampler must have been created with
4222 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE; using a Sampler on an
4223 * Allocation that was not created with
4224 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE is undefined.
4225 **/
4226 class Sampler : public BaseObj {
4227 private:
4228    Sampler(sp<RS> rs, void* id);
4229    Sampler(sp<RS> rs, void* id, RsSamplerValue min, RsSamplerValue mag,
4230            RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4231    RsSamplerValue mMin;
4232    RsSamplerValue mMag;
4233    RsSamplerValue mWrapS;
4234    RsSamplerValue mWrapT;
4235    float mAniso;
4236
4237 public:
4238    /**
4239     * Creates a non-standard Sampler.
4240     * @param[in] rs RenderScript context
4241     * @param[in] min minification
4242     * @param[in] mag magnification
4243     * @param[in] wrapS S wrapping mode
4244     * @param[in] wrapT T wrapping mode
4245     * @param[in] anisotropy anisotropy setting
4246     */
4247    static sp<Sampler> create(const sp<RS>& rs, RsSamplerValue min, RsSamplerValue mag, RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4248
4249    /**
4250     * @return minification setting for the sampler
4251     */
4252    RsSamplerValue getMinification();
4253    /**
4254     * @return magnification setting for the sampler
4255     */
4256    RsSamplerValue getMagnification();
4257    /**
4258     * @return S wrapping mode for the sampler
4259     */
4260    RsSamplerValue getWrapS();
4261    /**
4262     * @return T wrapping mode for the sampler
4263     */
4264    RsSamplerValue getWrapT();
4265    /**
4266     * @return anisotropy setting for the sampler
4267     */
4268    float getAnisotropy();
4269
4270    /**
4271     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4272     * clamp.
4273     *
4274     * @param rs Context to which the sampler will belong.
4275     *
4276     * @return Sampler
4277     */
4278    static sp<const Sampler> CLAMP_NEAREST(const sp<RS> &rs);
4279    /**
4280     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4281     * clamp.
4282     *
4283     * @param rs Context to which the sampler will belong.
4284     *
4285     * @return Sampler
4286     */
4287    static sp<const Sampler> CLAMP_LINEAR(const sp<RS> &rs);
4288    /**
4289     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4290     * wrap modes set to clamp.
4291     *
4292     * @param rs Context to which the sampler will belong.
4293     *
4294     * @return Sampler
4295     */
4296    static sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR(const sp<RS> &rs);
4297    /**
4298     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4299     * wrap.
4300     *
4301     * @param rs Context to which the sampler will belong.
4302     *
4303     * @return Sampler
4304     */
4305    static sp<const Sampler> WRAP_NEAREST(const sp<RS> &rs);
4306    /**
4307     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4308     * wrap.
4309     *
4310     * @param rs Context to which the sampler will belong.
4311     *
4312     * @return Sampler
4313     */
4314    static sp<const Sampler> WRAP_LINEAR(const sp<RS> &rs);
4315    /**
4316     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4317     * wrap modes set to wrap.
4318     *
4319     * @param rs Context to which the sampler will belong.
4320     *
4321     * @return Sampler
4322     */
4323    static sp<const Sampler> WRAP_LINEAR_MIP_LINEAR(const sp<RS> &rs);
4324    /**
4325     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4326     * mirrored repeat.
4327     *
4328     * @param rs Context to which the sampler will belong.
4329     *
4330     * @return Sampler
4331     */
4332    static sp<const Sampler> MIRRORED_REPEAT_NEAREST(const sp<RS> &rs);
4333    /**
4334     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4335     * mirrored repeat.
4336     *
4337     * @param rs Context to which the sampler will belong.
4338     *
4339     * @return Sampler
4340     */
4341    static sp<const Sampler> MIRRORED_REPEAT_LINEAR(const sp<RS> &rs);
4342    /**
4343     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4344     * wrap modes set to mirrored repeat.
4345     *
4346     * @param rs Context to which the sampler will belong.
4347     *
4348     * @return Sampler
4349     */
4350    static sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR(const sp<RS> &rs);
4351
4352};
4353
4354} // namespace RSC
4355
4356} // namespace android
4357
4358#endif
4359