// rsCppStructs.h revision d9a0257f82cf4c634f6d2005c7fe1b46b1f7e1f1
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ANDROID_RSCPPSTRUCTS_H
18#define ANDROID_RSCPPSTRUCTS_H
19
20#include "rsDefines.h"
21#include "util/RefBase.h"
22
23#include <pthread.h>
24
25
/**
 * Every row in an RS allocation is guaranteed to be aligned by this amount, and
 * every row in a user-backed allocation must be aligned by this amount.
 */
#define RS_CPU_ALLOCATION_ALIGNMENT 16
31
// Forward declaration only; RS::dispatch below is a pointer to this type.
struct dispatchTable;
33
34namespace android {
// Forward declaration; only used through sp<Surface> in Allocation::getSurface()/setSurface().
class Surface;
36
37namespace RSC {
38
39
/**
 * Callback type accepted by RS::setErrorHandler().
 * @param errorNum numeric error code
 * @param errorText text accompanying the error
 */
typedef void (*ErrorHandlerFunc_t)(uint32_t errorNum, const char *errorText);

/**
 * Callback type accepted by RS::setMessageHandler().
 * @param msgNum numeric message identifier
 * @param msgData pointer to the message payload
 * @param msgLen length associated with msgData (NOTE(review): presumably in bytes - confirm)
 */
typedef void (*MessageHandlerFunc_t)(uint32_t msgNum, const void *msgData, size_t msgLen);
42
// Forward declarations so the classes below can refer to each other
// through pointers and sp<> before their full definitions appear.
class RS;
class BaseObj;
class Element;
class Type;
class Allocation;
class Script;
class ScriptC;
class Sampler;
51
/**
 * Possible error codes used by RenderScript. Once a status other than RS_SUCCESS
 * is returned, the RenderScript context is considered dead and cannot perform any
 * additional work.
 */
enum RSError {
    RS_SUCCESS = 0,                 ///< No error
    RS_ERROR_INVALID_PARAMETER = 1, ///< An invalid parameter was passed to a function
    RS_ERROR_RUNTIME_ERROR = 2,     ///< The RenderScript driver returned an error; this is
                                    ///< often indicative of a kernel that crashed
    RS_ERROR_INVALID_ELEMENT = 3,   ///< An invalid Element was passed to a function
    RS_ERROR_MAX = 9999             ///< Largest value in this enum; no error condition is documented for it
};
66
/**
 * YUV formats supported by the RenderScript API.
 */
enum RSYuvFormat {
    RS_YUV_NONE = 0, ///< No YUV data
    RS_YUV_YV12 = 1, ///< YUV data in YV12 format
    RS_YUV_NV21 = 2, ///< YUV data in NV21 format
    RS_YUV_MAX = 3   ///< One past the last format value listed above
};
76
/**
 * Flags that can control RenderScript behavior on a per-context level.
 * Each flag occupies its own bit, so several can be OR-ed together.
 */
enum RSInitFlags {
    RS_INIT_SYNCHRONOUS = 1, ///< All RenderScript calls will be synchronous. May reduce latency.
    RS_INIT_LOW_LATENCY = 2, ///< Prefer low latency devices over potentially higher throughput devices.
    // Bitflag 4 is reserved for the context flag low power
    RS_INIT_WAIT_FOR_ATTACH = 8,   ///< Kernel execution will hold to give time for a debugger to be attached
    RS_INIT_OPT_LEVEL_0 = 16, ///< Use the -O0 option to set the optimization level to zero when calling the bcc compiler.
    RS_INIT_MAX = 32 ///< Next power of two after the highest flag defined above
};
88
89
/**
 * Plain value type holding two int8_t components, x and y.
 */
class Byte2 {
 public:
  int8_t x, y;

  /// Sets each component from the matching argument.
  Byte2(int8_t initX, int8_t initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  Byte2() : x(0), y(0) {}
};
98
/**
 * Plain value type holding three int8_t components: x, y and z.
 */
class Byte3 {
 public:
  int8_t x, y, z;

  /// Sets each component from the matching argument.
  Byte3(int8_t initX, int8_t initY, int8_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  Byte3() : x(0), y(0), z(0) {}
};
107
/**
 * Plain value type holding four int8_t components: x, y, z and w.
 */
class Byte4 {
 public:
  int8_t x, y, z, w;

  /// Sets each component from the matching argument.
  Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  Byte4() : x(0), y(0), z(0), w(0) {}
};
116
/**
 * Plain value type holding two uint8_t components, x and y.
 */
class UByte2 {
 public:
  uint8_t x, y;

  /// Sets each component from the matching argument.
  UByte2(uint8_t initX, uint8_t initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  UByte2() : x(0), y(0) {}
};
125
/**
 * Plain value type holding three uint8_t components: x, y and z.
 */
class UByte3 {
 public:
  uint8_t x, y, z;

  /// Sets each component from the matching argument.
  UByte3(uint8_t initX, uint8_t initY, uint8_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  UByte3() : x(0), y(0), z(0) {}
};
134
/**
 * Plain value type holding four uint8_t components: x, y, z and w.
 */
class UByte4 {
 public:
  uint8_t x, y, z, w;

  /// Sets each component from the matching argument.
  UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  UByte4() : x(0), y(0), z(0), w(0) {}
};
143
/**
 * Plain value type holding two `short` components, x and y.
 */
class Short2 {
 public:
  short x, y;

  /// Sets each component from the matching argument.
  Short2(short initX, short initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  Short2() : x(0), y(0) {}
};
152
/**
 * Plain value type holding three `short` components: x, y and z.
 */
class Short3 {
 public:
  short x, y, z;

  /// Sets each component from the matching argument.
  Short3(short initX, short initY, short initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  Short3() : x(0), y(0), z(0) {}
};
161
/**
 * Plain value type holding four `short` components: x, y, z and w.
 */
class Short4 {
 public:
  short x, y, z, w;

  /// Sets each component from the matching argument.
  Short4(short initX, short initY, short initZ, short initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  Short4() : x(0), y(0), z(0), w(0) {}
};
170
/**
 * Plain value type holding two uint16_t components, x and y.
 */
class UShort2 {
 public:
  uint16_t x, y;

  /// Sets each component from the matching argument.
  UShort2(uint16_t initX, uint16_t initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  UShort2() : x(0), y(0) {}
};
179
/**
 * Plain value type holding three uint16_t components: x, y and z.
 */
class UShort3 {
 public:
  uint16_t x, y, z;

  /// Sets each component from the matching argument.
  UShort3(uint16_t initX, uint16_t initY, uint16_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  UShort3() : x(0), y(0), z(0) {}
};
188
/**
 * Plain value type holding four uint16_t components: x, y, z and w.
 */
class UShort4 {
 public:
  uint16_t x, y, z, w;

  /// Sets each component from the matching argument.
  UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  UShort4() : x(0), y(0), z(0), w(0) {}
};
197
/**
 * Plain value type holding two `int` components, x and y.
 */
class Int2 {
 public:
  int x, y;

  /// Sets each component from the matching argument.
  Int2(int initX, int initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  Int2() : x(0), y(0) {}
};
206
/**
 * Plain value type holding three `int` components: x, y and z.
 */
class Int3 {
 public:
  int x, y, z;

  /// Sets each component from the matching argument.
  Int3(int initX, int initY, int initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  Int3() : x(0), y(0), z(0) {}
};
215
/**
 * Plain value type holding four `int` components: x, y, z and w.
 */
class Int4 {
 public:
  int x, y, z, w;

  /// Sets each component from the matching argument.
  Int4(int initX, int initY, int initZ, int initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  Int4() : x(0), y(0), z(0), w(0) {}
};
224
/**
 * Plain value type holding two uint32_t components, x and y.
 */
class UInt2 {
 public:
  uint32_t x, y;

  /// Sets each component from the matching argument.
  UInt2(uint32_t initX, uint32_t initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  UInt2() : x(0), y(0) {}
};
233
/**
 * Plain value type holding three uint32_t components: x, y and z.
 */
class UInt3 {
 public:
  uint32_t x, y, z;

  /// Sets each component from the matching argument.
  UInt3(uint32_t initX, uint32_t initY, uint32_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  UInt3() : x(0), y(0), z(0) {}
};
242
/**
 * Plain value type holding four uint32_t components: x, y, z and w.
 */
class UInt4 {
 public:
  uint32_t x, y, z, w;

  /// Sets each component from the matching argument.
  UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  UInt4() : x(0), y(0), z(0), w(0) {}
};
251
/**
 * Plain value type holding two int64_t components, x and y.
 */
class Long2 {
 public:
  int64_t x, y;

  /// Sets each component from the matching argument.
  Long2(int64_t initX, int64_t initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  Long2() : x(0), y(0) {}
};
260
/**
 * Plain value type holding three int64_t components: x, y and z.
 */
class Long3 {
 public:
  int64_t x, y, z;

  /// Sets each component from the matching argument.
  Long3(int64_t initX, int64_t initY, int64_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  Long3() : x(0), y(0), z(0) {}
};
269
/**
 * Plain value type holding four int64_t components: x, y, z and w.
 */
class Long4 {
 public:
  int64_t x, y, z, w;

  /// Sets each component from the matching argument.
  Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  Long4() : x(0), y(0), z(0), w(0) {}
};
278
/**
 * Plain value type holding two uint64_t components, x and y.
 */
class ULong2 {
 public:
  uint64_t x, y;

  /// Sets each component from the matching argument.
  ULong2(uint64_t initX, uint64_t initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  ULong2() : x(0), y(0) {}
};
287
/**
 * Plain value type holding three uint64_t components: x, y and z.
 */
class ULong3 {
 public:
  uint64_t x, y, z;

  /// Sets each component from the matching argument.
  ULong3(uint64_t initX, uint64_t initY, uint64_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  ULong3() : x(0), y(0), z(0) {}
};
296
/**
 * Plain value type holding four uint64_t components: x, y, z and w.
 */
class ULong4 {
 public:
  uint64_t x, y, z, w;

  /// Sets each component from the matching argument.
  ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  ULong4() : x(0), y(0), z(0), w(0) {}
};
305
/**
 * Plain value type holding two float components, x and y.
 */
class Float2 {
 public:
  float x, y;

  /// Sets each component from the matching argument.
  Float2(float initX, float initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero (written as 0.f, matching Float3 and Float4).
  Float2() : x(0.f), y(0.f) {}
};
314
/**
 * Plain value type holding three float components: x, y and z.
 */
class Float3 {
 public:
  float x, y, z;

  /// Sets each component from the matching argument.
  Float3(float initX, float initY, float initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  Float3() : x(0.f), y(0.f), z(0.f) {}
};
323
/**
 * Plain value type holding four float components: x, y, z and w.
 */
class Float4 {
 public:
  float x, y, z, w;

  /// Sets each component from the matching argument.
  Float4(float initX, float initY, float initZ, float initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  Float4() : x(0.f), y(0.f), z(0.f), w(0.f) {}
};
332
/**
 * Plain value type holding two double components, x and y.
 */
class Double2 {
 public:
  double x, y;

  /// Sets each component from the matching argument.
  Double2(double initX, double initY)
    : x(initX), y(initY) {}
  /// Sets every component to zero.
  Double2() : x(0), y(0) {}
};
341
/**
 * Plain value type holding three double components: x, y and z.
 */
class Double3 {
 public:
  double x, y, z;

  /// Sets each component from the matching argument.
  Double3(double initX, double initY, double initZ)
    : x(initX), y(initY), z(initZ) {}
  /// Sets every component to zero.
  Double3() : x(0), y(0), z(0) {}
};
350
/**
 * Plain value type holding four double components: x, y, z and w.
 */
class Double4 {
 public:
  double x, y, z, w;

  /// Sets each component from the matching argument.
  Double4(double initX, double initY, double initZ, double initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /// Sets every component to zero.
  Double4() : x(0), y(0), z(0), w(0) {}
};
359
360 /**
361  * The RenderScript context. This class controls initialization, resource management, and teardown.
362  */
363 class RS : public android::RSC::LightRefBase<RS> {
364
365 public:
366    RS();
367    virtual ~RS();
368
369    /**
370     * Initializes a RenderScript context. A context must be initialized before it can be used.
371     * @param[in] name Directory name to be used by this context. This should be equivalent to
372     * Context.getCacheDir().
373     * @param[in] flags Optional flags for this context.
374     * @param[in] targetApi Optional target RS API level. (Default 0: Using the latest SDK/Platform API).
375     * @return true on success
376     */
377    bool init(const char * name, uint32_t flags = 0, int targetApi = 0);
378
379    /**
380     * Sets the error handler function for this context. This error handler is
381     * called whenever an error is set.
382     *
383     * @param[in] func Error handler function
384     */
385    void setErrorHandler(ErrorHandlerFunc_t func);
386
387    /**
388     * Returns the current error handler function for this context.
389     *
390     * @return pointer to current error handler function or NULL if not set
391     */
392    ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; }
393
394    /**
395     * Sets the message handler function for this context. This message handler
396     * is called whenever a message is sent from a RenderScript kernel.
397     *
398     *  @param[in] func Message handler function
399     */
400    void setMessageHandler(MessageHandlerFunc_t func);
401
402    /**
403     * Returns the current message handler function for this context.
404     *
405     * @return pointer to current message handler function or NULL if not set
406     */
407    MessageHandlerFunc_t getMessageHandler() { return mMessageFunc; }
408
409    /**
410     * Returns current status for the context.
411     *
412     * @return current error
413     */
414    RSError getError();
415
416    /**
417     * Waits for any currently running asynchronous operations to finish. This
418     * should only be used for performance testing and timing.
419     */
420    void finish();
421
422    RsContext getContext() { return mContext; }
423    void throwError(RSError error, const char *errMsg);
424
425    static dispatchTable* dispatch;
426
427 private:
428    static bool usingNative;
429    static bool initDispatch(int targetApi);
430
431    static void * threadProc(void *);
432
433    static bool gInitialized;
434    static pthread_mutex_t gInitMutex;
435
436    pthread_t mMessageThreadId;
437    pid_t mNativeMessageThreadId;
438    bool mMessageRun;
439
440    RsDevice mDev;
441    RsContext mContext;
442    RSError mCurrentError;
443
444    ErrorHandlerFunc_t mErrorFunc;
445    MessageHandlerFunc_t mMessageFunc;
446    bool mInit;
447
448    char mCacheDir[PATH_MAX+1];
449    uint32_t mCacheDirLen;
450
451    struct {
452        sp<const Element> U8;
453        sp<const Element> U8_2;
454        sp<const Element> U8_3;
455        sp<const Element> U8_4;
456        sp<const Element> I8;
457        sp<const Element> I8_2;
458        sp<const Element> I8_3;
459        sp<const Element> I8_4;
460        sp<const Element> U16;
461        sp<const Element> U16_2;
462        sp<const Element> U16_3;
463        sp<const Element> U16_4;
464        sp<const Element> I16;
465        sp<const Element> I16_2;
466        sp<const Element> I16_3;
467        sp<const Element> I16_4;
468        sp<const Element> U32;
469        sp<const Element> U32_2;
470        sp<const Element> U32_3;
471        sp<const Element> U32_4;
472        sp<const Element> I32;
473        sp<const Element> I32_2;
474        sp<const Element> I32_3;
475        sp<const Element> I32_4;
476        sp<const Element> U64;
477        sp<const Element> U64_2;
478        sp<const Element> U64_3;
479        sp<const Element> U64_4;
480        sp<const Element> I64;
481        sp<const Element> I64_2;
482        sp<const Element> I64_3;
483        sp<const Element> I64_4;
484        sp<const Element> F32;
485        sp<const Element> F32_2;
486        sp<const Element> F32_3;
487        sp<const Element> F32_4;
488        sp<const Element> F64;
489        sp<const Element> F64_2;
490        sp<const Element> F64_3;
491        sp<const Element> F64_4;
492        sp<const Element> BOOLEAN;
493
494        sp<const Element> ELEMENT;
495        sp<const Element> TYPE;
496        sp<const Element> ALLOCATION;
497        sp<const Element> SAMPLER;
498        sp<const Element> SCRIPT;
499        sp<const Element> MESH;
500        sp<const Element> PROGRAM_FRAGMENT;
501        sp<const Element> PROGRAM_VERTEX;
502        sp<const Element> PROGRAM_RASTER;
503        sp<const Element> PROGRAM_STORE;
504
505        sp<const Element> A_8;
506        sp<const Element> RGB_565;
507        sp<const Element> RGB_888;
508        sp<const Element> RGBA_5551;
509        sp<const Element> RGBA_4444;
510        sp<const Element> RGBA_8888;
511
512        sp<const Element> YUV;
513
514        sp<const Element> MATRIX_4X4;
515        sp<const Element> MATRIX_3X3;
516        sp<const Element> MATRIX_2X2;
517    } mElements;
518
519    struct {
520        sp<const Sampler> CLAMP_NEAREST;
521        sp<const Sampler> CLAMP_LINEAR;
522        sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR;
523        sp<const Sampler> WRAP_NEAREST;
524        sp<const Sampler> WRAP_LINEAR;
525        sp<const Sampler> WRAP_LINEAR_MIP_LINEAR;
526        sp<const Sampler> MIRRORED_REPEAT_NEAREST;
527        sp<const Sampler> MIRRORED_REPEAT_LINEAR;
528        sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR;
529    } mSamplers;
530    friend class Sampler;
531    friend class Element;
532    friend class ScriptC;
533};
534
535 /**
536  * Base class for all RenderScript objects. Not for direct use by developers.
537  */
538class BaseObj : public android::RSC::LightRefBase<BaseObj> {
539public:
540    void * getID() const;
541    virtual ~BaseObj();
542    virtual void updateFromNative();
543    virtual bool equals(sp<const BaseObj> obj);
544
545protected:
546    void *mID;
547    RS* mRS;
548    const char * mName;
549
550    BaseObj(void *id, sp<RS> rs);
551    void checkValid();
552
553    static void * getObjID(sp<const BaseObj> o);
554
555};
556
557 /**
558  * This class provides the primary method through which data is passed to and
559  * from RenderScript kernels. An Allocation provides the backing store for a
560  * given Type.
561  *
562  * An Allocation also contains a set of usage flags that denote how the
563  * Allocation could be used. For example, an Allocation may have usage flags
564  * specifying that it can be used from a script as well as input to a
565  * Sampler. A developer must synchronize across these different usages using
566  * syncAll(int) in order to ensure that different users of the Allocation have
567  * a consistent view of memory. For example, in the case where an Allocation is
568  * used as the output of one kernel and as Sampler input in a later kernel, a
569  * developer must call syncAll(RS_ALLOCATION_USAGE_SCRIPT) prior to launching the
570  * second kernel to ensure correctness.
571  */
572class Allocation : public BaseObj {
573protected:
574    sp<const Type> mType;
575    uint32_t mUsage;
576    sp<Allocation> mAdaptedAllocation;
577
578    bool mConstrainedLOD;
579    bool mConstrainedFace;
580    bool mConstrainedY;
581    bool mConstrainedZ;
582    bool mReadAllowed;
583    bool mWriteAllowed;
584    bool mAutoPadding;
585    uint32_t mSelectedY;
586    uint32_t mSelectedZ;
587    uint32_t mSelectedLOD;
588    RsAllocationCubemapFace mSelectedFace;
589
590    uint32_t mCurrentDimX;
591    uint32_t mCurrentDimY;
592    uint32_t mCurrentDimZ;
593    uint32_t mCurrentCount;
594
595    void * getIDSafe() const;
596    void updateCacheInfo(sp<const Type> t);
597
598    Allocation(void *id, sp<RS> rs, sp<const Type> t, uint32_t usage);
599
600    void validateIsInt64();
601    void validateIsInt32();
602    void validateIsInt16();
603    void validateIsInt8();
604    void validateIsFloat32();
605    void validateIsFloat64();
606    void validateIsObject();
607
608    virtual void updateFromNative();
609
610    void validate2DRange(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h);
611    void validate3DRange(uint32_t xoff, uint32_t yoff, uint32_t zoff,
612                         uint32_t w, uint32_t h, uint32_t d);
613
614public:
615
616    /**
617     * Return Type for the allocation.
618     * @return pointer to underlying Type
619     */
620    sp<const Type> getType() const {
621        return mType;
622    }
623
624    /**
625     * Enable/Disable AutoPadding for Vec3 elements.
626     *
627     * @param useAutoPadding True: enable AutoPadding; flase: disable AutoPadding
628     *
629     */
630    void setAutoPadding(bool useAutoPadding) {
631        mAutoPadding = useAutoPadding;
632    }
633
634    /**
635     * Propagate changes from one usage of the Allocation to other usages of the Allocation.
636     * @param[in] srcLocation source location with changes to propagate elsewhere
637     */
638    void syncAll(RsAllocationUsageType srcLocation);
639
640    /**
641     * Send a buffer to the output stream.  The contents of the Allocation will
642     * be undefined after this operation. This operation is only valid if
643     * USAGE_IO_OUTPUT is set on the Allocation.
644     */
645    void ioSendOutput();
646
647    /**
648     * Receive the latest input into the Allocation. This operation
649     * is only valid if USAGE_IO_INPUT is set on the Allocation.
650     */
651    void ioGetInput();
652
653#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
654    /**
655     * Returns the handle to a raw buffer that is being managed by the screen
656     * compositor. This operation is only valid for Allocations with USAGE_IO_INPUT.
657     * @return Surface associated with allocation
658     */
659    sp<Surface> getSurface();
660
661    /**
662     * Associate a Surface with this Allocation. This
663     * operation is only valid for Allocations with USAGE_IO_OUTPUT.
664     * @param[in] s Surface to associate with allocation
665     */
666    void setSurface(sp<Surface> s);
667#endif
668
669    /**
670     * Generate a mipmap chain. This is only valid if the Type of the Allocation
671     * includes mipmaps. This function will generate a complete set of mipmaps
672     * from the top level LOD and place them into the script memory space. If
673     * the Allocation is also using other memory spaces, a call to
674     * syncAll(Allocation.USAGE_SCRIPT) is required.
675     */
676    void generateMipmaps();
677
678    /**
679     * Copy an array into part of this Allocation.
680     * @param[in] off offset of first Element to be overwritten
681     * @param[in] count number of Elements to copy
682     * @param[in] data array from which to copy
683     */
684    void copy1DRangeFrom(uint32_t off, size_t count, const void *data);
685
686    /**
687     * Copy part of an Allocation into part of this Allocation.
688     * @param[in] off offset of first Element to be overwritten
689     * @param[in] count number of Elements to copy
690     * @param[in] data Allocation from which to copy
691     * @param[in] dataOff offset of first Element in data to copy
692     */
693    void copy1DRangeFrom(uint32_t off, size_t count, sp<const Allocation> data, uint32_t dataOff);
694
695    /**
696     * Copy an array into part of this Allocation.
697     * @param[in] off offset of first Element to be overwritten
698     * @param[in] count number of Elements to copy
699     * @param[in] data array from which to copy
700     */
701    void copy1DRangeTo(uint32_t off, size_t count, void *data);
702
703    /**
704     * Copy entire array to an Allocation.
705     * @param[in] data array from which to copy
706     */
707    void copy1DFrom(const void* data);
708
709    /**
710     * Copy entire Allocation to an array.
711     * @param[in] data destination array
712     */
713    void copy1DTo(void* data);
714
715    /**
716     * Copy from an array into a rectangular region in this Allocation. The
717     * array is assumed to be tightly packed.
718     * @param[in] xoff X offset of region to update in this Allocation
719     * @param[in] yoff Y offset of region to update in this Allocation
720     * @param[in] w Width of region to update
721     * @param[in] h Height of region to update
722     * @param[in] data Array from which to copy
723     */
724    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
725                         const void *data);
726
727    /**
728     * Copy from this Allocation into a rectangular region in an array. The
729     * array is assumed to be tightly packed.
730     * @param[in] xoff X offset of region to copy from this Allocation
731     * @param[in] yoff Y offset of region to copy from this Allocation
732     * @param[in] w Width of region to update
733     * @param[in] h Height of region to update
734     * @param[in] data destination array
735     */
736    void copy2DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
737                       void *data);
738
739    /**
740     * Copy from an Allocation into a rectangular region in this Allocation.
741     * @param[in] xoff X offset of region to update in this Allocation
742     * @param[in] yoff Y offset of region to update in this Allocation
743     * @param[in] w Width of region to update
744     * @param[in] h Height of region to update
745     * @param[in] data Allocation from which to copy
746     * @param[in] dataXoff X offset of region to copy from in data
747     * @param[in] dataYoff Y offset of region to copy from in data
748     */
749    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
750                         sp<const Allocation> data, uint32_t dataXoff, uint32_t dataYoff);
751
752    /**
753     * Copy from a strided array into a rectangular region in this Allocation.
754     * @param[in] xoff X offset of region to update in this Allocation
755     * @param[in] yoff Y offset of region to update in this Allocation
756     * @param[in] w Width of region to update
757     * @param[in] h Height of region to update
758     * @param[in] data array from which to copy
759     * @param[in] stride stride of data in bytes
760     */
761    void copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
762                           const void *data, size_t stride);
763
764    /**
765     * Copy from a strided array into this Allocation.
766     * @param[in] data array from which to copy
767     * @param[in] stride stride of data in bytes
768     */
769    void copy2DStridedFrom(const void *data, size_t stride);
770
771    /**
772     * Copy from a rectangular region in this Allocation into a strided array.
773     * @param[in] xoff X offset of region to update in this Allocation
774     * @param[in] yoff Y offset of region to update in this Allocation
775     * @param[in] w Width of region to update
776     * @param[in] h Height of region to update
777     * @param[in] data destination array
778     * @param[in] stride stride of data in bytes
779     */
780    void copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
781                         void *data, size_t stride);
782
783    /**
784     * Copy this Allocation into a strided array.
785     * @param[in] data destination array
786     * @param[in] stride stride of data in bytes
787     */
788    void copy2DStridedTo(void *data, size_t stride);
789
790
791    /**
792     * Copy from an array into a 3D region in this Allocation. The
793     * array is assumed to be tightly packed.
794     * @param[in] xoff X offset of region to update in this Allocation
795     * @param[in] yoff Y offset of region to update in this Allocation
796     * @param[in] zoff Z offset of region to update in this Allocation
797     * @param[in] w Width of region to update
798     * @param[in] h Height of region to update
799     * @param[in] d Depth of region to update
800     * @param[in] data Array from which to copy
801     */
802    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
803                         uint32_t h, uint32_t d, const void* data);
804
805    /**
806     * Copy from an Allocation into a 3D region in this Allocation.
807     * @param[in] xoff X offset of region to update in this Allocation
808     * @param[in] yoff Y offset of region to update in this Allocation
809     * @param[in] zoff Z offset of region to update in this Allocation
810     * @param[in] w Width of region to update
811     * @param[in] h Height of region to update
812     * @param[in] d Depth of region to update
813     * @param[in] data Allocation from which to copy
814     * @param[in] dataXoff X offset of region in data to copy from
815     * @param[in] dataYoff Y offset of region in data to copy from
816     * @param[in] dataZoff Z offset of region in data to copy from
817     */
818    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff,
819                         uint32_t w, uint32_t h, uint32_t d,
820                         sp<const Allocation> data,
821                         uint32_t dataXoff, uint32_t dataYoff, uint32_t dataZoff);
822
823    /**
824     * Copy a 3D region in this Allocation into an array. The
825     * array is assumed to be tightly packed.
826     * @param[in] xoff X offset of region to update in this Allocation
827     * @param[in] yoff Y offset of region to update in this Allocation
828     * @param[in] zoff Z offset of region to update in this Allocation
829     * @param[in] w Width of region to update
830     * @param[in] h Height of region to update
831     * @param[in] d Depth of region to update
832     * @param[in] data Array from which to copy
833     */
834    void copy3DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
835                         uint32_t h, uint32_t d, void* data);
836
837    /**
838     * Creates an Allocation for use by scripts with a given Type.
839     * @param[in] rs Context to which the Allocation will belong
840     * @param[in] type Type of the Allocation
841     * @param[in] mipmaps desired mipmap behavior for the Allocation
842     * @param[in] usage usage for the Allocation
843     * @return new Allocation
844     */
845    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
846                                   RsAllocationMipmapControl mipmaps, uint32_t usage);
847
848    /**
849     * Creates an Allocation for use by scripts with a given Type and a backing pointer. For use
850     * with RS_ALLOCATION_USAGE_SHARED.
851     * @param[in] rs Context to which the Allocation will belong
852     * @param[in] type Type of the Allocation
853     * @param[in] mipmaps desired mipmap behavior for the Allocation
854     * @param[in] usage usage for the Allocation
855     * @param[in] pointer existing backing store to use for this Allocation if possible
856     * @return new Allocation
857     */
858    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
859                                   RsAllocationMipmapControl mipmaps, uint32_t usage, void * pointer);
860
861    /**
862     * Creates an Allocation for use by scripts with a given Type with no mipmaps.
863     * @param[in] rs Context to which the Allocation will belong
864     * @param[in] type Type of the Allocation
865     * @param[in] usage usage for the Allocation
866     * @return new Allocation
867     */
868    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
869                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
870    /**
871     * Creates an Allocation with a specified number of given elements.
872     * @param[in] rs Context to which the Allocation will belong
873     * @param[in] e Element used in the Allocation
874     * @param[in] count Number of elements of the Allocation
875     * @param[in] usage usage for the Allocation
876     * @return new Allocation
877     */
878    static sp<Allocation> createSized(sp<RS> rs, sp<const Element> e, size_t count,
879                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
880
881    /**
882     * Creates a 2D Allocation with a specified number of given elements.
883     * @param[in] rs Context to which the Allocation will belong
884     * @param[in] e Element used in the Allocation
885     * @param[in] x Width in Elements of the Allocation
886     * @param[in] y Height of the Allocation
887     * @param[in] usage usage for the Allocation
888     * @return new Allocation
889     */
890    static sp<Allocation> createSized2D(sp<RS> rs, sp<const Element> e,
891                                        size_t x, size_t y,
892                                        uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
893
894
895    /**
896     * Get the backing pointer for a USAGE_SHARED allocation.
897     * @param[in] stride optional parameter. when non-NULL, will contain
898     *   stride in bytes of a 2D Allocation
899     * @return pointer to data
900     */
901    void * getPointer(size_t *stride = NULL);
902};
903
/**
 * An Element represents one item within an Allocation. An Element is roughly
 * equivalent to a C type in a RenderScript kernel. Elements may be basic
 * or complex. Some basic elements are:
 *
 * - A single float value (equivalent to a float in a kernel)
 * - A four-element float vector (equivalent to a float4 in a kernel)
 * - An unsigned 32-bit integer (equivalent to an unsigned int in a kernel)
 * - A single signed 8-bit integer (equivalent to a char in a kernel)
 *
 * Basic Elements are composed of an Element.DataType and an
 * Element.DataKind. The DataType encodes C type information of an Element,
 * while the DataKind encodes how that Element should be interpreted by a
 * Sampler. Note that Allocation objects with DataKind USER cannot be used as
 * input for a Sampler. In general, Allocation objects that are intended for
 * use with a Sampler should use bitmap-derived Elements such as
 * Element::RGBA_8888.
 */
922
923
924class Element : public BaseObj {
925public:
926    bool isComplex();
927
928    /**
929     * Elements could be simple, such as an int or a float, or a structure with
930     * multiple sub-elements, such as a collection of floats, float2,
931     * float4. This function returns zero for simple elements or the number of
932     * sub-elements otherwise.
933     * @return number of sub-elements
934     */
935    size_t getSubElementCount() {
936        return mVisibleElementMapSize;
937    }
938
939    /**
940     * For complex Elements, this returns the sub-element at a given index.
941     * @param[in] index index of sub-element
942     * @return sub-element
943     */
944    sp<const Element> getSubElement(uint32_t index);
945
946    /**
947     * For complex Elements, this returns the name of the sub-element at a given
948     * index.
949     * @param[in] index index of sub-element
950     * @return name of sub-element
951     */
952    const char * getSubElementName(uint32_t index);
953
954    /**
955     * For complex Elements, this returns the size of the sub-element at a given
956     * index.
957     * @param[in] index index of sub-element
958     * @return size of sub-element
959     */
960    size_t getSubElementArraySize(uint32_t index);
961
962    /**
963     * Returns the location of a sub-element within a complex Element.
964     * @param[in] index index of sub-element
965     * @return offset in bytes
966     */
967    uint32_t getSubElementOffsetBytes(uint32_t index);
968
969    /**
970     * Returns the data type used for the Element.
971     * @return data type
972     */
973    RsDataType getDataType() const {
974        return mType;
975    }
976
977    /**
978     * Returns the data kind used for the Element.
979     * @return data kind
980     */
981    RsDataKind getDataKind() const {
982        return mKind;
983    }
984
985    /**
986     * Returns the size in bytes of the Element.
987     * @return size in bytes
988     */
989    size_t getSizeBytes() const {
990        return mSizeBytes;
991    }
992
993    /**
994     * Returns the number of vector components for this Element.
995     * @return number of vector components
996     */
997    uint32_t getVectorSize() const {
998        return mVectorSize;
999    }
1000
1001    /**
1002     * Utility function for returning an Element containing a single bool.
1003     * @param[in] rs RenderScript context
1004     * @return Element
1005     */
1006    static sp<const Element> BOOLEAN(sp<RS> rs);
1007    /**
1008     * Utility function for returning an Element containing a single unsigned char.
1009     * @param[in] rs RenderScript context
1010     * @return Element
1011     */
1012    static sp<const Element> U8(sp<RS> rs);
1013    /**
1014     * Utility function for returning an Element containing a single signed char.
1015     * @param[in] rs RenderScript context
1016     * @return Element
1017     */
1018    static sp<const Element> I8(sp<RS> rs);
1019    /**
1020     * Utility function for returning an Element containing a single unsigned short.
1021     * @param[in] rs RenderScript context
1022     * @return Element
1023     */
1024    static sp<const Element> U16(sp<RS> rs);
1025    /**
1026     * Utility function for returning an Element containing a single signed short.
1027     * @param[in] rs RenderScript context
1028     * @return Element
1029     */
1030    static sp<const Element> I16(sp<RS> rs);
1031    /**
1032     * Utility function for returning an Element containing a single unsigned int.
1033     * @param[in] rs RenderScript context
1034     * @return Element
1035     */
1036    static sp<const Element> U32(sp<RS> rs);
1037    /**
1038     * Utility function for returning an Element containing a single signed int.
1039     * @param[in] rs RenderScript context
1040     * @return Element
1041     */
1042    static sp<const Element> I32(sp<RS> rs);
1043    /**
1044     * Utility function for returning an Element containing a single unsigned long long.
1045     * @param[in] rs RenderScript context
1046     * @return Element
1047     */
1048    static sp<const Element> U64(sp<RS> rs);
1049    /**
1050     * Utility function for returning an Element containing a single signed long long.
1051     * @param[in] rs RenderScript context
1052     * @return Element
1053     */
1054    static sp<const Element> I64(sp<RS> rs);
1055    /**
1056     * Utility function for returning an Element containing a single float.
1057     * @param[in] rs RenderScript context
1058     * @return Element
1059     */
1060    static sp<const Element> F32(sp<RS> rs);
1061    /**
1062     * Utility function for returning an Element containing a single double.
1063     * @param[in] rs RenderScript context
1064     * @return Element
1065     */
1066    static sp<const Element> F64(sp<RS> rs);
1067    /**
1068     * Utility function for returning an Element containing a single Element.
1069     * @param[in] rs RenderScript context
1070     * @return Element
1071     */
1072    static sp<const Element> ELEMENT(sp<RS> rs);
1073    /**
1074     * Utility function for returning an Element containing a single Type.
1075     * @param[in] rs RenderScript context
1076     * @return Element
1077     */
1078    static sp<const Element> TYPE(sp<RS> rs);
1079    /**
1080     * Utility function for returning an Element containing a single Allocation.
1081     * @param[in] rs RenderScript context
1082     * @return Element
1083     */
1084    static sp<const Element> ALLOCATION(sp<RS> rs);
1085    /**
1086     * Utility function for returning an Element containing a single Sampler.
1087     * @param[in] rs RenderScript context
1088     * @return Element
1089     */
1090    static sp<const Element> SAMPLER(sp<RS> rs);
1091    /**
1092     * Utility function for returning an Element containing a single Script.
1093     * @param[in] rs RenderScript context
1094     * @return Element
1095     */
1096    static sp<const Element> SCRIPT(sp<RS> rs);
1097    /**
1098     * Utility function for returning an Element containing an ALPHA_8 pixel.
1099     * @param[in] rs RenderScript context
1100     * @return Element
1101     */
1102    static sp<const Element> A_8(sp<RS> rs);
1103    /**
1104     * Utility function for returning an Element containing an RGB_565 pixel.
1105     * @param[in] rs RenderScript context
1106     * @return Element
1107     */
1108    static sp<const Element> RGB_565(sp<RS> rs);
1109    /**
1110     * Utility function for returning an Element containing an RGB_888 pixel.
1111     * @param[in] rs RenderScript context
1112     * @return Element
1113     */
1114    static sp<const Element> RGB_888(sp<RS> rs);
1115    /**
1116     * Utility function for returning an Element containing an RGBA_5551 pixel.
1117     * @param[in] rs RenderScript context
1118     * @return Element
1119     */
1120    static sp<const Element> RGBA_5551(sp<RS> rs);
1121    /**
1122     * Utility function for returning an Element containing an RGBA_4444 pixel.
1123     * @param[in] rs RenderScript context
1124     * @return Element
1125     */
1126    static sp<const Element> RGBA_4444(sp<RS> rs);
1127    /**
1128     * Utility function for returning an Element containing an RGBA_8888 pixel.
1129     * @param[in] rs RenderScript context
1130     * @return Element
1131     */
1132    static sp<const Element> RGBA_8888(sp<RS> rs);
1133
1134    /**
1135     * Utility function for returning an Element containing a float2.
1136     * @param[in] rs RenderScript context
1137     * @return Element
1138     */
1139    static sp<const Element> F32_2(sp<RS> rs);
1140    /**
1141     * Utility function for returning an Element containing a float3.
1142     * @param[in] rs RenderScript context
1143     * @return Element
1144     */
1145    static sp<const Element> F32_3(sp<RS> rs);
1146    /**
1147     * Utility function for returning an Element containing a float4.
1148     * @param[in] rs RenderScript context
1149     * @return Element
1150     */
1151    static sp<const Element> F32_4(sp<RS> rs);
1152    /**
1153     * Utility function for returning an Element containing a double2.
1154     * @param[in] rs RenderScript context
1155     * @return Element
1156     */
1157    static sp<const Element> F64_2(sp<RS> rs);
1158    /**
1159     * Utility function for returning an Element containing a double3.
1160     * @param[in] rs RenderScript context
1161     * @return Element
1162     */
1163    static sp<const Element> F64_3(sp<RS> rs);
1164    /**
1165     * Utility function for returning an Element containing a double4.
1166     * @param[in] rs RenderScript context
1167     * @return Element
1168     */
1169    static sp<const Element> F64_4(sp<RS> rs);
1170    /**
1171     * Utility function for returning an Element containing a uchar2.
1172     * @param[in] rs RenderScript context
1173     * @return Element
1174     */
1175    static sp<const Element> U8_2(sp<RS> rs);
1176    /**
1177     * Utility function for returning an Element containing a uchar3.
1178     * @param[in] rs RenderScript context
1179     * @return Element
1180     */
1181    static sp<const Element> U8_3(sp<RS> rs);
1182    /**
1183     * Utility function for returning an Element containing a uchar4.
1184     * @param[in] rs RenderScript context
1185     * @return Element
1186     */
1187    static sp<const Element> U8_4(sp<RS> rs);
1188    /**
1189     * Utility function for returning an Element containing a char2.
1190     * @param[in] rs RenderScript context
1191     * @return Element
1192     */
1193    static sp<const Element> I8_2(sp<RS> rs);
1194    /**
1195     * Utility function for returning an Element containing a char3.
1196     * @param[in] rs RenderScript context
1197     * @return Element
1198     */
1199    static sp<const Element> I8_3(sp<RS> rs);
1200    /**
1201     * Utility function for returning an Element containing a char4.
1202     * @param[in] rs RenderScript context
1203     * @return Element
1204     */
1205    static sp<const Element> I8_4(sp<RS> rs);
1206    /**
1207     * Utility function for returning an Element containing a ushort2.
1208     * @param[in] rs RenderScript context
1209     * @return Element
1210     */
1211    static sp<const Element> U16_2(sp<RS> rs);
1212    /**
1213     * Utility function for returning an Element containing a ushort3.
1214     * @param[in] rs RenderScript context
1215     * @return Element
1216     */
1217    static sp<const Element> U16_3(sp<RS> rs);
1218    /**
1219     * Utility function for returning an Element containing a ushort4.
1220     * @param[in] rs RenderScript context
1221     * @return Element
1222     */
1223    static sp<const Element> U16_4(sp<RS> rs);
1224    /**
1225     * Utility function for returning an Element containing a short2.
1226     * @param[in] rs RenderScript context
1227     * @return Element
1228     */
1229    static sp<const Element> I16_2(sp<RS> rs);
1230    /**
1231     * Utility function for returning an Element containing a short3.
1232     * @param[in] rs RenderScript context
1233     * @return Element
1234     */
1235    static sp<const Element> I16_3(sp<RS> rs);
1236    /**
1237     * Utility function for returning an Element containing a short4.
1238     * @param[in] rs RenderScript context
1239     * @return Element
1240     */
1241    static sp<const Element> I16_4(sp<RS> rs);
1242    /**
1243     * Utility function for returning an Element containing a uint2.
1244     * @param[in] rs RenderScript context
1245     * @return Element
1246     */
1247    static sp<const Element> U32_2(sp<RS> rs);
1248    /**
1249     * Utility function for returning an Element containing a uint3.
1250     * @param[in] rs RenderScript context
1251     * @return Element
1252     */
1253    static sp<const Element> U32_3(sp<RS> rs);
1254    /**
1255     * Utility function for returning an Element containing a uint4.
1256     * @param[in] rs RenderScript context
1257     * @return Element
1258     */
1259    static sp<const Element> U32_4(sp<RS> rs);
1260    /**
1261     * Utility function for returning an Element containing an int2.
1262     * @param[in] rs RenderScript context
1263     * @return Element
1264     */
1265    static sp<const Element> I32_2(sp<RS> rs);
1266    /**
1267     * Utility function for returning an Element containing an int3.
1268     * @param[in] rs RenderScript context
1269     * @return Element
1270     */
1271    static sp<const Element> I32_3(sp<RS> rs);
1272    /**
1273     * Utility function for returning an Element containing an int4.
1274     * @param[in] rs RenderScript context
1275     * @return Element
1276     */
1277    static sp<const Element> I32_4(sp<RS> rs);
1278    /**
1279     * Utility function for returning an Element containing a ulong2.
1280     * @param[in] rs RenderScript context
1281     * @return Element
1282     */
1283    static sp<const Element> U64_2(sp<RS> rs);
1284    /**
1285     * Utility function for returning an Element containing a ulong3.
1286     * @param[in] rs RenderScript context
1287     * @return Element
1288     */
1289    static sp<const Element> U64_3(sp<RS> rs);
1290    /**
1291     * Utility function for returning an Element containing a ulong4.
1292     * @param[in] rs RenderScript context
1293     * @return Element
1294     */
1295    static sp<const Element> U64_4(sp<RS> rs);
1296    /**
1297     * Utility function for returning an Element containing a long2.
1298     * @param[in] rs RenderScript context
1299     * @return Element
1300     */
1301    static sp<const Element> I64_2(sp<RS> rs);
1302    /**
1303     * Utility function for returning an Element containing a long3.
1304     * @param[in] rs RenderScript context
1305     * @return Element
1306     */
1307    static sp<const Element> I64_3(sp<RS> rs);
1308    /**
1309     * Utility function for returning an Element containing a long4.
1310     * @param[in] rs RenderScript context
1311     * @return Element
1312     */
1313    static sp<const Element> I64_4(sp<RS> rs);
1314    /**
1315     * Utility function for returning an Element containing a YUV pixel.
1316     * @param[in] rs RenderScript context
1317     * @return Element
1318     */
1319    static sp<const Element> YUV(sp<RS> rs);
1320    /**
1321     * Utility function for returning an Element containing an rs_matrix_4x4.
1322     * @param[in] rs RenderScript context
1323     * @return Element
1324     */
1325    static sp<const Element> MATRIX_4X4(sp<RS> rs);
1326    /**
1327     * Utility function for returning an Element containing an rs_matrix_3x3.
1328     * @param[in] rs RenderScript context
1329     * @return Element
1330     */
1331    static sp<const Element> MATRIX_3X3(sp<RS> rs);
1332    /**
1333     * Utility function for returning an Element containing an rs_matrix_2x2.
1334     * @param[in] rs RenderScript context
1335     * @return Element
1336     */
1337    static sp<const Element> MATRIX_2X2(sp<RS> rs);
1338
1339    void updateFromNative();
1340
1341    /**
1342     * Create an Element with a given DataType.
1343     * @param[in] rs RenderScript context
1344     * @param[in] dt data type
1345     * @return Element
1346     */
1347    static sp<const Element> createUser(sp<RS> rs, RsDataType dt);
1348    /**
1349     * Create a vector Element with the given DataType
1350     * @param[in] rs RenderScript
1351     * @param[in] dt DataType
1352     * @param[in] size vector size
1353     * @return Element
1354     */
1355    static sp<const Element> createVector(sp<RS> rs, RsDataType dt, uint32_t size);
1356    /**
1357     * Create an Element with a given DataType and DataKind.
1358     * @param[in] rs RenderScript context
1359     * @param[in] dt DataType
1360     * @param[in] dk DataKind
1361     * @return Element
1362     */
1363    static sp<const Element> createPixel(sp<RS> rs, RsDataType dt, RsDataKind dk);
1364
1365    /**
1366     * Returns true if the Element can interoperate with this Element.
1367     * @param[in] e Element to compare
1368     * @return true if Elements can interoperate
1369     */
1370    bool isCompatible(sp<const Element>e) const;
1371
1372    /**
1373     * Builder class for producing complex elements with matching field and name
1374     * pairs. The builder starts empty. The order in which elements are added is
1375     * retained for the layout in memory.
1376     */
1377    class Builder {
1378    private:
1379        RS* mRS;
1380        size_t mElementsCount;
1381        size_t mElementsVecSize;
1382        sp<const Element> * mElements;
1383        char ** mElementNames;
1384        size_t * mElementNameLengths;
1385        uint32_t * mArraySizes;
1386        bool mSkipPadding;
1387
1388    public:
1389        Builder(sp<RS> rs);
1390        ~Builder();
1391        void add(sp<const Element> e, const char * name, uint32_t arraySize = 1);
1392        sp<const Element> create();
1393    };
1394
1395protected:
1396    friend class Type;
1397    Element(void *id, sp<RS> rs,
1398            sp<const Element> * elements,
1399            size_t elementCount,
1400            const char ** elementNames,
1401            size_t * elementNameLengths,
1402            uint32_t * arraySizes);
1403    Element(void *id, sp<RS> rs, RsDataType dt, RsDataKind dk, bool norm, uint32_t size);
1404    Element(void *id, sp<RS> rs);
1405    Element(sp<RS> rs);
1406    virtual ~Element();
1407
1408private:
1409    void updateVisibleSubElements();
1410
1411    size_t mElementsCount;
1412    size_t mVisibleElementMapSize;
1413
1414    sp<const Element> * mElements;
1415    char ** mElementNames;
1416    size_t * mElementNameLengths;
1417    uint32_t * mArraySizes;
1418    uint32_t * mVisibleElementMap;
1419    uint32_t * mOffsetInBytes;
1420
1421    RsDataType mType;
1422    RsDataKind mKind;
1423    bool mNormalized;
1424    size_t mSizeBytes;
1425    size_t mVectorSize;
1426};
1427
1428class FieldPacker {
1429protected:
1430    unsigned char* mData;
1431    size_t mPos;
1432    size_t mLen;
1433
1434public:
1435    FieldPacker(size_t len)
1436        : mPos(0), mLen(len) {
1437            mData = new unsigned char[len];
1438        }
1439
1440    virtual ~FieldPacker() {
1441        delete [] mData;
1442    }
1443
1444    void align(size_t v) {
1445        if ((v & (v - 1)) != 0) {
1446            //            ALOGE("Non-power-of-two alignment: %zu", v);
1447            return;
1448        }
1449
1450        while ((mPos & (v - 1)) != 0) {
1451            mData[mPos++] = 0;
1452        }
1453    }
1454
1455    void reset() {
1456        mPos = 0;
1457    }
1458
1459    void reset(size_t i) {
1460        if (i >= mLen) {
1461            //            ALOGE("Out of bounds: i (%zu) >= len (%zu)", i, mLen);
1462            return;
1463        }
1464        mPos = i;
1465    }
1466
1467    void skip(size_t i) {
1468        size_t res = mPos + i;
1469        if (res > mLen) {
1470            //            ALOGE("Exceeded buffer length: i (%zu) > len (%zu)", i, mLen);
1471            return;
1472        }
1473        mPos = res;
1474    }
1475
1476    void* getData() const {
1477        return mData;
1478    }
1479
1480    size_t getLength() const {
1481        return mLen;
1482    }
1483
1484    template <typename T>
1485        void add(T t) {
1486        align(sizeof(t));
1487        if (mPos + sizeof(t) <= mLen) {
1488            memcpy(&mData[mPos], &t, sizeof(t));
1489            mPos += sizeof(t);
1490        }
1491    }
1492
1493    /*
1494      void add(rs_matrix4x4 m) {
1495      for (size_t i = 0; i < 16; i++) {
1496      add(m.m[i]);
1497      }
1498      }
1499
1500      void add(rs_matrix3x3 m) {
1501      for (size_t i = 0; i < 9; i++) {
1502      add(m.m[i]);
1503      }
1504      }
1505
1506      void add(rs_matrix2x2 m) {
1507      for (size_t i = 0; i < 4; i++) {
1508      add(m.m[i]);
1509      }
1510      }
1511    */
1512
1513    void add(sp<BaseObj> obj) {
1514        if (obj != NULL) {
1515            add((uint32_t) (uintptr_t) obj->getID());
1516        } else {
1517            add((uint32_t) 0);
1518        }
1519    }
1520};
1521
1522/**
1523 * A Type describes the Element and dimensions used for an Allocation or a
1524 * parallel operation.
1525 *
1526 * A Type always includes an Element and an X dimension. A Type may be
1527 * multidimensional, up to three dimensions. A nonzero value in the Y or Z
1528 * dimensions indicates that the dimension is present. Note that a Type with
1529 * only a given X dimension and a Type with the same X dimension but Y = 1 are
1530 * not equivalent.
1531 *
1532 * A Type also supports inclusion of level of detail (LOD) or cube map
1533 * faces. LOD and cube map faces are booleans to indicate present or not
1534 * present.
1535 *
1536 * A Type also supports YUV format information to support an Allocation in a YUV
1537 * format. The YUV formats supported are YV12 and NV21.
1538 */
1539class Type : public BaseObj {
1540protected:
1541    friend class Allocation;
1542
1543    uint32_t mDimX;
1544    uint32_t mDimY;
1545    uint32_t mDimZ;
1546    RSYuvFormat mYuvFormat;
1547    bool mDimMipmaps;
1548    bool mDimFaces;
1549    size_t mElementCount;
1550    sp<const Element> mElement;
1551
1552    Type(void *id, sp<RS> rs);
1553
1554    void calcElementCount();
1555    virtual void updateFromNative();
1556
1557public:
1558
1559    /**
1560     * Returns the YUV format.
1561     * @return YUV format of the Allocation
1562     */
1563    RSYuvFormat getYuvFormat() const {
1564        return mYuvFormat;
1565    }
1566
1567    /**
1568     * Returns the Element of the Allocation.
1569     * @return YUV format of the Allocation
1570     */
1571    sp<const Element> getElement() const {
1572        return mElement;
1573    }
1574
1575    /**
1576     * Returns the X dimension of the Allocation.
1577     * @return X dimension of the allocation
1578     */
1579    uint32_t getX() const {
1580        return mDimX;
1581    }
1582
1583    /**
1584     * Returns the Y dimension of the Allocation.
1585     * @return Y dimension of the allocation
1586     */
1587    uint32_t getY() const {
1588        return mDimY;
1589    }
1590
1591    /**
1592     * Returns the Z dimension of the Allocation.
1593     * @return Z dimension of the allocation
1594     */
1595    uint32_t getZ() const {
1596        return mDimZ;
1597    }
1598
1599    /**
1600     * Returns true if the Allocation has mipmaps.
1601     * @return true if the Allocation has mipmaps
1602     */
1603    bool hasMipmaps() const {
1604        return mDimMipmaps;
1605    }
1606
1607    /**
1608     * Returns true if the Allocation is a cube map
1609     * @return true if the Allocation is a cube map
1610     */
1611    bool hasFaces() const {
1612        return mDimFaces;
1613    }
1614
1615    /**
1616     * Returns number of accessible Elements in the Allocation
1617     * @return number of accessible Elements in the Allocation
1618     */
1619    size_t getCount() const {
1620        return mElementCount;
1621    }
1622
1623    /**
1624     * Returns size in bytes of all Elements in the Allocation
1625     * @return size in bytes of all Elements in the Allocation
1626     */
1627    size_t getSizeBytes() const {
1628        return mElementCount * mElement->getSizeBytes();
1629    }
1630
1631    /**
1632     * Creates a new Type with the given Element and dimensions.
1633     * @param[in] rs RenderScript context
1634     * @param[in] e Element
1635     * @param[in] dimX X dimension
1636     * @param[in] dimY Y dimension
1637     * @param[in] dimZ Z dimension
1638     * @return new Type
1639     */
1640    static sp<const Type> create(sp<RS> rs, sp<const Element> e, uint32_t dimX, uint32_t dimY, uint32_t dimZ);
1641
1642    class Builder {
1643    protected:
1644        RS* mRS;
1645        uint32_t mDimX;
1646        uint32_t mDimY;
1647        uint32_t mDimZ;
1648        RSYuvFormat mYuvFormat;
1649        bool mDimMipmaps;
1650        bool mDimFaces;
1651        sp<const Element> mElement;
1652
1653    public:
1654        Builder(sp<RS> rs, sp<const Element> e);
1655
1656        void setX(uint32_t value);
1657        void setY(uint32_t value);
1658        void setZ(uint32_t value);
1659        void setYuvFormat(RSYuvFormat format);
1660        void setMipmaps(bool value);
1661        void setFaces(bool value);
1662        sp<const Type> create();
1663    };
1664
1665};
1666
1667/**
1668 * The parent class for all executable Scripts. This should not be used by applications.
1669 */
1670class Script : public BaseObj {
1671private:
1672
1673protected:
1674    Script(void *id, sp<RS> rs);
1675    void forEach(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1676            const void *v, size_t) const;
1677    void reduce(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1678                const RsScriptCall *sc) const;
1679    void bindAllocation(sp<Allocation> va, uint32_t slot) const;
1680    void setVar(uint32_t index, const void *, size_t len) const;
1681    void setVar(uint32_t index, sp<const BaseObj> o) const;
1682    void invoke(uint32_t slot, const void *v, size_t len) const;
1683
1684
1685    void invoke(uint32_t slot) const {
1686        invoke(slot, NULL, 0);
1687    }
1688    void setVar(uint32_t index, float v) const {
1689        setVar(index, &v, sizeof(v));
1690    }
1691    void setVar(uint32_t index, double v) const {
1692        setVar(index, &v, sizeof(v));
1693    }
1694    void setVar(uint32_t index, int32_t v) const {
1695        setVar(index, &v, sizeof(v));
1696    }
1697    void setVar(uint32_t index, uint32_t v) const {
1698        setVar(index, &v, sizeof(v));
1699    }
1700    void setVar(uint32_t index, int64_t v) const {
1701        setVar(index, &v, sizeof(v));
1702    }
1703    void setVar(uint32_t index, bool v) const {
1704        setVar(index, &v, sizeof(v));
1705    }
1706
1707public:
1708    class FieldBase {
1709    protected:
1710        sp<const Element> mElement;
1711        sp<Allocation> mAllocation;
1712
1713        void init(sp<RS> rs, uint32_t dimx, uint32_t usages = 0);
1714
1715    public:
1716        sp<const Element> getElement() {
1717            return mElement;
1718        }
1719
1720        sp<const Type> getType() {
1721            return mAllocation->getType();
1722        }
1723
1724        sp<const Allocation> getAllocation() {
1725            return mAllocation;
1726        }
1727
1728        //void updateAllocation();
1729    };
1730};
1731
1732/**
1733 * The parent class for all user-defined scripts. This is intended to be used by auto-generated code only.
1734 */
1735class ScriptC : public Script {
1736protected:
1737    ScriptC(sp<RS> rs,
1738            const void *codeTxt, size_t codeLength,
1739            const char *cachedName, size_t cachedNameLength,
1740            const char *cacheDir, size_t cacheDirLength);
1741
1742};
1743
1744/**
1745 * The parent class for all script intrinsics. Intrinsics provide highly optimized implementations of
1746 * basic functions. This is not intended to be used directly.
1747 */
1748class ScriptIntrinsic : public Script {
1749 protected:
1750    sp<const Element> mElement;
1751    ScriptIntrinsic(sp<RS> rs, int id, sp<const Element> e);
1752    virtual ~ScriptIntrinsic();
1753};
1754
1755/**
1756 * Intrinsic for converting RGB to RGBA by using a 3D lookup table. The incoming
1757 * r,g,b values are use as normalized x,y,z coordinates into a 3D
1758 * allocation. The 8 nearest values are sampled and linearly interpolated. The
1759 * result is placed in the output.
1760 */
1761class ScriptIntrinsic3DLUT : public ScriptIntrinsic {
1762 private:
1763    ScriptIntrinsic3DLUT(sp<RS> rs, sp<const Element> e);
1764 public:
1765    /**
1766     * Supported Element types are U8_4. Default lookup table is identity.
1767     * @param[in] rs RenderScript context
1768     * @param[in] e Element
1769     * @return new ScriptIntrinsic
1770     */
1771    static sp<ScriptIntrinsic3DLUT> create(sp<RS> rs, sp<const Element> e);
1772
1773    /**
1774     * Launch the intrinsic.
1775     * @param[in] ain input Allocation
1776     * @param[in] aout output Allocation
1777     */
1778    void forEach(sp<Allocation> ain, sp<Allocation> aout);
1779
1780    /**
1781     * Sets the lookup table. The lookup table must use the same Element as the
1782     * intrinsic.
1783     * @param[in] lut new lookup table
1784     */
1785    void setLUT(sp<Allocation> lut);
1786};
1787
1788
1789/**
1790 * Intrinsic kernel provides high performance RenderScript APIs to BLAS.
1791 *
1792 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
1793 * building blocks for performing basic vector and matrix operations.
1794 *
1795 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
1796 *
1797 **/
1798class ScriptIntrinsicBLAS : public ScriptIntrinsic {
1799 private:
1800    ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e);
1801 public:
1802    /**
1803     * Create an intrinsic to access BLAS subroutines.
1804     *
1805     * @param rs The RenderScript context
1806     * @return ScriptIntrinsicBLAS
1807     */
1808    static sp<ScriptIntrinsicBLAS> create(sp<RS> rs);
1809
1810    /**
1811     * SGEMV performs one of the matrix-vector operations
1812     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1813     *
1814     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
1815     *
1816     * @param TransA The type of transpose applied to matrix A.
1817     * @param alpha The scalar alpha.
1818     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
1819     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1820     * @param incX The increment for the elements of vector x, must be larger than zero.
1821     * @param beta The scalar beta.
1822     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
1823     * @param incY The increment for the elements of vector y, must be larger than zero.
1824     */
1825    void SGEMV(RsBlasTranspose TransA,
1826               float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1827               float beta, sp<Allocation> Y, int incY);
1828
1829    /**
1830     * DGEMV performs one of the matrix-vector operations
1831     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1832     *
1833     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
1834     *
1835     * @param TransA The type of transpose applied to matrix A.
1836     * @param alpha The scalar alpha.
1837     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
1838     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
1839     * @param incX The increment for the elements of vector x, must be larger than zero.
1840     * @param beta The scalar beta.
1841     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
1842     * @param incY The increment for the elements of vector y, must be larger than zero.
1843     */
1844    void DGEMV(RsBlasTranspose TransA,
1845               double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1846               double beta, sp<Allocation> Y, int incY);
1847
1848    /**
1849     * CGEMV performs one of the matrix-vector operations
1850     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1851     *
1852     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
1853     *
1854     * @param TransA The type of transpose applied to matrix A.
1855     * @param alpha The scalar alpha.
1856     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
1857     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
1858     * @param incX The increment for the elements of vector x, must be larger than zero.
1859     * @param beta The scalar beta.
1860     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
1861     * @param incY The increment for the elements of vector y, must be larger than zero.
1862     */
1863    void CGEMV(RsBlasTranspose TransA,
1864               Float2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1865               Float2 beta, sp<Allocation> Y, int incY);
1866
1867    /**
1868     * ZGEMV performs one of the matrix-vector operations
1869     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1870     *
1871     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
1872     *
1873     * @param TransA The type of transpose applied to matrix A.
1874     * @param alpha The scalar alpha.
1875     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
1876     * @param X The input allocation containing vector x, supported element type: {Element#F64_2}.
1877     * @param incX The increment for the elements of vector x, must be larger than zero.
1878     * @param beta The scalar beta.
1879     * @param Y The input/output allocation containing vector y, supported element type: {Element#F64_2}.
1880     * @param incY The increment for the elements of vector y, must be larger than zero.
1881     */
1882    void ZGEMV(RsBlasTranspose TransA,
1883               Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1884               Double2 beta, sp<Allocation> Y, int incY);
1885
1886    /**
1887     * SGBMV performs one of the matrix-vector operations
1888     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1889     *
1890     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
1891     *
1892     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1893     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1894     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1895     *           for i in range(0, m):
1896     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1897     *                  b[i, j-i+kl] = a[i, j]
1898     *
1899     * @param TransA The type of transpose applied to matrix A.
1900     * @param KL The number of sub-diagonals of the matrix A.
1901     * @param KU The number of super-diagonals of the matrix A.
1902     * @param alpha The scalar alpha.
1903     * @param A The input allocation containing the band matrix A, supported element type: {Element#F32}.
1904     * @param X The input allocation containing vector x, supported element type: {Element#F32}.
1905     * @param incX The increment for the elements of vector x, must be larger than zero.
1906     * @param beta The scalar beta.
1907     * @param Y The input/output allocation containing vector y, supported element type: {Element#F32}.
1908     * @param incY The increment for the elements of vector y, must be larger than zero.
1909     */
1910    void SGBMV(RsBlasTranspose TransA,
1911               int KL, int KU, float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1912               float beta, sp<Allocation> Y, int incY);
1913
1914    /**
1915     * DGBMV performs one of the matrix-vector operations
1916     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1917     *
1918     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
1919     *
1920     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1921     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1922     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1923     *           for i in range(0, m):
1924     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1925     *                  b[i, j-i+kl] = a[i, j]
1926     *
1927     * @param TransA The type of transpose applied to matrix A.
1928     * @param KL The number of sub-diagonals of the matrix A.
1929     * @param KU The number of super-diagonals of the matrix A.
1930     * @param alpha The scalar alpha.
1931     * @param A The input allocation containing the band matrix A, supported element type: {Element#F64}.
1932     * @param X The input allocation containing vector x, supported element type: {Element#F64}.
1933     * @param incX The increment for the elements of vector x, must be larger than zero.
1934     * @param beta The scalar beta.
1935     * @param Y The input/output allocation containing vector y, supported element type: {Element#F64}.
1936     * @param incY The increment for the elements of vector y, must be larger than zero.
1937     */
1938    void DGBMV(RsBlasTranspose TransA,
1939               int KL, int KU, double alpha, sp<Allocation> A, sp<Allocation> X,
1940               int incX, double beta, sp<Allocation> Y, int incY);
1941
1942    /**
1943     * CGBMV performs one of the matrix-vector operations
1944     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1945     *
1946     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
1947     *
1948     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1949     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1950     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1951     *           for i in range(0, m):
1952     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1953     *                  b[i, j-i+kl] = a[i, j]
1954     *
1955     * @param TransA The type of transpose applied to matrix A.
1956     * @param KL The number of sub-diagonals of the matrix A.
1957     * @param KU The number of super-diagonals of the matrix A.
1958     * @param alpha The scalar alpha.
1959     * @param A The input allocation containing the band matrix A, supported element type: {Element#F32_2}.
1960     * @param X The input allocation containing vector x, supported element type: {Element#F32_2}.
1961     * @param incX The increment for the elements of vector x, must be larger than zero.
1962     * @param beta The scalar beta.
1963     * @param Y The input/output allocation containing vector y, supported element type: {Element#F32_2}.
1964     * @param incY The increment for the elements of vector y, must be larger than zero.
1965     */
1966    void CGBMV(RsBlasTranspose TransA,
1967               int KL, int KU, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
1968               int incX, Float2 beta, sp<Allocation> Y, int incY);
1969
1970    /**
1971     * ZGBMV performs one of the matrix-vector operations
1972     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1973     *
1974     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
1975     *
1976     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1977     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1978     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1979     *           for i in range(0, m):
1980     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1981     *                  b[i, j-i+kl] = a[i, j]
1982     *
1983     * @param TransA The type of transpose applied to matrix A.
1984     * @param KL The number of sub-diagonals of the matrix A.
1985     * @param KU The number of super-diagonals of the matrix A.
1986     * @param alpha The scalar alpha.
1987     * @param A The input allocation containing the band matrix A, supported element type: {Element#F64_2}.
1988     * @param X The input allocation containing vector x, supported element type: {Element#F64_2}.
1989     * @param incX The increment for the elements of vector x, must be larger than zero.
1990     * @param beta The scalar beta.
1991     * @param Y The input/output allocation containing vector y, supported element type: {Element#F64_2}.
1992     * @param incY The increment for the elements of vector y, must be larger than zero.
1993     */
1994    void ZGBMV(RsBlasTranspose TransA,
1995               int KL, int KU, Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1996               Double2 beta, sp<Allocation> Y, int incY);
1997
1998    /**
1999     * STRMV performs one of the matrix-vector operations
2000     * x := A*x   or   x := A**T*x
2001     *
2002     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
2003     *
2004     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2005     * @param TransA The type of transpose applied to matrix A.
2006     * @param Diag Specifies whether or not A is unit triangular.
2007     * @param A The input allocation containing matrix A, supported element type: {Element#F32}.
2008     * @param X The input/output allocation containing vector x, supported element type: {Element#F32}.
2009     * @param incX The increment for the elements of vector x, must be larger than zero.
2010     */
2011    void STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2012               sp<Allocation> A, sp<Allocation> X, int incX);
2013
2014    /**
2015     * DTRMV performs one of the matrix-vector operations
2016     * x := A*x   or   x := A**T*x
2017     *
2018     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
2019     *
2020     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2021     * @param TransA The type of transpose applied to matrix A.
2022     * @param Diag Specifies whether or not A is unit triangular.
2023     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
2024     * @param X The input/output allocation containing vector x, supported element type: {Element#F64}.
2025     * @param incX The increment for the elements of vector x, must be larger than zero.
2026     */
2027    void DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2028               sp<Allocation> A, sp<Allocation> X, int incX);
2029
2030    /**
2031     * CTRMV performs one of the matrix-vector operations
2032     * x := A*x   or   x := A**T*x   or   x := A**H*x
2033     *
2034     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
2035     *
2036     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2037     * @param TransA The type of transpose applied to matrix A.
2038     * @param Diag Specifies whether or not A is unit triangular.
2039     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
2040     * @param X The input/output allocation containing vector x, supported element type: {Element#F32_2}.
2041     * @param incX The increment for the elements of vector x, must be larger than zero.
2042     */
2043    void CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2044               sp<Allocation> A, sp<Allocation> X, int incX);
2045
2046    /**
2047     * ZTRMV performs one of the matrix-vector operations
2048     * x := A*x   or   x := A**T*x   or   x := A**H*x
2049     *
2050     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
2051     *
2052     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2053     * @param TransA The type of transpose applied to matrix A.
2054     * @param Diag Specifies whether or not A is unit triangular.
2055     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
2056     * @param X The input/output allocation containing vector x, supported element type: {Element#F64_2}.
2057     * @param incX The increment for the elements of vector x, must be larger than zero.
2058     */
2059    void ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2060               sp<Allocation> A, sp<Allocation> X, int incX);
2061
2062    /**
2063     * STBMV performs one of the matrix-vector operations
2064     * x := A*x   or   x := A**T*x
2065     *
2066     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
2067     *
2068     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2069     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2070     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2071     *           for i in range(0, n):
2072     *              for j in range(i, min(i+k+1, n)):
2073     *                  b[i, j-i] = a[i, j]
2074     *
2075     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2076     * @param TransA The type of transpose applied to matrix A.
2077     * @param Diag Specifies whether or not A is unit triangular.
2078     * @param K The number of off-diagonals of the matrix A.
2079     * @param A The input allocation containing matrix A, supported element type: {Element#F32}.
2080     * @param X The input/output allocation containing vector x, supported element type: {Element#F32}.
2081     * @param incX The increment for the elements of vector x, must be larger than zero.
2082     */
2083    void STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2084               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2085
2086    /**
2087     * DTBMV performs one of the matrix-vector operations
2088     * x := A*x   or   x := A**T*x
2089     *
2090     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
2091     *
2092     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2093     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2094     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2095     *           for i in range(0, n):
2096     *              for j in range(i, min(i+k+1, n)):
2097     *                  b[i, j-i] = a[i, j]
2098     *
2099     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2100     * @param TransA The type of transpose applied to matrix A.
2101     * @param Diag Specifies whether or not A is unit triangular.
2102     * @param K The number of off-diagonals of the matrix A.
2103     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
2104     * @param X The input/output allocation containing vector x, supported element type: {Element#F64}.
2105     * @param incX The increment for the elements of vector x, must be larger than zero.
2106     */
2107    void DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2108               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2109
2110    /**
2111     * CTBMV performs one of the matrix-vector operations
2112     * x := A*x   or   x := A**T*x   or   x := A**H*x
2113     *
2114     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
2115     *
2116     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2117     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2118     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2119     *           for i in range(0, n):
2120     *              for j in range(i, min(i+k+1, n)):
2121     *                  b[i, j-i] = a[i, j]
2122     *
2123     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2124     * @param TransA The type of transpose applied to matrix A.
2125     * @param Diag Specifies whether or not A is unit triangular.
2126     * @param K The number of off-diagonals of the matrix A.
2127     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
2128     * @param X The input/output allocation containing vector x, supported element type: {Element#F32_2}.
2129     * @param incX The increment for the elements of vector x, must be larger than zero.
2130     */
2131    void CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2132               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2133
2134    /**
2135     * ZTBMV performs one of the matrix-vector operations
2136     * x := A*x   or   x := A**T*x   or   x := A**H*x
2137     *
2138     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
2139     *
2140     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2141     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2142     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2143     *           for i in range(0, n):
2144     *              for j in range(i, min(i+k+1, n)):
2145     *                  b[i, j-i] = a[i, j]
2146     *
2147     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2148     * @param TransA The type of transpose applied to matrix A.
2149     * @param Diag Specifies whether or not A is unit triangular.
2150     * @param K The number of off-diagonals of the matrix A.
2151     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
2152     * @param X The input/output allocation containing vector x, supported element type: {Element#F64_2}.
2153     * @param incX The increment for the elements of vector x, must be larger than zero.
2154     */
2155    void ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2156               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2157
2158    /**
2159     * STPMV performs one of the matrix-vector operations
2160     * x := A*x   or   x := A**T*x
2161     *
2162     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
2163     *
2164     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2165     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2166     *       'a' to packed matrix 'b'.
2167     *           k = 0
2168     *           for i in range(0, n):
2169     *              for j in range(i, n):
2170     *                  b[k++] = a[i, j]
2171     *
2172     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2173     * @param TransA The type of transpose applied to matrix A.
2174     * @param Diag Specifies whether or not A is unit triangular.
2175     * @param Ap The input allocation containing packed matrix A, supported element type: {Element#F32}.
2176     * @param X The input/output allocation containing vector x, supported element type: {Element#F32}.
2177     * @param incX The increment for the elements of vector x, must be larger than zero.
2178     */
2179    void STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2180               sp<Allocation> Ap, sp<Allocation> X, int incX);
2181
2182    /**
2183     * DTPMV performs one of the matrix-vector operations
2184     * x := A*x   or   x := A**T*x
2185     *
2186     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
2187     *
2188     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2189     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2190     *       'a' to packed matrix 'b'.
2191     *           k = 0
2192     *           for i in range(0, n):
2193     *              for j in range(i, n):
2194     *                  b[k++] = a[i, j]
2195     *
2196     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2197     * @param TransA The type of transpose applied to matrix A.
2198     * @param Diag Specifies whether or not A is unit triangular.
2199     * @param Ap The input allocation containing packed matrix A, supported element type: {Element#F64}.
2200     * @param X The input/output allocation containing vector x, supported element type: {Element#F64}.
2201     * @param incX The increment for the elements of vector x, must be larger than zero.
2202     */
2203    void DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2204               sp<Allocation> Ap, sp<Allocation> X, int incX);
2205
2206    /**
2207     * CTPMV performs one of the matrix-vector operations
2208     * x := A*x   or   x := A**T*x   or   x := A**H*x
2209     *
2210     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
2211     *
2212     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2213     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2214     *       'a' to packed matrix 'b'.
2215     *           k = 0
2216     *           for i in range(0, n):
2217     *              for j in range(i, n):
2218     *                  b[k++] = a[i, j]
2219     *
2220     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2221     * @param TransA The type of transpose applied to matrix A.
2222     * @param Diag Specifies whether or not A is unit triangular.
2223     * @param Ap The input allocation containing packed matrix A, supported element type: {Element#F32_2}.
2224     * @param X The input/output allocation containing vector x, supported element type: {Element#F32_2}.
2225     * @param incX The increment for the elements of vector x, must be larger than zero.
2226     */
2227    void CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2228               sp<Allocation> Ap, sp<Allocation> X, int incX);
2229
2230    /**
2231     * ZTPMV performs one of the matrix-vector operations
2232     * x := A*x   or   x := A**T*x   or   x := A**H*x
2233     *
2234     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
2235     *
2236     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2237     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2238     *       'a' to packed matrix 'b'.
2239     *           k = 0
2240     *           for i in range(0, n):
2241     *              for j in range(i, n):
2242     *                  b[k++] = a[i, j]
2243     *
2244     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2245     * @param TransA The type of transpose applied to matrix A.
2246     * @param Diag Specifies whether or not A is unit triangular.
2247     * @param Ap The input allocation containing packed matrix A, supported element type: {Element#F64_2}.
2248     * @param X The input/output allocation containing vector x, supported element type: {Element#F64_2}.
2249     * @param incX The increment for the elements of vector x, must be larger than zero.
2250     */
2251    void ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2252               sp<Allocation> Ap, sp<Allocation> X, int incX);
2253
2254    /**
2255     * STRSV solves one of the systems of equations
2256     * A*x = b   or   A**T*x = b
2257     *
2258     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
2259     *
2260     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2261     * @param TransA The type of transpose applied to matrix A.
2262     * @param Diag Specifies whether or not A is unit triangular.
2263     * @param A The input allocation containing matrix A, supported element type: {Element#F32}.
2264     * @param X The input allocation containing vector x, supported element type: {Element#F32}.
2265     * @param incX The increment for the elements of vector x, must be larger than zero.
2266     */
2267    void STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2268               sp<Allocation> A, sp<Allocation> X, int incX);
2269
2270    /**
2271     * DTRSV solves one of the systems of equations
2272     * A*x = b   or   A**T*x = b
2273     *
2274     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
2275     *
2276     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2277     * @param TransA The type of transpose applied to matrix A.
2278     * @param Diag Specifies whether or not A is unit triangular.
2279     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
2280     * @param X The input allocation containing vector x, supported element type: {Element#F64}.
2281     * @param incX The increment for the elements of vector x, must be larger than zero.
2282     */
2283    void DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2284               sp<Allocation> A, sp<Allocation> X, int incX);
2285
2286    /**
2287     * CTRSV solves one of the systems of equations
2288     * A*x = b   or   A**T*x = b   or   A**H*x = b
2289     *
2290     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
2291     *
2292     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2293     * @param TransA The type of transpose applied to matrix A.
2294     * @param Diag Specifies whether or not A is unit triangular.
2295     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
2296     * @param X The input allocation containing vector x, supported element type: {Element#F32_2}.
2297     * @param incX The increment for the elements of vector x, must be larger than zero.
2298     */
2299    void CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2300               sp<Allocation> A, sp<Allocation> X, int incX);
2301
2302    /**
2303     * ZTRSV solves one of the systems of equations
2304     * A*x = b   or   A**T*x = b   or   A**H*x = b
2305     *
2306     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
2307     *
2308     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2309     * @param TransA The type of transpose applied to matrix A.
2310     * @param Diag Specifies whether or not A is unit triangular.
2311     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
2312     * @param X The input allocation containing vector x, supported element type: {Element#F64_2}.
2313     * @param incX The increment for the elements of vector x, must be larger than zero.
2314     */
2315    void ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2316               sp<Allocation> A, sp<Allocation> X, int incX);
2317
2318    /**
2319     * STBSV solves one of the systems of equations
2320     * A*x = b   or   A**T*x = b
2321     *
2322     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
2323     *
2324     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2325     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2326     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2327     *           for i in range(0, n):
2328     *              for j in range(i, min(i+k+1, n)):
2329     *                  b[i, j-i] = a[i, j]
2330     *
2331     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2332     * @param TransA The type of transpose applied to matrix A.
2333     * @param Diag Specifies whether or not A is unit triangular.
2334     * @param K The number of off-diagonals of the matrix A.
2335     * @param A The input allocation containing matrix A, supported element type: {Element#F32}.
2336     * @param X The input allocation containing vector x, supported element type: {Element#F32}.
2337     * @param incX The increment for the elements of vector x, must be larger than zero.
2338     */
2339    void STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2340               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2341
2342    /**
2343     * DTBSV solves one of the systems of equations
2344     * A*x = b   or   A**T*x = b
2345     *
2346     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
2347     *
2348     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2349     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2350     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2351     *           for i in range(0, n):
2352     *              for j in range(i, min(i+k+1, n)):
2353     *                  b[i, j-i] = a[i, j]
2354     *
2355     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2356     * @param TransA The type of transpose applied to matrix A.
2357     * @param Diag Specifies whether or not A is unit triangular.
2358     * @param K The number of off-diagonals of the matrix A.
2359     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
2360     * @param X The input allocation containing vector x, supported element type: {Element#F64}.
2361     * @param incX The increment for the elements of vector x, must be larger than zero.
2362     */
2363    void DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2364               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2365
2366    /**
2367     * CTBSV solves one of the systems of equations
2368     * A*x = b   or   A**T*x = b   or   A**H*x = b
2369     *
2370     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
2371     *
2372     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2373     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2374     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2375     *           for i in range(0, n):
2376     *              for j in range(i, min(i+k+1, n)):
2377     *                  b[i, j-i] = a[i, j]
2378     *
2379     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2380     * @param TransA The type of transpose applied to matrix A.
2381     * @param Diag Specifies whether or not A is unit triangular.
2382     * @param K The number of off-diagonals of the matrix A.
2383     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
2384     * @param X The input allocation containing vector x, supported element type: {Element#F32_2}.
2385     * @param incX The increment for the elements of vector x, must be larger than zero.
2386     */
2387    void CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2388               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2389
2390    /**
2391     * ZTBSV solves one of the systems of equations
2392     * A*x = b   or   A**T*x = b   or   A**H*x = b
2393     *
2394     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
2395     *
2396     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2397     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2398     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2399     *           for i in range(0, n):
2400     *              for j in range(i, min(i+k+1, n)):
2401     *                  b[i, j-i] = a[i, j]
2402     *
2403     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2404     * @param TransA The type of transpose applied to matrix A.
2405     * @param Diag Specifies whether or not A is unit triangular.
2406     * @param K The number of off-diagonals of the matrix A.
2407     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
2408     * @param X The input allocation containing vector x, supported element type: {Element#F64_2}.
2409     * @param incX The increment for the elements of vector x, must be larger than zero.
2410     */
2411    void ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2412               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2413
2414    /**
2415     * STPSV solves one of the systems of equations
2416     * A*x = b   or   A**T*x = b
2417     *
2418     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
2419     *
2420     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2421     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2422     *       'a' to packed matrix 'b'.
2423     *           k = 0
2424     *           for i in range(0, n):
2425     *              for j in range(i, n):
2426     *                  b[k++] = a[i, j]
2427     *
2428     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2429     * @param TransA The type of transpose applied to matrix A.
2430     * @param Diag Specifies whether or not A is unit triangular.
2431     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2432     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2433     * @param incX The increment for the elements of vector x, must be larger than zero.
2434     */
2435    void STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2436               sp<Allocation> Ap, sp<Allocation> X, int incX);
2437
2438    /**
2439     * DTPSV solves one of the systems of equations
2440     * A*x = b   or   A**T*x = b
2441     *
2442     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
2443     *
2444     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2445     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2446     *       'a' to packed matrix 'b'.
2447     *           k = 0
2448     *           for i in range(0, n):
2449     *              for j in range(i, n):
2450     *                  b[k++] = a[i, j]
2451     *
2452     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2453     * @param TransA The type of transpose applied to matrix A.
2454     * @param Diag Specifies whether or not A is unit triangular.
2455     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2456     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2457     * @param incX The increment for the elements of vector x, must be larger than zero.
2458     */
2459    void DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2460               sp<Allocation> Ap, sp<Allocation> X, int incX);
2461
2462    /**
2463     * CTPSV solves one of the systems of equations
2464     * A*x = b   or   A**T*x = b   or   A**H*x = b
2465     *
2466     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
2467     *
2468     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2469     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2470     *       'a' to packed matrix 'b'.
2471     *           k = 0
2472     *           for i in range(0, n):
2473     *              for j in range(i, n):
2474     *                  b[k++] = a[i, j]
2475     *
2476     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2477     * @param TransA The type of transpose applied to matrix A.
2478     * @param Diag Specifies whether or not A is unit triangular.
2479     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2480     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2481     * @param incX The increment for the elements of vector x, must be larger than zero.
2482     */
2483    void CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2484               sp<Allocation> Ap, sp<Allocation> X, int incX);
2485
2486    /**
2487     * ZTPSV solves one of the systems of equations
2488     * A*x = b   or   A**T*x = b   or   A**H*x = b
2489     *
2490     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
2491     *
2492     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2493     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2494     *       'a' to packed matrix 'b'.
2495     *           k = 0
2496     *           for i in range(0, n):
2497     *              for j in range(i, n):
2498     *                  b[k++] = a[i, j]
2499     *
2500     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2501     * @param TransA The type of transpose applied to matrix A.
2502     * @param Diag Specifies whether or not A is unit triangular.
2503     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}.
2504     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2505     * @param incX The increment for the elements of vector x, must be larger than zero.
2506     */
2507    void ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2508               sp<Allocation> Ap, sp<Allocation> X, int incX);
2509
2510    /**
2511     * SSYMV performs the matrix-vector operation
2512     * y := alpha*A*x + beta*y
2513     *
2514     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
2515     *
2516     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2517     * @param alpha The scalar alpha.
2518     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2519     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2520     * @param incX The increment for the elements of vector x, must be larger than zero.
2521     * @param beta The scalar beta.
2522     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F32}.
2523     * @param incY The increment for the elements of vector y, must be larger than zero.
2524     */
2525    void SSYMV(RsBlasUplo Uplo, float alpha, sp<Allocation> A, sp<Allocation> X,
2526               int incX, float beta, sp<Allocation> Y, int incY);
2527
2528    /**
2529     * SSBMV performs the matrix-vector operation
2530     * y := alpha*A*x + beta*y
2531     *
2532     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
2533     *
2534     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2535     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2536     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2537     *           for i in range(0, n):
2538     *              for j in range(i, min(i+k+1, n)):
2539     *                  b[i, j-i] = a[i, j]
2540     *
2541     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2542     * @param K The number of off-diagonals of the matrix A.
2543     * @param alpha The scalar alpha.
2544     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2545     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2546     * @param incX The increment for the elements of vector x, must be larger than zero.
2547     * @param beta The scalar beta.
2548     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F32}.
2549     * @param incY The increment for the elements of vector y, must be larger than zero.
2550     */
2551    void SSBMV(RsBlasUplo Uplo, int K, float alpha, sp<Allocation> A, sp<Allocation> X,
2552               int incX, float beta, sp<Allocation> Y, int incY);
2553
2554    /**
2555     * SSPMV performs the matrix-vector operation
2556     * y := alpha*A*x + beta*y
2557     *
2558     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
2559     *
2560     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2561     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2562     *       'a' to packed matrix 'b'.
2563     *           k = 0
2564     *           for i in range(0, n):
2565     *              for j in range(i, n):
2566     *                  b[k++] = a[i, j]
2567     *
2568     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2569     * @param alpha The scalar alpha.
2570     * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32}.
2571     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2572     * @param incX The increment for the elements of vector x, must be larger than zero.
2573     * @param beta The scalar beta.
2574     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F32}.
2575     * @param incY The increment for the elements of vector y, must be larger than zero.
2576     */
2577    void SSPMV(RsBlasUplo Uplo, float alpha, sp<Allocation> Ap, sp<Allocation> X,
2578               int incX, float beta, sp<Allocation> Y, int incY);
2579
2580    /**
2581     * SGER performs the rank 1 operation
2582     * A := alpha*x*y**T + A
2583     *
2584     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
2585     *
2586     * @param alpha The scalar alpha.
2587     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2588     * @param incX The increment for the elements of vector x, must be larger than zero.
2589     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2590     * @param incY The increment for the elements of vector y, must be larger than zero.
2591     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F32}.
2592     */
2593    void SGER(float alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2594
2595    /**
2596     * SSYR performs the rank 1 operation
2597     * A := alpha*x*x**T + A
2598     *
2599     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
2600     *
2601     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2602     * @param alpha The scalar alpha.
2603     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2604     * @param incX The increment for the elements of vector x, must be larger than zero.
2605     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F32}.
2606     */
2607    void SSYR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2608
2609    /**
2610     * SSPR performs the rank 1 operation
2611     * A := alpha*x*x**T + A
2612     *
2613     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
2614     *
2615     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2616     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2617     *       'a' to packed matrix 'b'.
2618     *           k = 0
2619     *           for i in range(0, n):
2620     *              for j in range(i, n):
2621     *                  b[k++] = a[i, j]
2622     *
2623     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2624     * @param alpha The scalar alpha.
2625     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2626     * @param incX The increment for the elements of vector x, must be larger than zero.
2627     * @param Ap The input/output allocation contains matrix A, supported elements type: {Element#F32}.
2628     */
2629    void SSPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2630
2631    /**
2632     * SSYR2 performs the symmetric rank 2 operation
2633     * A := alpha*x*y**T + alpha*y*x**T + A
2634     *
2635     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
2636     *
2637     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2638     * @param alpha The scalar alpha.
2639     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2640     * @param incX The increment for the elements of vector x, must be larger than zero.
2641     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2642     * @param incY The increment for the elements of vector y, must be larger than zero.
2643     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F32}.
2644     */
2645    void SSYR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2646               sp<Allocation> Y, int incY, sp<Allocation> A);
2647
2648    /**
2649     * SSPR2 performs the symmetric rank 2 operation
2650     * A := alpha*x*y**T + alpha*y*x**T + A
2651     *
2652     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
2653     *
2654     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2655     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2656     *       'a' to packed matrix 'b'.
2657     *           k = 0
2658     *           for i in range(0, n):
2659     *              for j in range(i, n):
2660     *                  b[k++] = a[i, j]
2661     *
2662     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2663     * @param alpha The scalar alpha.
2664     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2665     * @param incX The increment for the elements of vector x, must be larger than zero.
2666     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2667     * @param incY The increment for the elements of vector y, must be larger than zero.
2668     * @param Ap The input/output allocation contains matrix A, supported elements type: {Element#F32}.
2669     */
2670    void SSPR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2671               sp<Allocation> Y, int incY, sp<Allocation> Ap);
2672
2673    /**
2674     * DSYMV performs the matrix-vector operation
2675     * y := alpha*A*x + beta*y
2676     *
2677     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
2678     *
2679     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2680     * @param alpha The scalar alpha.
2681     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2682     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2683     * @param incX The increment for the elements of vector x, must be larger than zero.
2684     * @param beta The scalar beta.
2685     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F64}.
2686     * @param incY The increment for the elements of vector y, must be larger than zero.
2687     */
2688    void DSYMV(RsBlasUplo Uplo, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2689               double beta, sp<Allocation> Y, int incY);
2690
2691    /**
2692     * DSBMV performs the matrix-vector operation
2693     * y := alpha*A*x + beta*y
2694     *
2695     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
2696     *
2697     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2698     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2699     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2700     *           for i in range(0, n):
2701     *              for j in range(i, min(i+k+1, n)):
2702     *                  b[i, j-i] = a[i, j]
2703     *
2704     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2705     * @param K The number of off-diagonals of the matrix A.
2706     * @param alpha The scalar alpha.
2707     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2708     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2709     * @param incX The increment for the elements of vector x, must be larger than zero.
2710     * @param beta The scalar beta.
2711     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F64}.
2712     * @param incY The increment for the elements of vector y, must be larger than zero.
2713     */
2714    void DSBMV(RsBlasUplo Uplo, int K, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2715               double beta, sp<Allocation> Y, int incY);
2716
2717    /**
2718     * DSPMV performs the matrix-vector operation
2719     * y := alpha*A*x + beta*y
2720     *
2721     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2722     *
2723     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2724     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2725     *       'a' to packed matrix 'b'.
2726     *           k = 0
2727     *           for i in range(0, n):
2728     *              for j in range(i, n):
2729     *                  b[k++] = a[i, j]
2730     *
2731     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2732     * @param alpha The scalar alpha.
2733     * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64}.
2734     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2735     * @param incX The increment for the elements of vector x, must be larger than zero.
2736     * @param beta The scalar beta.
2737     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F64}.
2738     * @param incY The increment for the elements of vector y, must be larger than zero.
2739     */
2740    void DSPMV(RsBlasUplo Uplo, double alpha, sp<Allocation> Ap, sp<Allocation> X, int incX,
2741               double beta, sp<Allocation> Y, int incY);
2742
2743    /**
2744     * DGER performs the rank 1 operation
2745     * A := alpha*x*y**T + A
2746     *
2747     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2748     *
2749     * @param alpha The scalar alpha.
2750     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2751     * @param incX The increment for the elements of vector x, must be larger than zero.
2752     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2753     * @param incY The increment for the elements of vector y, must be larger than zero.
2754     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F64}.
2755     */
2756    void DGER(double alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2757
2758    /**
2759     * DSYR performs the rank 1 operation
2760     * A := alpha*x*x**T + A
2761     *
2762     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2763     *
2764     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2765     * @param alpha The scalar alpha.
2766     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2767     * @param incX The increment for the elements of vector x, must be larger than zero.
2768     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F64}.
2769     */
2770    void DSYR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2771
2772    /**
2773     * DSPR performs the rank 1 operation
2774     * A := alpha*x*x**T + A
2775     *
2776     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2777     *
2778     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2779     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2780     *       'a' to packed matrix 'b'.
2781     *           k = 0
2782     *           for i in range(0, n):
2783     *              for j in range(i, n):
2784     *                  b[k++] = a[i, j]
2785     *
2786     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2787     * @param alpha The scalar alpha.
2788     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2789     * @param incX The increment for the elements of vector x, must be larger than zero.
2790     * @param Ap The input/output allocation contains matrix A, supported elements type: {Element#F64}.
2791     */
2792    void DSPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2793
2794    /**
2795     * DSYR2 performs the symmetric rank 2 operation
2796     * A := alpha*x*y**T + alpha*y*x**T + A
2797     *
2798     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2799     *
2800     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2801     * @param alpha The scalar alpha.
2802     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2803     * @param incX The increment for the elements of vector x, must be larger than zero.
2804     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2805     * @param incY The increment for the elements of vector y, must be larger than zero.
2806     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F64}.
2807     */
2808    void DSYR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2809               sp<Allocation> Y, int incY, sp<Allocation> A);
2810
2811    /**
2812     * DSPR2 performs the symmetric rank 2 operation
2813     * A := alpha*x*y**T + alpha*y*x**T + A
2814     *
2815     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2816     *
2817     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2818     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2819     *       'a' to packed matrix 'b'.
2820     *           k = 0
2821     *           for i in range(0, n):
2822     *              for j in range(i, n):
2823     *                  b[k++] = a[i, j]
2824     *
2825     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2826     * @param alpha The scalar alpha.
2827     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2828     * @param incX The increment for the elements of vector x, must be larger than zero.
2829     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2830     * @param incY The increment for the elements of vector y, must be larger than zero.
2831     * @param Ap The input/output allocation contains matrix A, supported elements type: {Element#F64}.
2832     */
2833    void DSPR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2834               sp<Allocation> Y, int incY, sp<Allocation> Ap);
2835
2836    /**
2837     * CHEMV performs the matrix-vector operation
2838     * y := alpha*A*x + beta*y
2839     *
2840     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2841     *
2842     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2843     * @param alpha The scalar alpha.
2844     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2845     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2846     * @param incX The increment for the elements of vector x, must be larger than zero.
2847     * @param beta The scalar beta.
2848     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F32_2}.
2849     * @param incY The increment for the elements of vector y, must be larger than zero.
2850     */
2851    void CHEMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2852               int incX, Float2 beta, sp<Allocation> Y, int incY);
2853
2854    /**
2855     * CHBMV performs the matrix-vector operation
2856     * y := alpha*A*x + beta*y
2857     *
2858     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2859     *
2860     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2861     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2862     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2863     *           for i in range(0, n):
2864     *              for j in range(i, min(i+k+1, n)):
2865     *                  b[i, j-i] = a[i, j]
2866     *
2867     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2868     * @param K The number of off-diagonals of the matrix A.
2869     * @param alpha The scalar alpha.
2870     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2871     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2872     * @param incX The increment for the elements of vector x, must be larger than zero.
2873     * @param beta The scalar beta.
2874     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F32_2}.
2875     * @param incY The increment for the elements of vector y, must be larger than zero.
2876     */
2877    void CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2878               int incX, Float2 beta, sp<Allocation> Y, int incY);
2879
2880    /**
2881     * CHPMV performs the matrix-vector operation
2882     * y := alpha*A*x + beta*y
2883     *
2884     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2885     *
2886     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2887     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2888     *       'a' to packed matrix 'b'.
2889     *           k = 0
2890     *           for i in range(0, n):
2891     *              for j in range(i, n):
2892     *                  b[k++] = a[i, j]
2893     *
2894     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2895     * @param alpha The scalar alpha.
2896     * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2897     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2898     * @param incX The increment for the elements of vector x, must be larger than zero.
2899     * @param beta The scalar beta.
2900     * @param Y The input/output allocation contains vector y, supported elements type: {Element#F32_2}.
2901     * @param incY The increment for the elements of vector y, must be larger than zero.
2902     */
2903    void CHPMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> Ap, sp<Allocation> X,
2904               int incX, Float2 beta, sp<Allocation> Y, int incY);
2905
2906    /**
2907     * CGERU performs the rank 1 operation
2908     * A := alpha*x*y**T + A
2909     *
2910     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2911     *
2912     * @param alpha The scalar alpha.
2913     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2914     * @param incX The increment for the elements of vector x, must be larger than zero.
2915     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2916     * @param incY The increment for the elements of vector y, must be larger than zero.
2917     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F32_2}.
2918     */
2919    void CGERU(Float2 alpha, sp<Allocation> X, int incX,
2920               sp<Allocation> Y, int incY, sp<Allocation> A);
2921
2922    /**
2923     * CGERC performs the rank 1 operation
2924     * A := alpha*x*y**H + A
2925     *
2926     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2927     *
2928     * @param alpha The scalar alpha.
2929     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2930     * @param incX The increment for the elements of vector x, must be larger than zero.
2931     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2932     * @param incY The increment for the elements of vector y, must be larger than zero.
2933     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F32_2}.
2934     */
2935    void CGERC(Float2 alpha, sp<Allocation> X, int incX,
2936               sp<Allocation> Y, int incY, sp<Allocation> A);
2937
2938    /**
2939     * CHER performs the rank 1 operation
2940     * A := alpha*x*x**H + A
2941     *
2942     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2943     *
2944     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2945     * @param alpha The scalar alpha.
2946     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2947     * @param incX The increment for the elements of vector x, must be larger than zero.
2948     * @param A The input/output allocation contains matrix A, supported elements type: {Element#F32_2}.
2949     */
2950    void CHER(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2951
2952    /**
2953     * CHPR performs the rank 1 operation
2954     * A := alpha*x*x**H + A
2955     *
2956     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2957     *
2958     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2959     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2960     *       'a' to packed matrix 'b'.
2961     *           k = 0
2962     *           for i in range(0, n):
2963     *              for j in range(i, n):
2964     *                  b[k++] = a[i, j]
2965     *
2966     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2967     * @param alpha The scalar alpha.
2968     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2969     * @param incX The increment for the elements of vector x, must be larger than zero.
2970     * @param Ap The input/output allocation contains matrix A, supported elements type: {Element#F32_2}.
2971     */
2972    void CHPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2973
2974    /**
2975     * CHER2 performs the hermitian rank 2 operation
2976     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A   (as defined by the reference BLAS routine linked below)
2977     *
2978     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2979     *
2980     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
2981     * @param alpha The scalar alpha.
2982     * @param X The input allocation that contains vector x; supported element type: {Element#F32_2}.
2983     * @param incX The increment for the elements of vector x, must be larger than zero.
2984     * @param Y The input allocation that contains vector y; supported element type: {Element#F32_2}.
2985     * @param incY The increment for the elements of vector y, must be larger than zero.
2986     * @param A The input/output allocation that contains matrix A, updated per the formula above; supported element type: {Element#F32_2}.
2987     */
2988    void CHER2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
2989               sp<Allocation> Y, int incY, sp<Allocation> A);
2990
2991    /**
2992     * CHPR2 performs the hermitian rank 2 operation
2993     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A   (as defined by the reference BLAS routine linked below)
2994     *
2995     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2996     *
2997     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2998     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2999     *       'a' to packed matrix 'b'.
3000     *           k = 0
3001     *           for i in range(0, n):
3002     *              for j in range(i, n):
3003     *                  b[k++] = a[i, j]
3004     *
3005     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3006     * @param alpha The scalar alpha.
3007     * @param X The input allocation that contains vector x; supported element type: {Element#F32_2}.
3008     * @param incX The increment for the elements of vector x, must be larger than zero.
3009     * @param Y The input allocation that contains vector y; supported element type: {Element#F32_2}.
3010     * @param incY The increment for the elements of vector y, must be larger than zero.
3011     * @param Ap The input/output allocation that contains matrix A in packed form, updated per the formula above; supported element type: {Element#F32_2}.
3012     */
3013    void CHPR2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
3014               sp<Allocation> Y, int incY, sp<Allocation> Ap);
3015
3016    /**
3017     * ZHEMV performs the matrix-vector operation
3018     * y := alpha*A*x + beta*y
3019     *
3020     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
3021     *
3022     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3023     * @param alpha The scalar alpha.
3024     * @param A The input allocation that contains matrix A; supported element type: {Element#F64_2}.
3025     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3026     * @param incX The increment for the elements of vector x, must be larger than zero.
3027     * @param beta The scalar beta.
3028     * @param Y The input/output allocation that contains vector y, updated per the formula above; supported element type: {Element#F64_2}.
3029     * @param incY The increment for the elements of vector y, must be larger than zero.
3030     */
3031    void ZHEMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3032               int incX, Double2 beta, sp<Allocation> Y, int incY);
3033
3034    /**
3035     * ZHBMV performs the matrix-vector operation
3036     * y := alpha*A*x + beta*y
3037     *
3038     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
3039     *
3040     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
3041     *       but only the region N*(K+1) will be referenced. The following subroutine is an
3042     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
3043     *           for i in range(0, n):
3044     *              for j in range(i, min(i+k+1, n)):
3045     *                  b[i, j-i] = a[i, j]
3046     *
3047     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
3048     * @param K The number of off-diagonals of the matrix A.
3049     * @param alpha The scalar alpha.
3050     * @param A The input allocation that contains band matrix A; supported element type: {Element#F64_2}.
3051     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3052     * @param incX The increment for the elements of vector x, must be larger than zero.
3053     * @param beta The scalar beta.
3054     * @param Y The input/output allocation that contains vector y, updated per the formula above; supported element type: {Element#F64_2}.
3055     * @param incY The increment for the elements of vector y, must be larger than zero.
3056     */
3057    void ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3058               int incX, Double2 beta, sp<Allocation> Y, int incY);
3059
3060    /**
3061     * ZHPMV performs the matrix-vector operation
3062     * y := alpha*A*x + beta*y
3063     *
3064     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
3065     *
3066     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3067     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3068     *       'a' to packed matrix 'b'.
3069     *           k = 0
3070     *           for i in range(0, n):
3071     *              for j in range(i, n):
3072     *                  b[k++] = a[i, j]
3073     *
3074     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
3075     * @param alpha The scalar alpha.
3076     * @param Ap The input allocation that contains matrix A in packed form; supported element type: {Element#F64_2}.
3077     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3078     * @param incX The increment for the elements of vector x, must be larger than zero.
3079     * @param beta The scalar beta.
3080     * @param Y The input/output allocation that contains vector y, updated per the formula above; supported element type: {Element#F64_2}.
3081     * @param incY The increment for the elements of vector y, must be larger than zero.
3082     */
3083    void ZHPMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> Ap, sp<Allocation> X,
3084               int incX, Double2 beta, sp<Allocation> Y, int incY);
3085
3086    /**
3087     * ZGERU performs the rank 1 operation
3088     * A := alpha*x*y**T + A
3089     *
3090     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
3091     *
3092     * @param alpha The scalar alpha.
3093     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3094     * @param incX The increment for the elements of vector x, must be larger than zero.
3095     * @param Y The input allocation that contains vector y; supported element type: {Element#F64_2}.
3096     * @param incY The increment for the elements of vector y, must be larger than zero.
3097     * @param A The input/output allocation that contains matrix A, updated per the formula above; supported element type: {Element#F64_2}.
3098     */
3099    void ZGERU(Double2 alpha, sp<Allocation> X, int incX,
3100               sp<Allocation> Y, int incY, sp<Allocation> A);
3101
3102    /**
3103     * ZGERC performs the rank 1 operation
3104     * A := alpha*x*y**H + A
3105     *
3106     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
3107     *
3108     * @param alpha The scalar alpha.
3109     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3110     * @param incX The increment for the elements of vector x, must be larger than zero.
3111     * @param Y The input allocation that contains vector y; supported element type: {Element#F64_2}.
3112     * @param incY The increment for the elements of vector y, must be larger than zero.
3113     * @param A The input/output allocation that contains matrix A, updated per the formula above; supported element type: {Element#F64_2}.
3114     */
3115    void ZGERC(Double2 alpha, sp<Allocation> X, int incX,
3116               sp<Allocation> Y, int incY, sp<Allocation> A);
3117
3118    /**
3119     * ZHER performs the hermitian rank 1 operation
3120     * A := alpha*x*x**H + A
3121     *
3122     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
3123     *
3124     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3125     * @param alpha The scalar alpha.
3126     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3127     * @param incX The increment for the elements of vector x, must be larger than zero.
3128     * @param A The input/output allocation that contains matrix A, updated per the formula above; supported element type: {Element#F64_2}.
3129     */
3130    void ZHER(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
3131
3132    /**
3133     * ZHPR performs the hermitian rank 1 operation
3134     * A := alpha*x*x**H + A
3135     *
3136     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
3137     *
3138     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3139     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3140     *       'a' to packed matrix 'b'.
3141     *           k = 0
3142     *           for i in range(0, n):
3143     *              for j in range(i, n):
3144     *                  b[k++] = a[i, j]
3145     *
3146     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3147     * @param alpha The scalar alpha.
3148     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3149     * @param incX The increment for the elements of vector x, must be larger than zero.
3150     * @param Ap The input/output allocation that contains matrix A in packed form, updated per the formula above; supported element type: {Element#F64_2}.
3151     */
3152    void ZHPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
3153
3154    /**
3155     * ZHER2 performs the hermitian rank 2 operation
3156     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A   (as defined by the reference BLAS routine linked below)
3157     *
3158     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
3159     *
3160     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3161     * @param alpha The scalar alpha.
3162     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3163     * @param incX The increment for the elements of vector x, must be larger than zero.
3164     * @param Y The input allocation that contains vector y; supported element type: {Element#F64_2}.
3165     * @param incY The increment for the elements of vector y, must be larger than zero.
3166     * @param A The input/output allocation that contains matrix A, updated per the formula above; supported element type: {Element#F64_2}.
3167     */
3168    void ZHER2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3169               sp<Allocation> Y, int incY, sp<Allocation> A);
3170
3171    /**
3172     * ZHPR2 performs the hermitian rank 2 operation
3173     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A   (as defined by the reference BLAS routine linked below)
3174     *
3175     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
3176     *
3177     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3178     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3179     *       'a' to packed matrix 'b'.
3180     *           k = 0
3181     *           for i in range(0, n):
3182     *              for j in range(i, n):
3183     *                  b[k++] = a[i, j]
3184     *
3185     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3186     * @param alpha The scalar alpha.
3187     * @param X The input allocation that contains vector x; supported element type: {Element#F64_2}.
3188     * @param incX The increment for the elements of vector x, must be larger than zero.
3189     * @param Y The input allocation that contains vector y; supported element type: {Element#F64_2}.
3190     * @param incY The increment for the elements of vector y, must be larger than zero.
3191     * @param Ap The input/output allocation that contains matrix A in packed form, updated per the formula above; supported element type: {Element#F64_2}.
3192     */
3193    void ZHPR2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3194               sp<Allocation> Y, int incY, sp<Allocation> Ap);
3195
3196    /**
3197     * SGEMM performs one of the matrix-matrix operations
3198     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3199     *
3200     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
3201     *
3202     * @param TransA The type of transpose applied to matrix A.
3203     * @param TransB The type of transpose applied to matrix B.
3204     * @param alpha The scalar alpha.
3205     * @param A The input allocation that contains matrix A; supported element type: {Element#F32}.
3206     * @param B The input allocation that contains matrix B; supported element type: {Element#F32}.
3207     * @param beta The scalar beta.
3208     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32}.
3209     */
3210    void SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, sp<Allocation> A,
3211                      sp<Allocation> B, float beta, sp<Allocation> C);
3212
3213
3214    /**
3215     * DGEMM performs one of the matrix-matrix operations
3216     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3217     *
3218     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
3219     *
3220     * @param TransA The type of transpose applied to matrix A.
3221     * @param TransB The type of transpose applied to matrix B.
3222     * @param alpha The scalar alpha.
3223     * @param A The input allocation that contains matrix A; supported element type: {Element#F64}.
3224     * @param B The input allocation that contains matrix B; supported element type: {Element#F64}.
3225     * @param beta The scalar beta.
3226     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64}.
3227     */
3228    void DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, sp<Allocation> A,
3229                      sp<Allocation> B, double beta, sp<Allocation> C);
3230
3231    /**
3232     * CGEMM performs one of the matrix-matrix operations
3233     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3234     *
3235     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3236     *
3237     * @param TransA The type of transpose applied to matrix A.
3238     * @param TransB The type of transpose applied to matrix B.
3239     * @param alpha The scalar alpha.
3240     * @param A The input allocation that contains matrix A; supported element type: {Element#F32_2}.
3241     * @param B The input allocation that contains matrix B; supported element type: {Element#F32_2}.
3242     * @param beta The scalar beta.
3243     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32_2}.
3244     */
3245    void CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, sp<Allocation> A,
3246                      sp<Allocation> B, Float2 beta, sp<Allocation> C);
3247
3248    /**
3249     * ZGEMM performs one of the matrix-matrix operations
3250     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3251     *
3252     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3253     *
3254     * @param TransA The type of transpose applied to matrix A.
3255     * @param TransB The type of transpose applied to matrix B.
3256     * @param alpha The scalar alpha.
3257     * @param A The input allocation that contains matrix A; supported element type: {Element#F64_2}.
3258     * @param B The input allocation that contains matrix B; supported element type: {Element#F64_2}.
3259     * @param beta The scalar beta.
3260     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64_2}.
3261     */
3262    void ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, sp<Allocation> A,
3263                      sp<Allocation> B, Double2 beta, sp<Allocation> C);
3264
3265    /**
3266     * SSYMM performs one of the matrix-matrix operations
3267     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3268     *
3269     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3270     *
3271     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3272     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3273     * @param alpha The scalar alpha.
3274     * @param A The input allocation that contains matrix A; supported element type: {Element#F32}.
3275     * @param B The input allocation that contains matrix B; supported element type: {Element#F32}.
3276     * @param beta The scalar beta.
3277     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32}.
3278     */
3279    void SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, sp<Allocation> A,
3280                      sp<Allocation> B, float beta, sp<Allocation> C);
3281
3282    /**
3283     * DSYMM performs one of the matrix-matrix operations
3284     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3285     *
3286     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3287     *
3288     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3289     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3290     * @param alpha The scalar alpha.
3291     * @param A The input allocation that contains matrix A; supported element type: {Element#F64}.
3292     * @param B The input allocation that contains matrix B; supported element type: {Element#F64}.
3293     * @param beta The scalar beta.
3294     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64}.
3295     */
3296    void DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, sp<Allocation> A,
3297                      sp<Allocation> B, double beta, sp<Allocation> C);
3298
3299    /**
3300     * CSYMM performs one of the matrix-matrix operations
3301     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3302     *
3303     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3304     *
3305     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3306     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3307     * @param alpha The scalar alpha.
3308     * @param A The input allocation that contains matrix A; supported element type: {Element#F32_2}.
3309     * @param B The input allocation that contains matrix B; supported element type: {Element#F32_2}.
3310     * @param beta The scalar beta.
3311     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32_2}.
3312     */
3313    void CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3314                      sp<Allocation> B, Float2 beta, sp<Allocation> C);
3315
3316    /**
3317     * ZSYMM performs one of the matrix-matrix operations
3318     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3319     *
3320     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3321     *
3322     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3323     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3324     * @param alpha The scalar alpha.
3325     * @param A The input allocation that contains matrix A; supported element type: {Element#F64_2}.
3326     * @param B The input allocation that contains matrix B; supported element type: {Element#F64_2}.
3327     * @param beta The scalar beta.
3328     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64_2}.
3329     */
3330    void ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3331                      sp<Allocation> B, Double2 beta, sp<Allocation> C);
3332
3333    /**
3334     * SSYRK performs one of the symmetric rank k operations
3335     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3336     *
3337     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3338     *
3339     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3340     * @param Trans The type of transpose applied to the operation.
3341     * @param alpha The scalar alpha.
3342     * @param A The input allocation that contains matrix A; supported element type: {Element#F32}.
3343     * @param beta The scalar beta.
3344     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32}.
3345     */
3346    void SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3347               sp<Allocation> A, float beta, sp<Allocation> C);
3348
3349    /**
3350     * DSYRK performs one of the symmetric rank k operations
3351     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3352     *
3353     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3354     *
3355     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3356     * @param Trans The type of transpose applied to the operation.
3357     * @param alpha The scalar alpha.
3358     * @param A The input allocation that contains matrix A; supported element type: {Element#F64}.
3359     * @param beta The scalar beta.
3360     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64}.
3361     */
3362    void DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3363               sp<Allocation> A, double beta, sp<Allocation> C);
3364
3365    /**
3366     * CSYRK performs one of the symmetric rank k operations
3367     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3368     *
3369     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3370     *
3371     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3372     * @param Trans The type of transpose applied to the operation.
3373     * @param alpha The scalar alpha.
3374     * @param A The input allocation that contains matrix A; supported element type: {Element#F32_2}.
3375     * @param beta The scalar beta.
3376     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32_2}.
3377     */
3378    void CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3379               sp<Allocation> A, Float2 beta, sp<Allocation> C);
3380
3381    /**
3382     * ZSYRK performs one of the symmetric rank k operations
3383     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3384     *
3385     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3386     *
3387     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3388     * @param Trans The type of transpose applied to the operation.
3389     * @param alpha The scalar alpha.
3390     * @param A The input allocation that contains matrix A; supported element type: {Element#F64_2}.
3391     * @param beta The scalar beta.
3392     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64_2}.
3393     */
3394    void ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3395               sp<Allocation> A, Double2 beta, sp<Allocation> C);
3396
3397    /**
3398     * SSYR2K performs one of the symmetric rank 2k operations
3399     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3400     *
3401     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3402     *
3403     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3404     * @param Trans The type of transpose applied to the operation.
3405     * @param alpha The scalar alpha.
3406     * @param A The input allocation that contains matrix A; supported element type: {Element#F32}.
3407     * @param B The input allocation that contains matrix B; supported element type: {Element#F32}.
3408     * @param beta The scalar beta.
3409     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32}.
3410     */
3411    void SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3412                sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C);
3413
3414    /**
3415     * DSYR2K performs one of the symmetric rank 2k operations
3416     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3417     *
3418     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3419     *
3420     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3421     * @param Trans The type of transpose applied to the operation.
3422     * @param alpha The scalar alpha.
3423     * @param A The input allocation that contains matrix A; supported element type: {Element#F64}.
3424     * @param B The input allocation that contains matrix B; supported element type: {Element#F64}.
3425     * @param beta The scalar beta.
3426     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64}.
3427     */
3428    void DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3429                sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C);
3430
3431    /**
3432     * CSYR2K performs one of the symmetric rank 2k operations
3433     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3434     *
3435     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3436     *
3437     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3438     * @param Trans The type of transpose applied to the operation.
3439     * @param alpha The scalar alpha.
3440     * @param A The input allocation that contains matrix A; supported element type: {Element#F32_2}.
3441     * @param B The input allocation that contains matrix B; supported element type: {Element#F32_2}.
3442     * @param beta The scalar beta.
3443     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F32_2}.
3444     */
3445    void CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3446                sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C);
3447
3448    /**
3449     * ZSYR2K performs one of the symmetric rank 2k operations
3450     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3451     *
3452     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3453     *
3454     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3455     * @param Trans The type of transpose applied to the operation.
3456     * @param alpha The scalar alpha.
3457     * @param A The input allocation that contains matrix A; supported element type: {Element#F64_2}.
3458     * @param B The input allocation that contains matrix B; supported element type: {Element#F64_2}.
3459     * @param beta The scalar beta.
3460     * @param C The input/output allocation that contains matrix C, updated per the formula above; supported element type: {Element#F64_2}.
3461     */
3462    void ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3463                sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C);
3464
3465    /**
3466     * STRMM performs one of the matrix-matrix operations
3467     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3468     * op(A) is one of  op(A) = A  or  op(A) = A**T
3469     *
3470     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3471     *
3472     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3473     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3474     * @param TransA The type of transpose applied to matrix A.
3475     * @param Diag Specifies whether or not A is unit triangular.
3476     * @param alpha The scalar alpha.
3477     * @param A The input allocation that contains matrix A; supported element type: {Element#F32}.
3478     * @param B The input/output allocation that contains matrix B, updated per the formula above; supported element type: {Element#F32}.
3479     */
3480    void STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA,
3481               RsBlasDiag Diag, float alpha, sp<Allocation> A, sp<Allocation> B);
3482
3483    /**
3484     * DTRMM performs one of the matrix-matrix operations
3485     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3486     * op(A) is one of  op(A) = A  or  op(A) = A**T
3487     *
3488     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3489     *
3490     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3491     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3492     * @param TransA The type of transpose applied to matrix A.
3493     * @param Diag Specifies whether or not A is unit triangular.
3494     * @param alpha The scalar alpha.
3495     * @param A The input allocation that contains matrix A; supported element type: {Element#F64}.
3496     * @param B The input/output allocation that contains matrix B, updated per the formula above; supported element type: {Element#F64}.
3497     */
3498    void DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3499               double alpha, sp<Allocation> A, sp<Allocation> B);
3500
3501    /**
3502     * CTRMM performs one of the matrix-matrix operations
3503     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3504     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3505     *
3506     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3507     *
3508     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3509     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3510     * @param TransA The type of transpose applied to matrix A.
3511     * @param Diag Specifies whether or not A is unit triangular.
3512     * @param alpha The scalar alpha.
3513     * @param A The input allocation that contains matrix A; supported element type: {Element#F32_2}.
3514     * @param B The input/output allocation that contains matrix B, updated per the formula above; supported element type: {Element#F32_2}.
3515     */
3516    void CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3517               Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3518
3519    /**
3520     * ZTRMM performs one of the matrix-matrix operations
3521     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3522     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3523     *
3524     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3525     *
3526     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3527     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3528     * @param TransA The type of transpose applied to matrix A.
3529     * @param Diag Specifies whether or not A is unit triangular.
3530     * @param alpha The scalar alpha.
3531     * @param A The input allocation that contains matrix A; supported element type: {Element#F64_2}.
3532     * @param B The input/output allocation that contains matrix B, updated per the formula above; supported element type: {Element#F64_2}.
3533     */
3534    void ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3535               Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3536
3537    /**
3538     * STRSM solves one of the matrix equations
3539     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3540     * op(A) is one of  op(A) = A  or  op(A) = A**T
3541     *
3542     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3543     *
3544     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3545     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3546     * @param TransA The type of transpose applied to matrix A.
3547     * @param Diag Specifies whether or not A is unit triangular.
3548     * @param alpha The scalar alpha.
3549     * @param A The input allocation containing matrix A, supported element type: {Element#F32}.
3550     * @param B The input allocation containing matrix B, supported element type: {Element#F32}. NOTE(review): reference BLAS overwrites B with the solution X; confirm for this API.
3551     */
3552    void STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3553               float alpha, sp<Allocation> A, sp<Allocation> B);
3554
3555    /**
3556     * DTRSM solves one of the matrix equations
3557     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3558     * op(A) is one of  op(A) = A  or  op(A) = A**T
3559     *
3560     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3561     *
3562     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3563     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3564     * @param TransA The type of transpose applied to matrix A.
3565     * @param Diag Specifies whether or not A is unit triangular.
3566     * @param alpha The scalar alpha.
3567     * @param A The input allocation containing matrix A, supported element type: {Element#F64}.
3568     * @param B The input allocation containing matrix B, supported element type: {Element#F64}. NOTE(review): reference BLAS overwrites B with the solution X; confirm for this API.
3569     */
3570    void DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3571               double alpha, sp<Allocation> A, sp<Allocation> B);
3572
3573    /**
3574     * CTRSM solves one of the matrix equations
3575     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3576     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3577     *
3578     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3579     *
3580     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3581     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3582     * @param TransA The type of transpose applied to matrix A.
3583     * @param Diag Specifies whether or not A is unit triangular.
3584     * @param alpha The scalar alpha.
3585     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3586     * @param B The input allocation containing matrix B, supported element type: {Element#F32_2}. NOTE(review): reference BLAS overwrites B with the solution X; confirm for this API.
3587     */
3588    void CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3589               Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3590
3591    /**
3592     * ZTRSM solves one of the matrix equations
3593     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3594     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3595     *
3596     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3597     *
3598     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3599     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3600     * @param TransA The type of transpose applied to matrix A.
3601     * @param Diag Specifies whether or not A is unit triangular.
3602     * @param alpha The scalar alpha.
3603     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3604     * @param B The input allocation containing matrix B, supported element type: {Element#F64_2}. NOTE(review): reference BLAS overwrites B with the solution X; confirm for this API.
3605     */
3606    void ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3607               Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3608
3609    /**
3610     * CHEMM performs one of the matrix-matrix operations
3611     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3612     *
3613     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3614     *
3615     * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3616     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3617     * @param alpha The scalar alpha.
3618     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3619     * @param B The input allocation containing matrix B, supported element type: {Element#F32_2}.
3620     * @param beta The scalar beta.
3621     * @param C The input/output allocation containing matrix C (assigned the result, per the formula above), supported element type: {Element#F32_2}.
3622     */
3623    void CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3624               sp<Allocation> B, Float2 beta, sp<Allocation> C);
3625
3626    /**
3627     * ZHEMM performs one of the matrix-matrix operations
3628     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3629     *
3630     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3631     *
3632     * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3633     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3634     * @param alpha The scalar alpha.
3635     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3636     * @param B The input allocation containing matrix B, supported element type: {Element#F64_2}.
3637     * @param beta The scalar beta.
3638     * @param C The input/output allocation containing matrix C (assigned the result, per the formula above), supported element type: {Element#F64_2}.
3639     */
3640    void ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3641               sp<Allocation> B, Double2 beta, sp<Allocation> C);
3642
3643    /**
3644     * CHERK performs one of the Hermitian rank k operations
3645     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3646     *
3647     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3648     *
3649     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3650     * @param Trans The type of transpose applied to the operation.
3651     * @param alpha The scalar alpha (real-valued, as the float parameter type shows).
3652     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3653     * @param beta The scalar beta (real-valued, as the float parameter type shows).
3654     * @param C The input/output allocation containing matrix C (assigned the result, per the formula above), supported element type: {Element#F32_2}.
3655     */
3656    void CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, sp<Allocation> A,
3657               float beta, sp<Allocation> C);
3658
3659    /**
3660     * ZHERK performs one of the Hermitian rank k operations
3661     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3662     *
3663     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3664     *
3665     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3666     * @param Trans The type of transpose applied to the operation.
3667     * @param alpha The scalar alpha (real-valued, as the double parameter type shows).
3668     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3669     * @param beta The scalar beta (real-valued, as the double parameter type shows).
3670     * @param C The input/output allocation containing matrix C (assigned the result, per the formula above), supported element type: {Element#F64_2}.
3671     */
3672    void ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, sp<Allocation> A,
3673               double beta, sp<Allocation> C);
3674
3675    /**
3676     * CHER2K performs one of the Hermitian rank 2k operations
3677     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3678     *
3679     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3680     *
3681     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3682     * @param Trans The type of transpose applied to the operation.
3683     * @param alpha The (complex) scalar alpha.
3684     * @param A The input allocation containing matrix A, supported element type: {Element#F32_2}.
3685     * @param B The input allocation containing matrix B, supported element type: {Element#F32_2}.
3686     * @param beta The scalar beta (real-valued, as the float parameter type shows).
3687     * @param C The input/output allocation containing matrix C (assigned the result, per the formula above), supported element type: {Element#F32_2}.
3688     */
3689    void CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, sp<Allocation> A,
3690                sp<Allocation> B, float beta, sp<Allocation> C);
3691
3692    /**
3693     * ZHER2K performs one of the Hermitian rank 2k operations
3694     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3695     *
3696     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3697     *
3698     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3699     * @param Trans The type of transpose applied to the operation.
3700     * @param alpha The (complex) scalar alpha.
3701     * @param A The input allocation containing matrix A, supported element type: {Element#F64_2}.
3702     * @param B The input allocation containing matrix B, supported element type: {Element#F64_2}.
3703     * @param beta The scalar beta (real-valued, as the double parameter type shows).
3704     * @param C The input/output allocation containing matrix C (assigned the result, per the formula above), supported element type: {Element#F64_2}.
3705     */
3706    void ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, sp<Allocation> A,
3707                sp<Allocation> B, double beta, sp<Allocation> C);
3708
3709    /**
3710     * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3711     * Calculations are done in 1.10.21 fixed-point format for the final output,
3712     * just before there's a shift down to drop the fractional parts. The output
3713     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3714     * gives some headroom to avoid wrapping around on small overflows.
3715     *
3716     * @param A The input allocation containing matrix A, supported element type: {Element#U8}.
3717     * @param a_offset The offset for all values in matrix A, e.g., A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3718     * @param B The input allocation containing matrix B, supported element type: {Element#U8}.
3719     * @param b_offset The offset for all values in matrix B, e.g., B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3720     * @param C The allocation containing the result matrix C, supported element type: {Element#U8}.
3721     * @param c_offset The offset for all values in matrix C.
3722     * @param c_mult The multiplier for all values in matrix C, e.g., C[i,j] = (C[i,j] + c_offset) * c_mult.
3723     **/
3724    void BNNM(sp<Allocation> A, int a_offset, sp<Allocation> B, int b_offset, sp<Allocation> C,
3725              int c_offset, int c_mult);
3726};
3727
3728/**
3729 * Intrinsic kernel for blending two Allocations. NOTE(review): the per-method formulas presumably use "src" for `in` and "dst" for `out`; confirm.
3730 */
3731class ScriptIntrinsicBlend : public ScriptIntrinsic {
3732 private:
3733    ScriptIntrinsicBlend(sp<RS> rs, sp<const Element> e);
3734 public:
3735    /**
3736     * Creates a new blend intrinsic. Supported Element types are U8_4.
3737     * @param[in] rs RenderScript context
3738     * @param[in] e Element
3739     * @return new ScriptIntrinsicBlend
3740     */
3741    static sp<ScriptIntrinsicBlend> create(sp<RS> rs, sp<const Element> e);
3742    /**
3743     * Sets dst = {0, 0, 0, 0}
3744     * @param[in] in input Allocation
3745     * @param[in] out output Allocation
3746     */
3747    void forEachClear(sp<Allocation> in, sp<Allocation> out);
3748    /**
3749     * Sets dst = src
3750     * @param[in] in input Allocation
3751     * @param[in] out output Allocation
3752     */
3753    void forEachSrc(sp<Allocation> in, sp<Allocation> out);
3754    /**
3755     * Sets dst = dst (NOP)
3756     * @param[in] in input Allocation
3757     * @param[in] out output Allocation
3758     */
3759    void forEachDst(sp<Allocation> in, sp<Allocation> out);
3760    /**
3761     * Sets dst = src + dst * (1.0 - src.a)
3762     * @param[in] in input Allocation
3763     * @param[in] out output Allocation
3764     */
3765    void forEachSrcOver(sp<Allocation> in, sp<Allocation> out);
3766    /**
3767     * Sets dst = dst + src * (1.0 - dst.a)
3768     * @param[in] in input Allocation
3769     * @param[in] out output Allocation
3770     */
3771    void forEachDstOver(sp<Allocation> in, sp<Allocation> out);
3772    /**
3773     * Sets dst = src * dst.a
3774     * @param[in] in input Allocation
3775     * @param[in] out output Allocation
3776     */
3777    void forEachSrcIn(sp<Allocation> in, sp<Allocation> out);
3778    /**
3779     * Sets dst = dst * src.a
3780     * @param[in] in input Allocation
3781     * @param[in] out output Allocation
3782     */
3783    void forEachDstIn(sp<Allocation> in, sp<Allocation> out);
3784    /**
3785     * Sets dst = src * (1.0 - dst.a)
3786     * @param[in] in input Allocation
3787     * @param[in] out output Allocation
3788     */
3789    void forEachSrcOut(sp<Allocation> in, sp<Allocation> out);
3790    /**
3791     * Sets dst = dst * (1.0 - src.a)
3792     * @param[in] in input Allocation
3793     * @param[in] out output Allocation
3794     */
3795    void forEachDstOut(sp<Allocation> in, sp<Allocation> out);
3796    /**
3797     * Sets dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
3798     * @param[in] in input Allocation
3799     * @param[in] out output Allocation
3800     */
3801    void forEachSrcAtop(sp<Allocation> in, sp<Allocation> out);
3802    /**
3803     * Sets dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
3804     * @param[in] in input Allocation
3805     * @param[in] out output Allocation
3806     */
3807    void forEachDstAtop(sp<Allocation> in, sp<Allocation> out);
3808    /**
3809     * Sets dst = {src.r ^ dst.r, src.g ^ dst.g, src.b ^ dst.b, src.a ^ dst.a}
3810     * @param[in] in input Allocation
3811     * @param[in] out output Allocation
3812     */
3813    void forEachXor(sp<Allocation> in, sp<Allocation> out);
3814    /**
3815     * Sets dst = src * dst
3816     * @param[in] in input Allocation
3817     * @param[in] out output Allocation
3818     */
3819    void forEachMultiply(sp<Allocation> in, sp<Allocation> out);
3820    /**
3821     * Sets dst = min(src + dst, 1.0)
3822     * @param[in] in input Allocation
3823     * @param[in] out output Allocation
3824     */
3825    void forEachAdd(sp<Allocation> in, sp<Allocation> out);
3826    /**
3827     * Sets dst = max(dst - src, 0.0)
3828     * @param[in] in input Allocation
3829     * @param[in] out output Allocation
3830     */
3831    void forEachSubtract(sp<Allocation> in, sp<Allocation> out);
3832};
3833
3834/**
3835 * Intrinsic Gaussian blur filter. Applies a Gaussian blur of the specified
3836 * radius to all elements of an Allocation.
3837 */
3838class ScriptIntrinsicBlur : public ScriptIntrinsic {
3839 private:
3840    ScriptIntrinsicBlur(sp<RS> rs, sp<const Element> e);
3841 public:
3842    /**
3843     * Creates a new blur intrinsic. Supported Element types are U8 and U8_4.
3844     * @param[in] rs RenderScript context
3845     * @param[in] e Element
3846     * @return new ScriptIntrinsicBlur
3847     */
3848    static sp<ScriptIntrinsicBlur> create(sp<RS> rs, sp<const Element> e);
3849    /**
3850     * Sets the input of the blur.
3851     * @param[in] in input Allocation
3852     */
3853    void setInput(sp<Allocation> in);
3854    /**
3855     * Runs the intrinsic.
3856     * @param[in] out output Allocation
3857     */
3858    void forEach(sp<Allocation> out);
3859    /**
3860     * Sets the radius of the blur. The supported range is 0 < radius <= 25.
3861     * @param[in] radius radius of the blur
3862     */
3863    void setRadius(float radius);
3864};
3865
3866/**
3867 * Intrinsic for applying a color matrix to allocations. This has the
3868 * same effect as loading each element and converting it to an
3869 * F32_N, multiplying the result by the 4x4 color matrix
3870 * as performed by rsMatrixMultiply() and writing it to the output
3871 * after conversion back to U8_N or F32_N.
3872 */
3873class ScriptIntrinsicColorMatrix : public ScriptIntrinsic {
3874 private:
3875    ScriptIntrinsicColorMatrix(sp<RS> rs, sp<const Element> e);
3876 public:
3877    /**
3878     * Creates a new color matrix intrinsic.
3879     * @param[in] rs RenderScript context
3880     * @return new ScriptIntrinsicColorMatrix
3881     */
3882    static sp<ScriptIntrinsicColorMatrix> create(sp<RS> rs);
3883    /**
3884     * Applies the color matrix. Supported types are U8 and F32 with
3885     * vector lengths between 1 and 4.
3886     * @param[in] in input Allocation
3887     * @param[out] out output Allocation
3888     */
3889    void forEach(sp<Allocation> in, sp<Allocation> out);
3890    /**
3891     * Set the value to be added after the color matrix has been
3892     * applied. The default value is {0, 0, 0, 0}.
3893     * @param[in] add pointer to float[4] of values
3894     */
3895    void setAdd(float* add);
3896
3897    /**
3898     * Set the color matrix which will be applied to each cell of the
3899     * image. The alpha channel will be copied.
3900     *
3901     * @param[in] m pointer to float[9] of values (3x3 matrix)
3902     */
3903    void setColorMatrix3(float* m);
3904    /**
3905     * Set the color matrix which will be applied to each cell of the
3906     * image.
3907     *
3908     * @param[in] m pointer to float[16] of values (4x4 matrix)
3909     */
3910    void setColorMatrix4(float* m);
3911    /**
3912     * Set a color matrix to convert from RGB to luminance. The alpha
3913     * channel will be a copy.
3914     */
3915    void setGreyscale();
3916    /**
3917     * Set the matrix to convert from RGB to YUV with a direct copy of
3918     * the 4th channel.
3919     */
3920    void setRGBtoYUV();
3921    /**
3922     * Set the matrix to convert from YUV to RGB with a direct copy of
3923     * the 4th channel.
3924     */
3925    void setYUVtoRGB();
3926};
3927
3928/**
3929 * Intrinsic for applying a 3x3 convolution to an allocation.
3930 */
3931class ScriptIntrinsicConvolve3x3 : public ScriptIntrinsic {
3932 private:
3933    ScriptIntrinsicConvolve3x3(sp<RS> rs, sp<const Element> e);
3934 public:
3935    /**
3936     * Creates a new 3x3 convolution intrinsic. Supported types are U8 and F32 with
3937     * vector lengths between 1 and 4. The default convolution kernel is the identity.
3938     * @param[in] rs RenderScript context
3939     * @param[in] e Element
3940     * @return new ScriptIntrinsicConvolve3x3
3941     */
3942    static sp<ScriptIntrinsicConvolve3x3> create(sp<RS> rs, sp<const Element> e);
3943    /**
3944     * Sets the input for the intrinsic.
3945     * @param[in] in input Allocation
3946     */
3947    void setInput(sp<Allocation> in);
3948    /**
3949     * Launches the intrinsic.
3950     * @param[in] out output Allocation
3951     */
3952    void forEach(sp<Allocation> out);
3953    /**
3954     * Sets the convolution kernel.
3955     * @param[in] v pointer to float[9] of kernel coefficients
3956     */
3957    void setCoefficients(float* v);
3958};
3959
3960/**
3961 * Intrinsic for applying a 5x5 convolution to an allocation.
3962 */
3963class ScriptIntrinsicConvolve5x5 : public ScriptIntrinsic {
3964 private:
3965    ScriptIntrinsicConvolve5x5(sp<RS> rs, sp<const Element> e);
3966 public:
3967    /**
3968     * Creates a new 5x5 convolution intrinsic. Supported types are U8 and F32 with
3969     * vector lengths between 1 and 4. The default convolution kernel is the identity.
3970     * @param[in] rs RenderScript context
3971     * @param[in] e Element
3972     * @return new ScriptIntrinsicConvolve5x5
3973     */
3974    static sp<ScriptIntrinsicConvolve5x5> create(sp<RS> rs, sp<const Element> e);
3975    /**
3976     * Sets the input for the intrinsic.
3977     * @param[in] in input Allocation
3978     */
3979    void setInput(sp<Allocation> in);
3980    /**
3981     * Launches the intrinsic.
3982     * @param[in] out output Allocation
3983     */
3984    void forEach(sp<Allocation> out);
3985    /**
3986     * Sets the convolution kernel.
3987     * @param[in] v pointer to float[25] of kernel coefficients
3988     */
3989    void setCoefficients(float* v);
3990};
3991
3992/**
3993 * Intrinsic for computing a histogram.
3994 */
3995class ScriptIntrinsicHistogram : public ScriptIntrinsic {
3996 private:
3997    ScriptIntrinsicHistogram(sp<RS> rs, sp<const Element> e);
3998    sp<Allocation> mOut; // presumably the allocation supplied via setOutput() -- TODO confirm
3999 public:
4000    /**
4001     * Create an intrinsic for calculating the histogram of a uchar
4002     * or uchar4 image.
4003     *
4004     * Supported element types are U8_4, U8_3, U8_2, and U8.
4005     *
4006     * @param[in] rs The RenderScript context
4007     * @param[in] e Element type for inputs
4008     *
4009     * @return ScriptIntrinsicHistogram
4010     */
4011    static sp<ScriptIntrinsicHistogram> create(sp<RS> rs, sp<const Element> e);
4012    /**
4013     * Set the output of the histogram.  32 bit integer types are
4014     * supported.
4015     *
4016     * @param[in] aout The output allocation
4017     */
4018    void setOutput(sp<Allocation> aout);
4019    /**
4020     * Set the coefficients used for the dot product calculation. The
4021     * default is {0.299f, 0.587f, 0.114f, 0.f}.
4022     *
4023     * Coefficients must be >= 0 and sum to 1.0 or less.
4024     *
4025     * @param[in] r Red coefficient
4026     * @param[in] g Green coefficient
4027     * @param[in] b Blue coefficient
4028     * @param[in] a Alpha coefficient
4029     */
4030    void setDotCoefficients(float r, float g, float b, float a);
4031    /**
4032     * Process an input buffer and place the histogram into the output
4033     * allocation. The output allocation may be a narrower vector size
4034     * than the input. In this case the vector size of the output is
4035     * used to determine how many of the input channels are used in
4036     * the computation. This is useful if you have an RGBA input
4037     * buffer but only want the histogram for RGB.
4038     *
4039     * 1D and 2D input allocations are supported.
4040     *
4041     * @param[in] ain The input image
4042     */
4043    void forEach(sp<Allocation> ain);
4044    /**
4045     * Process an input buffer and place the histogram into the output
4046     * allocation. The dot product of the input channels and the
4047     * coefficients from 'setDotCoefficients' is used to calculate
4048     * the output values.
4049     *
4050     * 1D and 2D input allocations are supported.
4051     *
4052     * @param[in] ain The input image
4053     */
4054    void forEach_dot(sp<Allocation> ain);
4055};
4056
4057/**
4058 * Intrinsic for applying a per-channel lookup table. Each channel of
4059 * the input has an independent lookup table. The tables are 256
4060 * entries in size and can cover the full value range of U8_4.
4061 **/
4062class ScriptIntrinsicLUT : public ScriptIntrinsic {
4063 private:
4064    sp<Allocation> LUT; // presumably the allocation backing the lookup tables -- TODO confirm
4065    bool mDirty; // presumably set when mCache has changes not yet copied to LUT -- TODO confirm
4066    unsigned char mCache[1024]; // presumably 4 channel tables x 256 entries -- TODO confirm
4067    void setTable(unsigned int offset, unsigned char base, unsigned int length, unsigned char* lutValues);
4068    ScriptIntrinsicLUT(sp<RS> rs, sp<const Element> e);
4069
4070 public:
4071    /**
4072     * Supported element types are U8_4.
4073     *
4074     * The default tables are identity.
4075     *
4076     * @param[in] rs The RenderScript context
4077     * @param[in] e Element type for inputs and outputs
4078     *
4079     * @return ScriptIntrinsicLUT
4080     */
4081    static sp<ScriptIntrinsicLUT> create(sp<RS> rs, sp<const Element> e);
4082    /**
4083     * Invoke the kernel and apply the lookup to each cell of ain and
4084     * copy to aout.
4085     *
4086     * @param[in] ain Input allocation
4087     * @param[in] aout Output allocation
4088     */
4089    void forEach(sp<Allocation> ain, sp<Allocation> aout);
4090    /**
4091     * Sets entries in LUT for the red channel.
4092     * @param[in] base base of region to update
4093     * @param[in] length length of region to update
4094     * @param[in] lutValues LUT values to use
4095     */
4096    void setRed(unsigned char base, unsigned int length, unsigned char* lutValues);
4097    /**
4098     * Sets entries in LUT for the green channel.
4099     * @param[in] base base of region to update
4100     * @param[in] length length of region to update
4101     * @param[in] lutValues LUT values to use
4102     */
4103    void setGreen(unsigned char base, unsigned int length, unsigned char* lutValues);
4104    /**
4105     * Sets entries in LUT for the blue channel.
4106     * @param[in] base base of region to update
4107     * @param[in] length length of region to update
4108     * @param[in] lutValues LUT values to use
4109     */
4110    void setBlue(unsigned char base, unsigned int length, unsigned char* lutValues);
4111    /**
4112     * Sets entries in LUT for the alpha channel.
4113     * @param[in] base base of region to update
4114     * @param[in] length length of region to update
4115     * @param[in] lutValues LUT values to use
4116     */
4117    void setAlpha(unsigned char base, unsigned int length, unsigned char* lutValues);
4118    virtual ~ScriptIntrinsicLUT();
4119};
4120
4121/**
4122 * Intrinsic for performing a resize of a 2D allocation.
4123 */
4124class ScriptIntrinsicResize : public ScriptIntrinsic {
4125 private:
4126    sp<Allocation> mInput; // presumably the allocation supplied via setInput() -- TODO confirm
4127    ScriptIntrinsicResize(sp<RS> rs, sp<const Element> e);
4128 public:
4129    /**
4130     * Creates a new resize intrinsic.
4131     * NOTE(review): supported Element types are not established by this header; confirm against the implementation.
4132     * @param[in] rs RenderScript context
4133     * @return new ScriptIntrinsicResize
4134     */
4135    static sp<ScriptIntrinsicResize> create(sp<RS> rs);
4136
4137    /**
4138     * Resizes the input Allocation into the specified output. The
4139     * Allocation is rescaled if necessary using bi-cubic
4140     * interpolation.
4141     * The input is the Allocation supplied through setInput().
4142     * @param[in] aout output Allocation
4143     */
4144    void forEach_bicubic(sp<Allocation> aout);
4145
4146    /**
4147     * Set the input of the resize.
4148     * @param[in] ain input Allocation
4149     */
4150    void setInput(sp<Allocation> ain);
4151};
4152
4153/**
4154 * Intrinsic for converting an Android YUV buffer to RGB.
4155 *
4156 * The input allocation should be supplied in a supported YUV format
4157 * as a YUV element Allocation. The output is RGBA; the alpha channel
4158 * will be set to 255.
4159 */
4160class ScriptIntrinsicYuvToRGB : public ScriptIntrinsic {
4161 private:
4162    ScriptIntrinsicYuvToRGB(sp<RS> rs, sp<const Element> e);
4163 public:
4164    /**
4165     * Create an intrinsic for converting YUV to RGB.
4166     *
4167     * Supported element types are U8_4.
4168     *
4169     * @param[in] rs The RenderScript context
4170     * @param[in] e Element type for output
4171     *
4172     * @return ScriptIntrinsicYuvToRGB
4173     */
4174    static sp<ScriptIntrinsicYuvToRGB> create(sp<RS> rs, sp<const Element> e);
4175    /**
4176     * Set the input YUV allocation.
4177     *
4178     * @param[in] in The input allocation.
4179     */
4180    void setInput(sp<Allocation> in);
4181
4182    /**
4183     * Convert the image to RGB.
4184     *
4185     * @param[in] out Output allocation. Must match creation element
4186     *                type.
4187     */
4188    void forEach(sp<Allocation> out);
4189
4190};
4191
4192/**
4193 * Sampler object that defines how Allocations can be read as textures
4194 * within a kernel. Samplers are used in conjunction with the rsSample
4195 * runtime function to return values from normalized coordinates.
4196 *
4197 * Any Allocation used with a Sampler must have been created with
4198 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE; using a Sampler on an
4199 * Allocation that was not created with
4200 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE is undefined.
4201 **/
4202 class Sampler : public BaseObj {
4203 private:
4204    Sampler(sp<RS> rs, void* id);
4205    Sampler(sp<RS> rs, void* id, RsSamplerValue min, RsSamplerValue mag,
4206            RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4207    RsSamplerValue mMin;
4208    RsSamplerValue mMag;
4209    RsSamplerValue mWrapS;
4210    RsSamplerValue mWrapT;
4211    float mAniso;
4212
4213 public:
4214    /**
4215     * Creates a non-standard Sampler.
4216     * @param[in] rs RenderScript context
4217     * @param[in] min minification
4218     * @param[in] mag magnification
4219     * @param[in] wrapS S wrapping mode
4220     * @param[in] wrapT T wrapping mode
4221     * @param[in] anisotropy anisotropy setting
4222     */
4223    static sp<Sampler> create(sp<RS> rs, RsSamplerValue min, RsSamplerValue mag, RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4224
4225    /**
4226     * @return minification setting for the sampler
4227     */
4228    RsSamplerValue getMinification();
4229    /**
4230     * @return magnification setting for the sampler
4231     */
4232    RsSamplerValue getMagnification();
4233    /**
4234     * @return S wrapping mode for the sampler
4235     */
4236    RsSamplerValue getWrapS();
4237    /**
4238     * @return T wrapping mode for the sampler
4239     */
4240    RsSamplerValue getWrapT();
4241    /**
4242     * @return anisotropy setting for the sampler
4243     */
4244    float getAnisotropy();
4245
4246    /**
4247     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4248     * clamp.
4249     *
4250     * @param rs Context to which the sampler will belong.
4251     *
4252     * @return Sampler
4253     */
4254    static sp<const Sampler> CLAMP_NEAREST(sp<RS> rs);
4255    /**
4256     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4257     * clamp.
4258     *
4259     * @param rs Context to which the sampler will belong.
4260     *
4261     * @return Sampler
4262     */
4263    static sp<const Sampler> CLAMP_LINEAR(sp<RS> rs);
4264    /**
4265     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4266     * wrap modes set to clamp.
4267     *
4268     * @param rs Context to which the sampler will belong.
4269     *
4270     * @return Sampler
4271     */
4272    static sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR(sp<RS> rs);
4273    /**
4274     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4275     * wrap.
4276     *
4277     * @param rs Context to which the sampler will belong.
4278     *
4279     * @return Sampler
4280     */
4281    static sp<const Sampler> WRAP_NEAREST(sp<RS> rs);
4282    /**
4283     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4284     * wrap.
4285     *
4286     * @param rs Context to which the sampler will belong.
4287     *
4288     * @return Sampler
4289     */
4290    static sp<const Sampler> WRAP_LINEAR(sp<RS> rs);
4291    /**
4292     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4293     * wrap modes set to wrap.
4294     *
4295     * @param rs Context to which the sampler will belong.
4296     *
4297     * @return Sampler
4298     */
4299    static sp<const Sampler> WRAP_LINEAR_MIP_LINEAR(sp<RS> rs);
4300    /**
4301     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4302     * mirrored repeat.
4303     *
4304     * @param rs Context to which the sampler will belong.
4305     *
4306     * @return Sampler
4307     */
4308    static sp<const Sampler> MIRRORED_REPEAT_NEAREST(sp<RS> rs);
4309    /**
4310     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4311     * mirrored repeat.
4312     *
4313     * @param rs Context to which the sampler will belong.
4314     *
4315     * @return Sampler
4316     */
4317    static sp<const Sampler> MIRRORED_REPEAT_LINEAR(sp<RS> rs);
4318    /**
4319     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4320     * wrap modes set to mirrored repeat.
4321     *
4322     * @param rs Context to which the sampler will belong.
4323     *
4324     * @return Sampler
4325     */
4326    static sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR(sp<RS> rs);
4327
4328};
4329
4330}
4331
4332}
4333
4334#endif
4335