// rsCppStructs.h revision 49b1226e8399f2ad4a9fd4482ece95dab2ad53b8
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ANDROID_RSCPPSTRUCTS_H
18#define ANDROID_RSCPPSTRUCTS_H
19
20#include "rsDefines.h"
21#include "util/RefBase.h"
22
23#include <pthread.h>
24
25
/**
 * Every row in an RS allocation is guaranteed to be aligned by this amount, and
 * every row in a user-backed allocation must be aligned by this amount.
 * The value is expressed in bytes.
 */
#define RS_CPU_ALLOCATION_ALIGNMENT 16
31
/** Forward declaration; the definition is not part of this header section. */
struct dispatchTable;
33
34namespace android {
35class Surface;
36
37namespace RSC {
38
39
/**
 * Signature of an error callback: receives an error number and a
 * NUL-terminated description of the error.
 */
typedef void (*ErrorHandlerFunc_t)(uint32_t errorNum, const char *errorText);

/**
 * Signature of a message callback: receives a message number, a pointer to the
 * message payload, and the payload length.
 */
typedef void (*MessageHandlerFunc_t)(uint32_t msgNum, const void *msgData, size_t msgLen);
42
// Forward declarations of the RenderScript C++ API classes.
class RS;
class BaseObj;
class Element;
class Type;
class Allocation;
class Script;
class ScriptC;
class Sampler;
51
/**
 * Possible error codes used by RenderScript. Once a status other than RS_SUCCESS
 * is returned, the RenderScript context is considered dead and cannot perform any
 * additional work.
 */
enum RSError {
    RS_SUCCESS = 0,                 ///< No error
    RS_ERROR_INVALID_PARAMETER = 1, ///< An invalid parameter was passed to a function
    RS_ERROR_RUNTIME_ERROR = 2,     ///< The RenderScript driver returned an error; this is
                                    ///< often indicative of a kernel that crashed
    RS_ERROR_INVALID_ELEMENT = 3,   ///< An invalid Element was passed to a function
    RS_ERROR_MAX = 9999             ///< Upper bound of the enumeration; not an error code itself
};
66
/**
 * YUV formats supported by the RenderScript API.
 */
enum RSYuvFormat {
    RS_YUV_NONE = 0, ///< No YUV data
    RS_YUV_YV12 = 1, ///< YUV data in YV12 format
    RS_YUV_NV21 = 2, ///< YUV data in NV21 format
    RS_YUV_MAX = 3   ///< One past the last valid format
};
76
/**
 * Flags that can control RenderScript behavior on a per-context level.
 * Each flag occupies its own bit so several flags can be OR-ed together.
 */
enum RSInitFlags {
    RS_INIT_SYNCHRONOUS = 1,     ///< All RenderScript calls will be synchronous. May reduce latency.
    RS_INIT_LOW_LATENCY = 2,     ///< Prefer low latency devices over potentially higher throughput devices.
    // Bitflag 4 is reserved for the context flag low power
    RS_INIT_WAIT_FOR_ATTACH = 8, ///< Kernel execution will hold to give time for a debugger to be attached
    RS_INIT_OPT_LEVEL_0 = 16,    ///< Use the -O0 option to set the optimization level to zero when calling the bcc compiler.
    RS_INIT_MAX = 32             ///< First bit value past the defined flags
};
88
89
/**
 * Two-component vector of int8_t values.
 */
class Byte2 {
 public:
  int8_t x, y;

  /** Builds a vector from explicit component values. */
  Byte2(int8_t initX, int8_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Byte2() : x(0), y(0) {}
};
98
/**
 * Three-component vector of int8_t values.
 */
class Byte3 {
 public:
  int8_t x, y, z;

  /** Builds a vector from explicit component values. */
  Byte3(int8_t initX, int8_t initY, int8_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Byte3() : x(0), y(0), z(0) {}
};
107
/**
 * Four-component vector of int8_t values.
 */
class Byte4 {
 public:
  int8_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Byte4() : x(0), y(0), z(0), w(0) {}
};
116
/**
 * Two-component vector of uint8_t values.
 */
class UByte2 {
 public:
  uint8_t x, y;

  /** Builds a vector from explicit component values. */
  UByte2(uint8_t initX, uint8_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  UByte2() : x(0), y(0) {}
};
125
/**
 * Three-component vector of uint8_t values.
 */
class UByte3 {
 public:
  uint8_t x, y, z;

  /** Builds a vector from explicit component values. */
  UByte3(uint8_t initX, uint8_t initY, uint8_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  UByte3() : x(0), y(0), z(0) {}
};
134
/**
 * Four-component vector of uint8_t values.
 */
class UByte4 {
 public:
  uint8_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  UByte4() : x(0), y(0), z(0), w(0) {}
};
143
/**
 * Two-component vector of short values.
 */
class Short2 {
 public:
  short x, y;

  /** Builds a vector from explicit component values. */
  Short2(short initX, short initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Short2() : x(0), y(0) {}
};
152
/**
 * Three-component vector of short values.
 */
class Short3 {
 public:
  short x, y, z;

  /** Builds a vector from explicit component values. */
  Short3(short initX, short initY, short initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Short3() : x(0), y(0), z(0) {}
};
161
/**
 * Four-component vector of short values.
 */
class Short4 {
 public:
  short x, y, z, w;

  /** Builds a vector from explicit component values. */
  Short4(short initX, short initY, short initZ, short initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Short4() : x(0), y(0), z(0), w(0) {}
};
170
/**
 * Two-component vector of uint16_t values.
 */
class UShort2 {
 public:
  uint16_t x, y;

  /** Builds a vector from explicit component values. */
  UShort2(uint16_t initX, uint16_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  UShort2() : x(0), y(0) {}
};
179
/**
 * Three-component vector of uint16_t values.
 */
class UShort3 {
 public:
  uint16_t x, y, z;

  /** Builds a vector from explicit component values. */
  UShort3(uint16_t initX, uint16_t initY, uint16_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  UShort3() : x(0), y(0), z(0) {}
};
188
/**
 * Four-component vector of uint16_t values.
 */
class UShort4 {
 public:
  uint16_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  UShort4() : x(0), y(0), z(0), w(0) {}
};
197
/**
 * Two-component vector of int values.
 */
class Int2 {
 public:
  int x, y;

  /** Builds a vector from explicit component values. */
  Int2(int initX, int initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Int2() : x(0), y(0) {}
};
206
/**
 * Three-component vector of int values.
 */
class Int3 {
 public:
  int x, y, z;

  /** Builds a vector from explicit component values. */
  Int3(int initX, int initY, int initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Int3() : x(0), y(0), z(0) {}
};
215
/**
 * Four-component vector of int values.
 */
class Int4 {
 public:
  int x, y, z, w;

  /** Builds a vector from explicit component values. */
  Int4(int initX, int initY, int initZ, int initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Int4() : x(0), y(0), z(0), w(0) {}
};
224
/**
 * Two-component vector of uint32_t values.
 */
class UInt2 {
 public:
  uint32_t x, y;

  /** Builds a vector from explicit component values. */
  UInt2(uint32_t initX, uint32_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  UInt2() : x(0), y(0) {}
};
233
/**
 * Three-component vector of uint32_t values.
 */
class UInt3 {
 public:
  uint32_t x, y, z;

  /** Builds a vector from explicit component values. */
  UInt3(uint32_t initX, uint32_t initY, uint32_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  UInt3() : x(0), y(0), z(0) {}
};
242
/**
 * Four-component vector of uint32_t values.
 */
class UInt4 {
 public:
  uint32_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  UInt4() : x(0), y(0), z(0), w(0) {}
};
251
/**
 * Two-component vector of int64_t values.
 */
class Long2 {
 public:
  int64_t x, y;

  /** Builds a vector from explicit component values. */
  Long2(int64_t initX, int64_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Long2() : x(0), y(0) {}
};
260
/**
 * Three-component vector of int64_t values.
 */
class Long3 {
 public:
  int64_t x, y, z;

  /** Builds a vector from explicit component values. */
  Long3(int64_t initX, int64_t initY, int64_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Long3() : x(0), y(0), z(0) {}
};
269
/**
 * Four-component vector of int64_t values.
 */
class Long4 {
 public:
  int64_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Long4() : x(0), y(0), z(0), w(0) {}
};
278
/**
 * Two-component vector of uint64_t values.
 */
class ULong2 {
 public:
  uint64_t x, y;

  /** Builds a vector from explicit component values. */
  ULong2(uint64_t initX, uint64_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  ULong2() : x(0), y(0) {}
};
287
/**
 * Three-component vector of uint64_t values.
 */
class ULong3 {
 public:
  uint64_t x, y, z;

  /** Builds a vector from explicit component values. */
  ULong3(uint64_t initX, uint64_t initY, uint64_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  ULong3() : x(0), y(0), z(0) {}
};
296
/**
 * Four-component vector of uint64_t values.
 */
class ULong4 {
 public:
  uint64_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  ULong4() : x(0), y(0), z(0), w(0) {}
};
305
/**
 * Two-component vector of float values.
 */
class Float2 {
 public:
  float x, y;

  /** Builds a vector from explicit component values. */
  Float2(float initX, float initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Float2() : x(0.f), y(0.f) {}
};
314
/**
 * Three-component vector of float values.
 */
class Float3 {
 public:
  float x, y, z;

  /** Builds a vector from explicit component values. */
  Float3(float initX, float initY, float initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Float3() : x(0.f), y(0.f), z(0.f) {}
};
323
/**
 * Four-component vector of float values.
 */
class Float4 {
 public:
  float x, y, z, w;

  /** Builds a vector from explicit component values. */
  Float4(float initX, float initY, float initZ, float initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Float4() : x(0.f), y(0.f), z(0.f), w(0.f) {}
};
332
/**
 * Two-component vector of double values.
 */
class Double2 {
 public:
  double x, y;

  /** Builds a vector from explicit component values. */
  Double2(double initX, double initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Double2() : x(0), y(0) {}
};
341
/**
 * Three-component vector of double values.
 */
class Double3 {
 public:
  double x, y, z;

  /** Builds a vector from explicit component values. */
  Double3(double initX, double initY, double initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Double3() : x(0), y(0), z(0) {}
};
350
/**
 * Four-component vector of double values.
 */
class Double4 {
 public:
  double x, y, z, w;

  /** Builds a vector from explicit component values. */
  Double4(double initX, double initY, double initZ, double initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Double4() : x(0), y(0), z(0), w(0) {}
};
359
360 /**
361  * The RenderScript context. This class controls initialization, resource management, and teardown.
362  */
363 class RS : public android::RSC::LightRefBase<RS> {
364
365 public:
366    RS();
367    virtual ~RS();
368
369    /**
370     * Initializes a RenderScript context. A context must be initialized before it can be used.
371     * @param[in] name Directory name to be used by this context. This should be equivalent to
372     * Context.getCacheDir().
373     * @param[in] flags Optional flags for this context.
374     * @return true on success
375     */
376    bool init(const char * name, uint32_t flags = 0);
377
378    /**
379     * Sets the error handler function for this context. This error handler is
380     * called whenever an error is set.
381     *
382     * @param[in] func Error handler function
383     */
384    void setErrorHandler(ErrorHandlerFunc_t func);
385
386    /**
387     * Returns the current error handler function for this context.
388     *
389     * @return pointer to current error handler function or NULL if not set
390     */
391    ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; }
392
393    /**
394     * Sets the message handler function for this context. This message handler
395     * is called whenever a message is sent from a RenderScript kernel.
396     *
397     *  @param[in] func Message handler function
398     */
399    void setMessageHandler(MessageHandlerFunc_t func);
400
401    /**
402     * Returns the current message handler function for this context.
403     *
404     * @return pointer to current message handler function or NULL if not set
405     */
406    MessageHandlerFunc_t getMessageHandler() { return mMessageFunc; }
407
408    /**
409     * Returns current status for the context.
410     *
411     * @return current error
412     */
413    RSError getError();
414
415    /**
416     * Waits for any currently running asynchronous operations to finish. This
417     * should only be used for performance testing and timing.
418     */
419    void finish();
420
421    RsContext getContext() { return mContext; }
422    void throwError(RSError error, const char *errMsg);
423
424    static dispatchTable* dispatch;
425
426 private:
427    static bool usingNative;
428    static bool initDispatch(int targetApi);
429
430    bool init(const char * name, int targetApi, uint32_t flags);
431    static void * threadProc(void *);
432
433    static bool gInitialized;
434    static pthread_mutex_t gInitMutex;
435
436    pthread_t mMessageThreadId;
437    pid_t mNativeMessageThreadId;
438    bool mMessageRun;
439
440    RsDevice mDev;
441    RsContext mContext;
442    RSError mCurrentError;
443
444    ErrorHandlerFunc_t mErrorFunc;
445    MessageHandlerFunc_t mMessageFunc;
446    bool mInit;
447
448    char mCacheDir[PATH_MAX+1];
449    uint32_t mCacheDirLen;
450
451    struct {
452        sp<const Element> U8;
453        sp<const Element> U8_2;
454        sp<const Element> U8_3;
455        sp<const Element> U8_4;
456        sp<const Element> I8;
457        sp<const Element> I8_2;
458        sp<const Element> I8_3;
459        sp<const Element> I8_4;
460        sp<const Element> U16;
461        sp<const Element> U16_2;
462        sp<const Element> U16_3;
463        sp<const Element> U16_4;
464        sp<const Element> I16;
465        sp<const Element> I16_2;
466        sp<const Element> I16_3;
467        sp<const Element> I16_4;
468        sp<const Element> U32;
469        sp<const Element> U32_2;
470        sp<const Element> U32_3;
471        sp<const Element> U32_4;
472        sp<const Element> I32;
473        sp<const Element> I32_2;
474        sp<const Element> I32_3;
475        sp<const Element> I32_4;
476        sp<const Element> U64;
477        sp<const Element> U64_2;
478        sp<const Element> U64_3;
479        sp<const Element> U64_4;
480        sp<const Element> I64;
481        sp<const Element> I64_2;
482        sp<const Element> I64_3;
483        sp<const Element> I64_4;
484        sp<const Element> F32;
485        sp<const Element> F32_2;
486        sp<const Element> F32_3;
487        sp<const Element> F32_4;
488        sp<const Element> F64;
489        sp<const Element> F64_2;
490        sp<const Element> F64_3;
491        sp<const Element> F64_4;
492        sp<const Element> BOOLEAN;
493
494        sp<const Element> ELEMENT;
495        sp<const Element> TYPE;
496        sp<const Element> ALLOCATION;
497        sp<const Element> SAMPLER;
498        sp<const Element> SCRIPT;
499        sp<const Element> MESH;
500        sp<const Element> PROGRAM_FRAGMENT;
501        sp<const Element> PROGRAM_VERTEX;
502        sp<const Element> PROGRAM_RASTER;
503        sp<const Element> PROGRAM_STORE;
504
505        sp<const Element> A_8;
506        sp<const Element> RGB_565;
507        sp<const Element> RGB_888;
508        sp<const Element> RGBA_5551;
509        sp<const Element> RGBA_4444;
510        sp<const Element> RGBA_8888;
511
512        sp<const Element> YUV;
513
514        sp<const Element> MATRIX_4X4;
515        sp<const Element> MATRIX_3X3;
516        sp<const Element> MATRIX_2X2;
517    } mElements;
518
519    struct {
520        sp<const Sampler> CLAMP_NEAREST;
521        sp<const Sampler> CLAMP_LINEAR;
522        sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR;
523        sp<const Sampler> WRAP_NEAREST;
524        sp<const Sampler> WRAP_LINEAR;
525        sp<const Sampler> WRAP_LINEAR_MIP_LINEAR;
526        sp<const Sampler> MIRRORED_REPEAT_NEAREST;
527        sp<const Sampler> MIRRORED_REPEAT_LINEAR;
528        sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR;
529    } mSamplers;
530    friend class Sampler;
531    friend class Element;
532    friend class ScriptC;
533};
534
535 /**
536  * Base class for all RenderScript objects. Not for direct use by developers.
537  */
538class BaseObj : public android::RSC::LightRefBase<BaseObj> {
539public:
540    void * getID() const;
541    virtual ~BaseObj();
542    virtual void updateFromNative();
543    virtual bool equals(sp<const BaseObj> obj);
544
545protected:
546    void *mID;
547    RS* mRS;
548    const char * mName;
549
550    BaseObj(void *id, sp<RS> rs);
551    void checkValid();
552
553    static void * getObjID(sp<const BaseObj> o);
554
555};
556
557 /**
558  * This class provides the primary method through which data is passed to and
559  * from RenderScript kernels. An Allocation provides the backing store for a
560  * given Type.
561  *
562  * An Allocation also contains a set of usage flags that denote how the
563  * Allocation could be used. For example, an Allocation may have usage flags
564  * specifying that it can be used from a script as well as input to a
565  * Sampler. A developer must synchronize across these different usages using
566  * syncAll(int) in order to ensure that different users of the Allocation have
567  * a consistent view of memory. For example, in the case where an Allocation is
568  * used as the output of one kernel and as Sampler input in a later kernel, a
569  * developer must call syncAll(RS_ALLOCATION_USAGE_SCRIPT) prior to launching the
570  * second kernel to ensure correctness.
571  */
572class Allocation : public BaseObj {
573protected:
574    sp<const Type> mType;
575    uint32_t mUsage;
576    sp<Allocation> mAdaptedAllocation;
577
578    bool mConstrainedLOD;
579    bool mConstrainedFace;
580    bool mConstrainedY;
581    bool mConstrainedZ;
582    bool mReadAllowed;
583    bool mWriteAllowed;
584    bool mAutoPadding;
585    uint32_t mSelectedY;
586    uint32_t mSelectedZ;
587    uint32_t mSelectedLOD;
588    RsAllocationCubemapFace mSelectedFace;
589
590    uint32_t mCurrentDimX;
591    uint32_t mCurrentDimY;
592    uint32_t mCurrentDimZ;
593    uint32_t mCurrentCount;
594
595    void * getIDSafe() const;
596    void updateCacheInfo(sp<const Type> t);
597
598    Allocation(void *id, sp<RS> rs, sp<const Type> t, uint32_t usage);
599
600    void validateIsInt64();
601    void validateIsInt32();
602    void validateIsInt16();
603    void validateIsInt8();
604    void validateIsFloat32();
605    void validateIsFloat64();
606    void validateIsObject();
607
608    virtual void updateFromNative();
609
610    void validate2DRange(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h);
611    void validate3DRange(uint32_t xoff, uint32_t yoff, uint32_t zoff,
612                         uint32_t w, uint32_t h, uint32_t d);
613
614public:
615
616    /**
617     * Return Type for the allocation.
618     * @return pointer to underlying Type
619     */
620    sp<const Type> getType() const {
621        return mType;
622    }
623
624    /**
625     * Enable/Disable AutoPadding for Vec3 elements.
626     *
627     * @param useAutoPadding True: enable AutoPadding; flase: disable AutoPadding
628     *
629     */
630    void setAutoPadding(bool useAutoPadding) {
631        mAutoPadding = useAutoPadding;
632    }
633
634    /**
635     * Propagate changes from one usage of the Allocation to other usages of the Allocation.
636     * @param[in] srcLocation source location with changes to propagate elsewhere
637     */
638    void syncAll(RsAllocationUsageType srcLocation);
639
640    /**
641     * Send a buffer to the output stream.  The contents of the Allocation will
642     * be undefined after this operation. This operation is only valid if
643     * USAGE_IO_OUTPUT is set on the Allocation.
644     */
645    void ioSendOutput();
646
647    /**
648     * Receive the latest input into the Allocation. This operation
649     * is only valid if USAGE_IO_INPUT is set on the Allocation.
650     */
651    void ioGetInput();
652
653#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
654    /**
655     * Returns the handle to a raw buffer that is being managed by the screen
656     * compositor. This operation is only valid for Allocations with USAGE_IO_INPUT.
657     * @return Surface associated with allocation
658     */
659    sp<Surface> getSurface();
660
661    /**
662     * Associate a Surface with this Allocation. This
663     * operation is only valid for Allocations with USAGE_IO_OUTPUT.
664     * @param[in] s Surface to associate with allocation
665     */
666    void setSurface(sp<Surface> s);
667#endif
668
669    /**
670     * Generate a mipmap chain. This is only valid if the Type of the Allocation
671     * includes mipmaps. This function will generate a complete set of mipmaps
672     * from the top level LOD and place them into the script memory space. If
673     * the Allocation is also using other memory spaces, a call to
674     * syncAll(Allocation.USAGE_SCRIPT) is required.
675     */
676    void generateMipmaps();
677
678    /**
679     * Copy an array into part of this Allocation.
680     * @param[in] off offset of first Element to be overwritten
681     * @param[in] count number of Elements to copy
682     * @param[in] data array from which to copy
683     */
684    void copy1DRangeFrom(uint32_t off, size_t count, const void *data);
685
686    /**
687     * Copy part of an Allocation into part of this Allocation.
688     * @param[in] off offset of first Element to be overwritten
689     * @param[in] count number of Elements to copy
690     * @param[in] data Allocation from which to copy
691     * @param[in] dataOff offset of first Element in data to copy
692     */
693    void copy1DRangeFrom(uint32_t off, size_t count, sp<const Allocation> data, uint32_t dataOff);
694
695    /**
696     * Copy an array into part of this Allocation.
697     * @param[in] off offset of first Element to be overwritten
698     * @param[in] count number of Elements to copy
699     * @param[in] data array from which to copy
700     */
701    void copy1DRangeTo(uint32_t off, size_t count, void *data);
702
703    /**
704     * Copy entire array to an Allocation.
705     * @param[in] data array from which to copy
706     */
707    void copy1DFrom(const void* data);
708
709    /**
710     * Copy entire Allocation to an array.
711     * @param[in] data destination array
712     */
713    void copy1DTo(void* data);
714
715    /**
716     * Copy from an array into a rectangular region in this Allocation. The
717     * array is assumed to be tightly packed.
718     * @param[in] xoff X offset of region to update in this Allocation
719     * @param[in] yoff Y offset of region to update in this Allocation
720     * @param[in] w Width of region to update
721     * @param[in] h Height of region to update
722     * @param[in] data Array from which to copy
723     */
724    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
725                         const void *data);
726
727    /**
728     * Copy from this Allocation into a rectangular region in an array. The
729     * array is assumed to be tightly packed.
730     * @param[in] xoff X offset of region to copy from this Allocation
731     * @param[in] yoff Y offset of region to copy from this Allocation
732     * @param[in] w Width of region to update
733     * @param[in] h Height of region to update
734     * @param[in] data destination array
735     */
736    void copy2DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
737                       void *data);
738
739    /**
740     * Copy from an Allocation into a rectangular region in this Allocation.
741     * @param[in] xoff X offset of region to update in this Allocation
742     * @param[in] yoff Y offset of region to update in this Allocation
743     * @param[in] w Width of region to update
744     * @param[in] h Height of region to update
745     * @param[in] data Allocation from which to copy
746     * @param[in] dataXoff X offset of region to copy from in data
747     * @param[in] dataYoff Y offset of region to copy from in data
748     */
749    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
750                         sp<const Allocation> data, uint32_t dataXoff, uint32_t dataYoff);
751
752    /**
753     * Copy from a strided array into a rectangular region in this Allocation.
754     * @param[in] xoff X offset of region to update in this Allocation
755     * @param[in] yoff Y offset of region to update in this Allocation
756     * @param[in] w Width of region to update
757     * @param[in] h Height of region to update
758     * @param[in] data array from which to copy
759     * @param[in] stride stride of data in bytes
760     */
761    void copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
762                           const void *data, size_t stride);
763
764    /**
765     * Copy from a strided array into this Allocation.
766     * @param[in] data array from which to copy
767     * @param[in] stride stride of data in bytes
768     */
769    void copy2DStridedFrom(const void *data, size_t stride);
770
771    /**
772     * Copy from a rectangular region in this Allocation into a strided array.
773     * @param[in] xoff X offset of region to update in this Allocation
774     * @param[in] yoff Y offset of region to update in this Allocation
775     * @param[in] w Width of region to update
776     * @param[in] h Height of region to update
777     * @param[in] data destination array
778     * @param[in] stride stride of data in bytes
779     */
780    void copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
781                         void *data, size_t stride);
782
783    /**
784     * Copy this Allocation into a strided array.
785     * @param[in] data destination array
786     * @param[in] stride stride of data in bytes
787     */
788    void copy2DStridedTo(void *data, size_t stride);
789
790
791    /**
792     * Copy from an array into a 3D region in this Allocation. The
793     * array is assumed to be tightly packed.
794     * @param[in] xoff X offset of region to update in this Allocation
795     * @param[in] yoff Y offset of region to update in this Allocation
796     * @param[in] zoff Z offset of region to update in this Allocation
797     * @param[in] w Width of region to update
798     * @param[in] h Height of region to update
799     * @param[in] d Depth of region to update
800     * @param[in] data Array from which to copy
801     */
802    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
803                         uint32_t h, uint32_t d, const void* data);
804
805    /**
806     * Copy from an Allocation into a 3D region in this Allocation.
807     * @param[in] xoff X offset of region to update in this Allocation
808     * @param[in] yoff Y offset of region to update in this Allocation
809     * @param[in] zoff Z offset of region to update in this Allocation
810     * @param[in] w Width of region to update
811     * @param[in] h Height of region to update
812     * @param[in] d Depth of region to update
813     * @param[in] data Allocation from which to copy
814     * @param[in] dataXoff X offset of region in data to copy from
815     * @param[in] dataYoff Y offset of region in data to copy from
816     * @param[in] dataZoff Z offset of region in data to copy from
817     */
818    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff,
819                         uint32_t w, uint32_t h, uint32_t d,
820                         sp<const Allocation> data,
821                         uint32_t dataXoff, uint32_t dataYoff, uint32_t dataZoff);
822
823    /**
824     * Copy a 3D region in this Allocation into an array. The
825     * array is assumed to be tightly packed.
826     * @param[in] xoff X offset of region to update in this Allocation
827     * @param[in] yoff Y offset of region to update in this Allocation
828     * @param[in] zoff Z offset of region to update in this Allocation
829     * @param[in] w Width of region to update
830     * @param[in] h Height of region to update
831     * @param[in] d Depth of region to update
832     * @param[in] data Array from which to copy
833     */
834    void copy3DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
835                         uint32_t h, uint32_t d, void* data);
836
837    /**
838     * Creates an Allocation for use by scripts with a given Type.
839     * @param[in] rs Context to which the Allocation will belong
840     * @param[in] type Type of the Allocation
841     * @param[in] mipmaps desired mipmap behavior for the Allocation
842     * @param[in] usage usage for the Allocation
843     * @return new Allocation
844     */
845    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
846                                   RsAllocationMipmapControl mipmaps, uint32_t usage);
847
848    /**
849     * Creates an Allocation for use by scripts with a given Type and a backing pointer. For use
850     * with RS_ALLOCATION_USAGE_SHARED.
851     * @param[in] rs Context to which the Allocation will belong
852     * @param[in] type Type of the Allocation
853     * @param[in] mipmaps desired mipmap behavior for the Allocation
854     * @param[in] usage usage for the Allocation
855     * @param[in] pointer existing backing store to use for this Allocation if possible
856     * @return new Allocation
857     */
858    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
859                                   RsAllocationMipmapControl mipmaps, uint32_t usage, void * pointer);
860
861    /**
862     * Creates an Allocation for use by scripts with a given Type with no mipmaps.
863     * @param[in] rs Context to which the Allocation will belong
864     * @param[in] type Type of the Allocation
865     * @param[in] usage usage for the Allocation
866     * @return new Allocation
867     */
868    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
869                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
870    /**
871     * Creates an Allocation with a specified number of given elements.
872     * @param[in] rs Context to which the Allocation will belong
873     * @param[in] e Element used in the Allocation
874     * @param[in] count Number of elements of the Allocation
875     * @param[in] usage usage for the Allocation
876     * @return new Allocation
877     */
878    static sp<Allocation> createSized(sp<RS> rs, sp<const Element> e, size_t count,
879                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
880
881    /**
882     * Creates a 2D Allocation with a specified number of given elements.
883     * @param[in] rs Context to which the Allocation will belong
884     * @param[in] e Element used in the Allocation
885     * @param[in] x Width in Elements of the Allocation
886     * @param[in] y Height of the Allocation
887     * @param[in] usage usage for the Allocation
888     * @return new Allocation
889     */
890    static sp<Allocation> createSized2D(sp<RS> rs, sp<const Element> e,
891                                        size_t x, size_t y,
892                                        uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
893
894
895    /**
896     * Get the backing pointer for a USAGE_SHARED allocation.
897     * @param[in] stride optional parameter. when non-NULL, will contain
898     *   stride in bytes of a 2D Allocation
899     * @return pointer to data
900     */
901    void * getPointer(size_t *stride = NULL);
902};
903
/**
 * An Element represents one item within an Allocation. An Element is roughly
 * equivalent to a C type in a RenderScript kernel. Elements may be basic
 * or complex. Some basic elements are:
 *
 * - A single float value (equivalent to a float in a kernel)
 * - A four-element float vector (equivalent to a float4 in a kernel)
 * - An unsigned 32-bit integer (equivalent to an unsigned int in a kernel)
 * - A single signed 8-bit integer (equivalent to a char in a kernel)
 *
 * Basic Elements are composed of a data type (RsDataType) and a data kind
 * (RsDataKind). The data type encodes C type information of an Element,
 * while the data kind encodes how that Element should be interpreted by a
 * Sampler. Note that Allocation objects with DataKind USER cannot be used as
 * input for a Sampler. In general, Allocation objects that are intended for
 * use with a Sampler should use bitmap-derived Elements such as
 * Element::RGBA_8888.
 */
922
923
924class Element : public BaseObj {
925public:
926    bool isComplex();
927
928    /**
929     * Elements could be simple, such as an int or a float, or a structure with
930     * multiple sub-elements, such as a collection of floats, float2,
931     * float4. This function returns zero for simple elements or the number of
932     * sub-elements otherwise.
933     * @return number of sub-elements
934     */
935    size_t getSubElementCount() {
936        return mVisibleElementMapSize;
937    }
938
939    /**
940     * For complex Elements, this returns the sub-element at a given index.
941     * @param[in] index index of sub-element
942     * @return sub-element
943     */
944    sp<const Element> getSubElement(uint32_t index);
945
946    /**
947     * For complex Elements, this returns the name of the sub-element at a given
948     * index.
949     * @param[in] index index of sub-element
950     * @return name of sub-element
951     */
952    const char * getSubElementName(uint32_t index);
953
954    /**
955     * For complex Elements, this returns the size of the sub-element at a given
956     * index.
957     * @param[in] index index of sub-element
958     * @return size of sub-element
959     */
960    size_t getSubElementArraySize(uint32_t index);
961
962    /**
963     * Returns the location of a sub-element within a complex Element.
964     * @param[in] index index of sub-element
965     * @return offset in bytes
966     */
967    uint32_t getSubElementOffsetBytes(uint32_t index);
968
969    /**
970     * Returns the data type used for the Element.
971     * @return data type
972     */
973    RsDataType getDataType() const {
974        return mType;
975    }
976
977    /**
978     * Returns the data kind used for the Element.
979     * @return data kind
980     */
981    RsDataKind getDataKind() const {
982        return mKind;
983    }
984
985    /**
986     * Returns the size in bytes of the Element.
987     * @return size in bytes
988     */
989    size_t getSizeBytes() const {
990        return mSizeBytes;
991    }
992
993    /**
994     * Returns the number of vector components for this Element.
995     * @return number of vector components
996     */
997    uint32_t getVectorSize() const {
998        return mVectorSize;
999    }
1000
1001    /**
1002     * Utility function for returning an Element containing a single bool.
1003     * @param[in] rs RenderScript context
1004     * @return Element
1005     */
1006    static sp<const Element> BOOLEAN(sp<RS> rs);
1007    /**
1008     * Utility function for returning an Element containing a single unsigned char.
1009     * @param[in] rs RenderScript context
1010     * @return Element
1011     */
1012    static sp<const Element> U8(sp<RS> rs);
1013    /**
1014     * Utility function for returning an Element containing a single signed char.
1015     * @param[in] rs RenderScript context
1016     * @return Element
1017     */
1018    static sp<const Element> I8(sp<RS> rs);
1019    /**
1020     * Utility function for returning an Element containing a single unsigned short.
1021     * @param[in] rs RenderScript context
1022     * @return Element
1023     */
1024    static sp<const Element> U16(sp<RS> rs);
1025    /**
1026     * Utility function for returning an Element containing a single signed short.
1027     * @param[in] rs RenderScript context
1028     * @return Element
1029     */
1030    static sp<const Element> I16(sp<RS> rs);
1031    /**
1032     * Utility function for returning an Element containing a single unsigned int.
1033     * @param[in] rs RenderScript context
1034     * @return Element
1035     */
1036    static sp<const Element> U32(sp<RS> rs);
1037    /**
1038     * Utility function for returning an Element containing a single signed int.
1039     * @param[in] rs RenderScript context
1040     * @return Element
1041     */
1042    static sp<const Element> I32(sp<RS> rs);
1043    /**
1044     * Utility function for returning an Element containing a single unsigned long long.
1045     * @param[in] rs RenderScript context
1046     * @return Element
1047     */
1048    static sp<const Element> U64(sp<RS> rs);
1049    /**
1050     * Utility function for returning an Element containing a single signed long long.
1051     * @param[in] rs RenderScript context
1052     * @return Element
1053     */
1054    static sp<const Element> I64(sp<RS> rs);
1055    /**
1056     * Utility function for returning an Element containing a single float.
1057     * @param[in] rs RenderScript context
1058     * @return Element
1059     */
1060    static sp<const Element> F32(sp<RS> rs);
1061    /**
1062     * Utility function for returning an Element containing a single double.
1063     * @param[in] rs RenderScript context
1064     * @return Element
1065     */
1066    static sp<const Element> F64(sp<RS> rs);
1067    /**
1068     * Utility function for returning an Element containing a single Element.
1069     * @param[in] rs RenderScript context
1070     * @return Element
1071     */
1072    static sp<const Element> ELEMENT(sp<RS> rs);
1073    /**
1074     * Utility function for returning an Element containing a single Type.
1075     * @param[in] rs RenderScript context
1076     * @return Element
1077     */
1078    static sp<const Element> TYPE(sp<RS> rs);
1079    /**
1080     * Utility function for returning an Element containing a single Allocation.
1081     * @param[in] rs RenderScript context
1082     * @return Element
1083     */
1084    static sp<const Element> ALLOCATION(sp<RS> rs);
1085    /**
1086     * Utility function for returning an Element containing a single Sampler.
1087     * @param[in] rs RenderScript context
1088     * @return Element
1089     */
1090    static sp<const Element> SAMPLER(sp<RS> rs);
1091    /**
1092     * Utility function for returning an Element containing a single Script.
1093     * @param[in] rs RenderScript context
1094     * @return Element
1095     */
1096    static sp<const Element> SCRIPT(sp<RS> rs);
1097    /**
1098     * Utility function for returning an Element containing an ALPHA_8 pixel.
1099     * @param[in] rs RenderScript context
1100     * @return Element
1101     */
1102    static sp<const Element> A_8(sp<RS> rs);
1103    /**
1104     * Utility function for returning an Element containing an RGB_565 pixel.
1105     * @param[in] rs RenderScript context
1106     * @return Element
1107     */
1108    static sp<const Element> RGB_565(sp<RS> rs);
1109    /**
1110     * Utility function for returning an Element containing an RGB_888 pixel.
1111     * @param[in] rs RenderScript context
1112     * @return Element
1113     */
1114    static sp<const Element> RGB_888(sp<RS> rs);
1115    /**
1116     * Utility function for returning an Element containing an RGBA_5551 pixel.
1117     * @param[in] rs RenderScript context
1118     * @return Element
1119     */
1120    static sp<const Element> RGBA_5551(sp<RS> rs);
1121    /**
1122     * Utility function for returning an Element containing an RGBA_4444 pixel.
1123     * @param[in] rs RenderScript context
1124     * @return Element
1125     */
1126    static sp<const Element> RGBA_4444(sp<RS> rs);
1127    /**
1128     * Utility function for returning an Element containing an RGBA_8888 pixel.
1129     * @param[in] rs RenderScript context
1130     * @return Element
1131     */
1132    static sp<const Element> RGBA_8888(sp<RS> rs);
1133
1134    /**
1135     * Utility function for returning an Element containing a float2.
1136     * @param[in] rs RenderScript context
1137     * @return Element
1138     */
1139    static sp<const Element> F32_2(sp<RS> rs);
1140    /**
1141     * Utility function for returning an Element containing a float3.
1142     * @param[in] rs RenderScript context
1143     * @return Element
1144     */
1145    static sp<const Element> F32_3(sp<RS> rs);
1146    /**
1147     * Utility function for returning an Element containing a float4.
1148     * @param[in] rs RenderScript context
1149     * @return Element
1150     */
1151    static sp<const Element> F32_4(sp<RS> rs);
1152    /**
1153     * Utility function for returning an Element containing a double2.
1154     * @param[in] rs RenderScript context
1155     * @return Element
1156     */
1157    static sp<const Element> F64_2(sp<RS> rs);
1158    /**
1159     * Utility function for returning an Element containing a double3.
1160     * @param[in] rs RenderScript context
1161     * @return Element
1162     */
1163    static sp<const Element> F64_3(sp<RS> rs);
1164    /**
1165     * Utility function for returning an Element containing a double4.
1166     * @param[in] rs RenderScript context
1167     * @return Element
1168     */
1169    static sp<const Element> F64_4(sp<RS> rs);
1170    /**
1171     * Utility function for returning an Element containing a uchar2.
1172     * @param[in] rs RenderScript context
1173     * @return Element
1174     */
1175    static sp<const Element> U8_2(sp<RS> rs);
1176    /**
1177     * Utility function for returning an Element containing a uchar3.
1178     * @param[in] rs RenderScript context
1179     * @return Element
1180     */
1181    static sp<const Element> U8_3(sp<RS> rs);
1182    /**
1183     * Utility function for returning an Element containing a uchar4.
1184     * @param[in] rs RenderScript context
1185     * @return Element
1186     */
1187    static sp<const Element> U8_4(sp<RS> rs);
1188    /**
1189     * Utility function for returning an Element containing a char2.
1190     * @param[in] rs RenderScript context
1191     * @return Element
1192     */
1193    static sp<const Element> I8_2(sp<RS> rs);
1194    /**
1195     * Utility function for returning an Element containing a char3.
1196     * @param[in] rs RenderScript context
1197     * @return Element
1198     */
1199    static sp<const Element> I8_3(sp<RS> rs);
1200    /**
1201     * Utility function for returning an Element containing a char4.
1202     * @param[in] rs RenderScript context
1203     * @return Element
1204     */
1205    static sp<const Element> I8_4(sp<RS> rs);
1206    /**
1207     * Utility function for returning an Element containing a ushort2.
1208     * @param[in] rs RenderScript context
1209     * @return Element
1210     */
1211    static sp<const Element> U16_2(sp<RS> rs);
1212    /**
1213     * Utility function for returning an Element containing a ushort3.
1214     * @param[in] rs RenderScript context
1215     * @return Element
1216     */
1217    static sp<const Element> U16_3(sp<RS> rs);
1218    /**
1219     * Utility function for returning an Element containing a ushort4.
1220     * @param[in] rs RenderScript context
1221     * @return Element
1222     */
1223    static sp<const Element> U16_4(sp<RS> rs);
1224    /**
1225     * Utility function for returning an Element containing a short2.
1226     * @param[in] rs RenderScript context
1227     * @return Element
1228     */
1229    static sp<const Element> I16_2(sp<RS> rs);
1230    /**
1231     * Utility function for returning an Element containing a short3.
1232     * @param[in] rs RenderScript context
1233     * @return Element
1234     */
1235    static sp<const Element> I16_3(sp<RS> rs);
1236    /**
1237     * Utility function for returning an Element containing a short4.
1238     * @param[in] rs RenderScript context
1239     * @return Element
1240     */
1241    static sp<const Element> I16_4(sp<RS> rs);
1242    /**
1243     * Utility function for returning an Element containing a uint2.
1244     * @param[in] rs RenderScript context
1245     * @return Element
1246     */
1247    static sp<const Element> U32_2(sp<RS> rs);
1248    /**
1249     * Utility function for returning an Element containing a uint3.
1250     * @param[in] rs RenderScript context
1251     * @return Element
1252     */
1253    static sp<const Element> U32_3(sp<RS> rs);
1254    /**
1255     * Utility function for returning an Element containing a uint4.
1256     * @param[in] rs RenderScript context
1257     * @return Element
1258     */
1259    static sp<const Element> U32_4(sp<RS> rs);
1260    /**
1261     * Utility function for returning an Element containing an int2.
1262     * @param[in] rs RenderScript context
1263     * @return Element
1264     */
1265    static sp<const Element> I32_2(sp<RS> rs);
1266    /**
1267     * Utility function for returning an Element containing an int3.
1268     * @param[in] rs RenderScript context
1269     * @return Element
1270     */
1271    static sp<const Element> I32_3(sp<RS> rs);
1272    /**
1273     * Utility function for returning an Element containing an int4.
1274     * @param[in] rs RenderScript context
1275     * @return Element
1276     */
1277    static sp<const Element> I32_4(sp<RS> rs);
1278    /**
1279     * Utility function for returning an Element containing a ulong2.
1280     * @param[in] rs RenderScript context
1281     * @return Element
1282     */
1283    static sp<const Element> U64_2(sp<RS> rs);
1284    /**
1285     * Utility function for returning an Element containing a ulong3.
1286     * @param[in] rs RenderScript context
1287     * @return Element
1288     */
1289    static sp<const Element> U64_3(sp<RS> rs);
1290    /**
1291     * Utility function for returning an Element containing a ulong4.
1292     * @param[in] rs RenderScript context
1293     * @return Element
1294     */
1295    static sp<const Element> U64_4(sp<RS> rs);
1296    /**
1297     * Utility function for returning an Element containing a long2.
1298     * @param[in] rs RenderScript context
1299     * @return Element
1300     */
1301    static sp<const Element> I64_2(sp<RS> rs);
1302    /**
1303     * Utility function for returning an Element containing a long3.
1304     * @param[in] rs RenderScript context
1305     * @return Element
1306     */
1307    static sp<const Element> I64_3(sp<RS> rs);
1308    /**
1309     * Utility function for returning an Element containing a long4.
1310     * @param[in] rs RenderScript context
1311     * @return Element
1312     */
1313    static sp<const Element> I64_4(sp<RS> rs);
1314    /**
1315     * Utility function for returning an Element containing a YUV pixel.
1316     * @param[in] rs RenderScript context
1317     * @return Element
1318     */
1319    static sp<const Element> YUV(sp<RS> rs);
1320    /**
1321     * Utility function for returning an Element containing an rs_matrix_4x4.
1322     * @param[in] rs RenderScript context
1323     * @return Element
1324     */
1325    static sp<const Element> MATRIX_4X4(sp<RS> rs);
1326    /**
1327     * Utility function for returning an Element containing an rs_matrix_3x3.
1328     * @param[in] rs RenderScript context
1329     * @return Element
1330     */
1331    static sp<const Element> MATRIX_3X3(sp<RS> rs);
1332    /**
1333     * Utility function for returning an Element containing an rs_matrix_2x2.
1334     * @param[in] rs RenderScript context
1335     * @return Element
1336     */
1337    static sp<const Element> MATRIX_2X2(sp<RS> rs);
1338
1339    void updateFromNative();
1340
1341    /**
1342     * Create an Element with a given DataType.
1343     * @param[in] rs RenderScript context
1344     * @param[in] dt data type
1345     * @return Element
1346     */
1347    static sp<const Element> createUser(sp<RS> rs, RsDataType dt);
1348    /**
1349     * Create a vector Element with the given DataType
1350     * @param[in] rs RenderScript
1351     * @param[in] dt DataType
1352     * @param[in] size vector size
1353     * @return Element
1354     */
1355    static sp<const Element> createVector(sp<RS> rs, RsDataType dt, uint32_t size);
1356    /**
1357     * Create an Element with a given DataType and DataKind.
1358     * @param[in] rs RenderScript context
1359     * @param[in] dt DataType
1360     * @param[in] dk DataKind
1361     * @return Element
1362     */
1363    static sp<const Element> createPixel(sp<RS> rs, RsDataType dt, RsDataKind dk);
1364
1365    /**
1366     * Returns true if the Element can interoperate with this Element.
1367     * @param[in] e Element to compare
1368     * @return true if Elements can interoperate
1369     */
1370    bool isCompatible(sp<const Element>e) const;
1371
1372    /**
1373     * Builder class for producing complex elements with matching field and name
1374     * pairs. The builder starts empty. The order in which elements are added is
1375     * retained for the layout in memory.
1376     */
1377    class Builder {
1378    private:
1379        RS* mRS;
1380        size_t mElementsCount;
1381        size_t mElementsVecSize;
1382        sp<const Element> * mElements;
1383        char ** mElementNames;
1384        size_t * mElementNameLengths;
1385        uint32_t * mArraySizes;
1386        bool mSkipPadding;
1387
1388    public:
1389        Builder(sp<RS> rs);
1390        ~Builder();
1391        void add(sp<const Element> e, const char * name, uint32_t arraySize = 1);
1392        sp<const Element> create();
1393    };
1394
1395protected:
1396    Element(void *id, sp<RS> rs,
1397            sp<const Element> * elements,
1398            size_t elementCount,
1399            const char ** elementNames,
1400            size_t * elementNameLengths,
1401            uint32_t * arraySizes);
1402    Element(void *id, sp<RS> rs, RsDataType dt, RsDataKind dk, bool norm, uint32_t size);
1403    Element(sp<RS> rs);
1404    virtual ~Element();
1405
1406private:
1407    void updateVisibleSubElements();
1408
1409    size_t mElementsCount;
1410    size_t mVisibleElementMapSize;
1411
1412    sp<const Element> * mElements;
1413    char ** mElementNames;
1414    size_t * mElementNameLengths;
1415    uint32_t * mArraySizes;
1416    uint32_t * mVisibleElementMap;
1417    uint32_t * mOffsetInBytes;
1418
1419    RsDataType mType;
1420    RsDataKind mKind;
1421    bool mNormalized;
1422    size_t mSizeBytes;
1423    size_t mVectorSize;
1424};
1425
1426class FieldPacker {
1427protected:
1428    unsigned char* mData;
1429    size_t mPos;
1430    size_t mLen;
1431
1432public:
1433    FieldPacker(size_t len)
1434        : mPos(0), mLen(len) {
1435            mData = new unsigned char[len];
1436        }
1437
1438    virtual ~FieldPacker() {
1439        delete [] mData;
1440    }
1441
1442    void align(size_t v) {
1443        if ((v & (v - 1)) != 0) {
1444            //            ALOGE("Non-power-of-two alignment: %zu", v);
1445            return;
1446        }
1447
1448        while ((mPos & (v - 1)) != 0) {
1449            mData[mPos++] = 0;
1450        }
1451    }
1452
1453    void reset() {
1454        mPos = 0;
1455    }
1456
1457    void reset(size_t i) {
1458        if (i >= mLen) {
1459            //            ALOGE("Out of bounds: i (%zu) >= len (%zu)", i, mLen);
1460            return;
1461        }
1462        mPos = i;
1463    }
1464
1465    void skip(size_t i) {
1466        size_t res = mPos + i;
1467        if (res > mLen) {
1468            //            ALOGE("Exceeded buffer length: i (%zu) > len (%zu)", i, mLen);
1469            return;
1470        }
1471        mPos = res;
1472    }
1473
1474    void* getData() const {
1475        return mData;
1476    }
1477
1478    size_t getLength() const {
1479        return mLen;
1480    }
1481
1482    template <typename T>
1483        void add(T t) {
1484        align(sizeof(t));
1485        if (mPos + sizeof(t) <= mLen) {
1486            memcpy(&mData[mPos], &t, sizeof(t));
1487            mPos += sizeof(t);
1488        }
1489    }
1490
1491    /*
1492      void add(rs_matrix4x4 m) {
1493      for (size_t i = 0; i < 16; i++) {
1494      add(m.m[i]);
1495      }
1496      }
1497
1498      void add(rs_matrix3x3 m) {
1499      for (size_t i = 0; i < 9; i++) {
1500      add(m.m[i]);
1501      }
1502      }
1503
1504      void add(rs_matrix2x2 m) {
1505      for (size_t i = 0; i < 4; i++) {
1506      add(m.m[i]);
1507      }
1508      }
1509    */
1510
1511    void add(sp<BaseObj> obj) {
1512        if (obj != NULL) {
1513            add((uint32_t) (uintptr_t) obj->getID());
1514        } else {
1515            add((uint32_t) 0);
1516        }
1517    }
1518};
1519
1520/**
1521 * A Type describes the Element and dimensions used for an Allocation or a
1522 * parallel operation.
1523 *
1524 * A Type always includes an Element and an X dimension. A Type may be
1525 * multidimensional, up to three dimensions. A nonzero value in the Y or Z
1526 * dimensions indicates that the dimension is present. Note that a Type with
1527 * only a given X dimension and a Type with the same X dimension but Y = 1 are
1528 * not equivalent.
1529 *
1530 * A Type also supports inclusion of level of detail (LOD) or cube map
1531 * faces. LOD and cube map faces are booleans to indicate present or not
1532 * present.
1533 *
1534 * A Type also supports YUV format information to support an Allocation in a YUV
1535 * format. The YUV formats supported are YV12 and NV21.
1536 */
1537class Type : public BaseObj {
1538protected:
1539    friend class Allocation;
1540
1541    uint32_t mDimX;
1542    uint32_t mDimY;
1543    uint32_t mDimZ;
1544    RSYuvFormat mYuvFormat;
1545    bool mDimMipmaps;
1546    bool mDimFaces;
1547    size_t mElementCount;
1548    sp<const Element> mElement;
1549
1550    Type(void *id, sp<RS> rs);
1551
1552    void calcElementCount();
1553    virtual void updateFromNative();
1554
1555public:
1556
1557    /**
1558     * Returns the YUV format.
1559     * @return YUV format of the Allocation
1560     */
1561    RSYuvFormat getYuvFormat() const {
1562        return mYuvFormat;
1563    }
1564
1565    /**
1566     * Returns the Element of the Allocation.
1567     * @return YUV format of the Allocation
1568     */
1569    sp<const Element> getElement() const {
1570        return mElement;
1571    }
1572
1573    /**
1574     * Returns the X dimension of the Allocation.
1575     * @return X dimension of the allocation
1576     */
1577    uint32_t getX() const {
1578        return mDimX;
1579    }
1580
1581    /**
1582     * Returns the Y dimension of the Allocation.
1583     * @return Y dimension of the allocation
1584     */
1585    uint32_t getY() const {
1586        return mDimY;
1587    }
1588
1589    /**
1590     * Returns the Z dimension of the Allocation.
1591     * @return Z dimension of the allocation
1592     */
1593    uint32_t getZ() const {
1594        return mDimZ;
1595    }
1596
1597    /**
1598     * Returns true if the Allocation has mipmaps.
1599     * @return true if the Allocation has mipmaps
1600     */
1601    bool hasMipmaps() const {
1602        return mDimMipmaps;
1603    }
1604
1605    /**
1606     * Returns true if the Allocation is a cube map
1607     * @return true if the Allocation is a cube map
1608     */
1609    bool hasFaces() const {
1610        return mDimFaces;
1611    }
1612
1613    /**
1614     * Returns number of accessible Elements in the Allocation
1615     * @return number of accessible Elements in the Allocation
1616     */
1617    size_t getCount() const {
1618        return mElementCount;
1619    }
1620
1621    /**
1622     * Returns size in bytes of all Elements in the Allocation
1623     * @return size in bytes of all Elements in the Allocation
1624     */
1625    size_t getSizeBytes() const {
1626        return mElementCount * mElement->getSizeBytes();
1627    }
1628
1629    /**
1630     * Creates a new Type with the given Element and dimensions.
1631     * @param[in] rs RenderScript context
1632     * @param[in] e Element
1633     * @param[in] dimX X dimension
1634     * @param[in] dimY Y dimension
1635     * @param[in] dimZ Z dimension
1636     * @return new Type
1637     */
1638    static sp<const Type> create(sp<RS> rs, sp<const Element> e, uint32_t dimX, uint32_t dimY, uint32_t dimZ);
1639
1640    class Builder {
1641    protected:
1642        RS* mRS;
1643        uint32_t mDimX;
1644        uint32_t mDimY;
1645        uint32_t mDimZ;
1646        RSYuvFormat mYuvFormat;
1647        bool mDimMipmaps;
1648        bool mDimFaces;
1649        sp<const Element> mElement;
1650
1651    public:
1652        Builder(sp<RS> rs, sp<const Element> e);
1653
1654        void setX(uint32_t value);
1655        void setY(uint32_t value);
1656        void setZ(uint32_t value);
1657        void setYuvFormat(RSYuvFormat format);
1658        void setMipmaps(bool value);
1659        void setFaces(bool value);
1660        sp<const Type> create();
1661    };
1662
1663};
1664
1665/**
1666 * The parent class for all executable Scripts. This should not be used by applications.
1667 */
1668class Script : public BaseObj {
1669private:
1670
1671protected:
1672    Script(void *id, sp<RS> rs);
1673    void forEach(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1674            const void *v, size_t) const;
1675    void reduce(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1676                const RsScriptCall *sc) const;
1677    void bindAllocation(sp<Allocation> va, uint32_t slot) const;
1678    void setVar(uint32_t index, const void *, size_t len) const;
1679    void setVar(uint32_t index, sp<const BaseObj> o) const;
1680    void invoke(uint32_t slot, const void *v, size_t len) const;
1681
1682
1683    void invoke(uint32_t slot) const {
1684        invoke(slot, NULL, 0);
1685    }
1686    void setVar(uint32_t index, float v) const {
1687        setVar(index, &v, sizeof(v));
1688    }
1689    void setVar(uint32_t index, double v) const {
1690        setVar(index, &v, sizeof(v));
1691    }
1692    void setVar(uint32_t index, int32_t v) const {
1693        setVar(index, &v, sizeof(v));
1694    }
1695    void setVar(uint32_t index, uint32_t v) const {
1696        setVar(index, &v, sizeof(v));
1697    }
1698    void setVar(uint32_t index, int64_t v) const {
1699        setVar(index, &v, sizeof(v));
1700    }
1701    void setVar(uint32_t index, bool v) const {
1702        setVar(index, &v, sizeof(v));
1703    }
1704
1705public:
1706    class FieldBase {
1707    protected:
1708        sp<const Element> mElement;
1709        sp<Allocation> mAllocation;
1710
1711        void init(sp<RS> rs, uint32_t dimx, uint32_t usages = 0);
1712
1713    public:
1714        sp<const Element> getElement() {
1715            return mElement;
1716        }
1717
1718        sp<const Type> getType() {
1719            return mAllocation->getType();
1720        }
1721
1722        sp<const Allocation> getAllocation() {
1723            return mAllocation;
1724        }
1725
1726        //void updateAllocation();
1727    };
1728};
1729
1730/**
1731 * The parent class for all user-defined scripts. This is intended to be used by auto-generated code only.
1732 */
1733class ScriptC : public Script {
1734protected:
1735    ScriptC(sp<RS> rs,
1736            const void *codeTxt, size_t codeLength,
1737            const char *cachedName, size_t cachedNameLength,
1738            const char *cacheDir, size_t cacheDirLength);
1739
1740};
1741
1742/**
1743 * The parent class for all script intrinsics. Intrinsics provide highly optimized implementations of
1744 * basic functions. This is not intended to be used directly.
1745 */
1746class ScriptIntrinsic : public Script {
1747 protected:
1748    sp<const Element> mElement;
1749    ScriptIntrinsic(sp<RS> rs, int id, sp<const Element> e);
1750    virtual ~ScriptIntrinsic();
1751};
1752
1753/**
1754 * Intrinsic for converting RGB to RGBA by using a 3D lookup table. The incoming
1755 * r,g,b values are use as normalized x,y,z coordinates into a 3D
1756 * allocation. The 8 nearest values are sampled and linearly interpolated. The
1757 * result is placed in the output.
1758 */
1759class ScriptIntrinsic3DLUT : public ScriptIntrinsic {
1760 private:
1761    ScriptIntrinsic3DLUT(sp<RS> rs, sp<const Element> e);
1762 public:
1763    /**
1764     * Supported Element types are U8_4. Default lookup table is identity.
1765     * @param[in] rs RenderScript context
1766     * @param[in] e Element
1767     * @return new ScriptIntrinsic
1768     */
1769    static sp<ScriptIntrinsic3DLUT> create(sp<RS> rs, sp<const Element> e);
1770
1771    /**
1772     * Launch the intrinsic.
1773     * @param[in] ain input Allocation
1774     * @param[in] aout output Allocation
1775     */
1776    void forEach(sp<Allocation> ain, sp<Allocation> aout);
1777
1778    /**
1779     * Sets the lookup table. The lookup table must use the same Element as the
1780     * intrinsic.
1781     * @param[in] lut new lookup table
1782     */
1783    void setLUT(sp<Allocation> lut);
1784};
1785
1786
1787/**
1788 * Intrinsic kernel provides high performance RenderScript APIs to BLAS.
1789 *
1790 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
1791 * building blocks for performing basic vector and matrix operations.
1792 *
1793 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
1794 *
1795 **/
1796class ScriptIntrinsicBLAS : public ScriptIntrinsic {
1797 private:
1798    ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e);
1799 public:
1800    /**
1801     * Create an intrinsic to access BLAS subroutines.
1802     *
1803     * @param rs The RenderScript context
1804     * @return ScriptIntrinsicBLAS
1805     */
1806    static sp<ScriptIntrinsicBLAS> create(sp<RS> rs);
1807
1808    /**
1809     * SGEMV performs one of the matrix-vector operations
1810     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1811     *
1812     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
1813     *
1814     * @param TransA The type of transpose applied to matrix A.
1815     * @param alpha The scalar alpha.
1816     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
1817     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1818     * @param incX The increment for the elements of vector x, must be larger than zero.
1819     * @param beta The scalar beta.
1820     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
1821     * @param incY The increment for the elements of vector y, must be larger than zero.
1822     */
1823    void SGEMV(RsBlasTranspose TransA,
1824               float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1825               float beta, sp<Allocation> Y, int incY);
1826
1827    /**
1828     * DGEMV performs one of the matrix-vector operations
1829     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1830     *
1831     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
1832     *
1833     * @param TransA The type of transpose applied to matrix A.
1834     * @param alpha The scalar alpha.
1835     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
1836     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
1837     * @param incX The increment for the elements of vector x, must be larger than zero.
1838     * @param beta The scalar beta.
1839     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
1840     * @param incY The increment for the elements of vector y, must be larger than zero.
1841     */
1842    void DGEMV(RsBlasTranspose TransA,
1843               double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1844               double beta, sp<Allocation> Y, int incY);
1845
1846    /**
1847     * CGEMV performs one of the matrix-vector operations
1848     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1849     *
1850     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
1851     *
1852     * @param TransA The type of transpose applied to matrix A.
1853     * @param alpha The scalar alpha.
1854     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
1855     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
1856     * @param incX The increment for the elements of vector x, must be larger than zero.
1857     * @param beta The scalar beta.
1858     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
1859     * @param incY The increment for the elements of vector y, must be larger than zero.
1860     */
1861    void CGEMV(RsBlasTranspose TransA,
1862               Float2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1863               Float2 beta, sp<Allocation> Y, int incY);
1864
1865    /**
1866     * ZGEMV performs one of the matrix-vector operations
1867     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1868     *
1869     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
1870     *
1871     * @param TransA The type of transpose applied to matrix A.
1872     * @param alpha The scalar alpha.
1873     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
1874     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
1875     * @param incX The increment for the elements of vector x; must be greater than zero.
1876     * @param beta The scalar beta.
1877     * @param Y The input/output allocation containing vector y (overwritten with the result); supported element type: {Element#F64_2}.
1878     * @param incY The increment for the elements of vector y; must be greater than zero.
1879     */
1880    void ZGEMV(RsBlasTranspose TransA,
1881               Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1882               Double2 beta, sp<Allocation> Y, int incY);
1883
1884    /**
1885     * SGBMV performs one of the matrix-vector operations
1886     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1887     *
1888     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
1889     *
1890     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1891     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1892     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1893     *           for i in range(0, m):
1894     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1895     *                  b[i, j-i+kl] = a[i, j]
1896     *
1897     * @param TransA The type of transpose applied to matrix A.
1898     * @param KL The number of sub-diagonals of the matrix A.
1899     * @param KU The number of super-diagonals of the matrix A.
1900     * @param alpha The scalar alpha.
1901     * @param A The input allocation containing the band matrix A; supported element type: {Element#F32}.
1902     * @param X The input allocation containing vector x; supported element type: {Element#F32}.
1903     * @param incX The increment for the elements of vector x; must be greater than zero.
1904     * @param beta The scalar beta.
1905     * @param Y The input/output allocation containing vector y (overwritten with the result); supported element type: {Element#F32}.
1906     * @param incY The increment for the elements of vector y; must be greater than zero.
1907     */
1908    void SGBMV(RsBlasTranspose TransA,
1909               int KL, int KU, float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1910               float beta, sp<Allocation> Y, int incY);
1911
1912    /**
1913     * DGBMV performs one of the matrix-vector operations
1914     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1915     *
1916     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
1917     *
1918     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1919     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1920     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1921     *           for i in range(0, m):
1922     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1923     *                  b[i, j-i+kl] = a[i, j]
1924     *
1925     * @param TransA The type of transpose applied to matrix A.
1926     * @param KL The number of sub-diagonals of the matrix A.
1927     * @param KU The number of super-diagonals of the matrix A.
1928     * @param alpha The scalar alpha.
1929     * @param A The input allocation containing the band matrix A; supported element type: {Element#F64}.
1930     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
1931     * @param incX The increment for the elements of vector x; must be greater than zero.
1932     * @param beta The scalar beta.
1933     * @param Y The input/output allocation containing vector y (overwritten with the result); supported element type: {Element#F64}.
1934     * @param incY The increment for the elements of vector y; must be greater than zero.
1935     */
1936    void DGBMV(RsBlasTranspose TransA,
1937               int KL, int KU, double alpha, sp<Allocation> A, sp<Allocation> X,
1938               int incX, double beta, sp<Allocation> Y, int incY);
1939
1940    /**
1941     * CGBMV performs one of the matrix-vector operations
1942     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1943     *
1944     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
1945     *
1946     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1947     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1948     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1949     *           for i in range(0, m):
1950     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1951     *                  b[i, j-i+kl] = a[i, j]
1952     *
1953     * @param TransA The type of transpose applied to matrix A.
1954     * @param KL The number of sub-diagonals of the matrix A.
1955     * @param KU The number of super-diagonals of the matrix A.
1956     * @param alpha The scalar alpha.
1957     * @param A The input allocation containing the band matrix A; supported element type: {Element#F32_2}.
1958     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
1959     * @param incX The increment for the elements of vector x; must be greater than zero.
1960     * @param beta The scalar beta.
1961     * @param Y The input/output allocation containing vector y (overwritten with the result); supported element type: {Element#F32_2}.
1962     * @param incY The increment for the elements of vector y; must be greater than zero.
1963     */
1964    void CGBMV(RsBlasTranspose TransA,
1965               int KL, int KU, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
1966               int incX, Float2 beta, sp<Allocation> Y, int incY);
1967
1968    /**
1969     * ZGBMV performs one of the matrix-vector operations
1970     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1971     *
1972     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
1973     *
1974     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1975     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1976     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1977     *           for i in range(0, m):
1978     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1979     *                  b[i, j-i+kl] = a[i, j]
1980     *
1981     * @param TransA The type of transpose applied to matrix A.
1982     * @param KL The number of sub-diagonals of the matrix A.
1983     * @param KU The number of super-diagonals of the matrix A.
1984     * @param alpha The scalar alpha.
1985     * @param A The input allocation containing the band matrix A; supported element type: {Element#F64_2}.
1986     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
1987     * @param incX The increment for the elements of vector x; must be greater than zero.
1988     * @param beta The scalar beta.
1989     * @param Y The input/output allocation containing vector y (overwritten with the result); supported element type: {Element#F64_2}.
1990     * @param incY The increment for the elements of vector y; must be greater than zero.
1991     */
1992    void ZGBMV(RsBlasTranspose TransA,
1993               int KL, int KU, Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1994               Double2 beta, sp<Allocation> Y, int incY);
1995
1996    /**
1997     * STRMV performs one of the matrix-vector operations
1998     * x := A*x   or   x := A**T*x
1999     *
2000     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
2001     *
2002     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2003     * @param TransA The type of transpose applied to matrix A.
2004     * @param Diag Specifies whether or not A is unit triangular.
2005     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2006     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F32}.
2007     * @param incX The increment for the elements of vector x; must be greater than zero.
2008     */
2009    void STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2010               sp<Allocation> A, sp<Allocation> X, int incX);
2011
2012    /**
2013     * DTRMV performs one of the matrix-vector operations
2014     * x := A*x   or   x := A**T*x
2015     *
2016     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
2017     *
2018     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2019     * @param TransA The type of transpose applied to matrix A.
2020     * @param Diag Specifies whether or not A is unit triangular.
2021     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2022     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F64}.
2023     * @param incX The increment for the elements of vector x; must be greater than zero.
2024     */
2025    void DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2026               sp<Allocation> A, sp<Allocation> X, int incX);
2027
2028    /**
2029     * CTRMV performs one of the matrix-vector operations
2030     * x := A*x   or   x := A**T*x   or   x := A**H*x
2031     *
2032     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
2033     *
2034     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2035     * @param TransA The type of transpose applied to matrix A.
2036     * @param Diag Specifies whether or not A is unit triangular.
2037     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2038     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F32_2}.
2039     * @param incX The increment for the elements of vector x; must be greater than zero.
2040     */
2041    void CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2042               sp<Allocation> A, sp<Allocation> X, int incX);
2043
2044    /**
2045     * ZTRMV performs one of the matrix-vector operations
2046     * x := A*x   or   x := A**T*x   or   x := A**H*x
2047     *
2048     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
2049     *
2050     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2051     * @param TransA The type of transpose applied to matrix A.
2052     * @param Diag Specifies whether or not A is unit triangular.
2053     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2054     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F64_2}.
2055     * @param incX The increment for the elements of vector x; must be greater than zero.
2056     */
2057    void ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2058               sp<Allocation> A, sp<Allocation> X, int incX);
2059
2060    /**
2061     * STBMV performs one of the matrix-vector operations
2062     * x := A*x   or   x := A**T*x
2063     *
2064     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
2065     *
2066     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2067     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2068     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2069     *           for i in range(0, n):
2070     *              for j in range(i, min(i+k+1, n)):
2071     *                  b[i, j-i] = a[i, j]
2072     *
2073     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2074     * @param TransA The type of transpose applied to matrix A.
2075     * @param Diag Specifies whether or not A is unit triangular.
2076     * @param K The number of off-diagonals of the matrix A.
2077     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2078     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F32}.
2079     * @param incX The increment for the elements of vector x; must be greater than zero.
2080     */
2081    void STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2082               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2083
2084    /**
2085     * DTBMV performs one of the matrix-vector operations
2086     * x := A*x   or   x := A**T*x
2087     *
2088     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
2089     *
2090     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2091     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2092     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2093     *           for i in range(0, n):
2094     *              for j in range(i, min(i+k+1, n)):
2095     *                  b[i, j-i] = a[i, j]
2096     *
2097     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2098     * @param TransA The type of transpose applied to matrix A.
2099     * @param Diag Specifies whether or not A is unit triangular.
2100     * @param K The number of off-diagonals of the matrix A.
2101     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2102     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F64}.
2103     * @param incX The increment for the elements of vector x; must be greater than zero.
2104     */
2105    void DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2106               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2107
2108    /**
2109     * CTBMV performs one of the matrix-vector operations
2110     * x := A*x   or   x := A**T*x   or   x := A**H*x
2111     *
2112     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
2113     *
2114     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2115     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2116     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2117     *           for i in range(0, n):
2118     *              for j in range(i, min(i+k+1, n)):
2119     *                  b[i, j-i] = a[i, j]
2120     *
2121     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2122     * @param TransA The type of transpose applied to matrix A.
2123     * @param Diag Specifies whether or not A is unit triangular.
2124     * @param K The number of off-diagonals of the matrix A.
2125     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2126     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F32_2}.
2127     * @param incX The increment for the elements of vector x; must be greater than zero.
2128     */
2129    void CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2130               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2131
2132    /**
2133     * ZTBMV performs one of the matrix-vector operations
2134     * x := A*x   or   x := A**T*x   or   x := A**H*x
2135     *
2136     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
2137     *
2138     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2139     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2140     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2141     *           for i in range(0, n):
2142     *              for j in range(i, min(i+k+1, n)):
2143     *                  b[i, j-i] = a[i, j]
2144     *
2145     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2146     * @param TransA The type of transpose applied to matrix A.
2147     * @param Diag Specifies whether or not A is unit triangular.
2148     * @param K The number of off-diagonals of the matrix A.
2149     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2150     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F64_2}.
2151     * @param incX The increment for the elements of vector x; must be greater than zero.
2152     */
2153    void ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2154               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2155
2156    /**
2157     * STPMV performs one of the matrix-vector operations
2158     * x := A*x   or   x := A**T*x
2159     *
2160     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
2161     *
2162     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2163     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2164     *       'a' to packed matrix 'b'.
2165     *           k = 0
2166     *           for i in range(0, n):
2167     *              for j in range(i, n):
2168     *                  b[k++] = a[i, j]
2169     *
2170     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2171     * @param TransA The type of transpose applied to matrix A.
2172     * @param Diag Specifies whether or not A is unit triangular.
2173     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F32}.
2174     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F32}.
2175     * @param incX The increment for the elements of vector x; must be greater than zero.
2176     */
2177    void STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2178               sp<Allocation> Ap, sp<Allocation> X, int incX);
2179
2180    /**
2181     * DTPMV performs one of the matrix-vector operations
2182     * x := A*x   or   x := A**T*x
2183     *
2184     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
2185     *
2186     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2187     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2188     *       'a' to packed matrix 'b'.
2189     *           k = 0
2190     *           for i in range(0, n):
2191     *              for j in range(i, n):
2192     *                  b[k++] = a[i, j]
2193     *
2194     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2195     * @param TransA The type of transpose applied to matrix A.
2196     * @param Diag Specifies whether or not A is unit triangular.
2197     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64}.
2198     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F64}.
2199     * @param incX The increment for the elements of vector x; must be greater than zero.
2200     */
2201    void DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2202               sp<Allocation> Ap, sp<Allocation> X, int incX);
2203
2204    /**
2205     * CTPMV performs one of the matrix-vector operations
2206     * x := A*x   or   x := A**T*x   or   x := A**H*x
2207     *
2208     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
2209     *
2210     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2211     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2212     *       'a' to packed matrix 'b'.
2213     *           k = 0
2214     *           for i in range(0, n):
2215     *              for j in range(i, n):
2216     *                  b[k++] = a[i, j]
2217     *
2218     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2219     * @param TransA The type of transpose applied to matrix A.
2220     * @param Diag Specifies whether or not A is unit triangular.
2221     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F32_2}.
2222     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F32_2}.
2223     * @param incX The increment for the elements of vector x; must be greater than zero.
2224     */
2225    void CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2226               sp<Allocation> Ap, sp<Allocation> X, int incX);
2227
2228    /**
2229     * ZTPMV performs one of the matrix-vector operations
2230     * x := A*x   or   x := A**T*x   or   x := A**H*x
2231     *
2232     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
2233     *
2234     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2235     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2236     *       'a' to packed matrix 'b'.
2237     *           k = 0
2238     *           for i in range(0, n):
2239     *              for j in range(i, n):
2240     *                  b[k++] = a[i, j]
2241     *
2242     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2243     * @param TransA The type of transpose applied to matrix A.
2244     * @param Diag Specifies whether or not A is unit triangular.
2245     * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64_2}.
2246     * @param X The input/output allocation containing vector x (overwritten with the result); supported element type: {Element#F64_2}.
2247     * @param incX The increment for the elements of vector x; must be greater than zero.
2248     */
2249    void ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2250               sp<Allocation> Ap, sp<Allocation> X, int incX);
2251
2252    /**
2253     * STRSV solves one of the systems of equations
2254     * A*x = b   or   A**T*x = b
2255     *
2256     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
2257     *
2258     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2259     * @param TransA The type of transpose applied to matrix A.
2260     * @param Diag Specifies whether or not A is unit triangular.
2261     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2262     * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2263     * @param incX The increment for the elements of vector x; must be greater than zero.
2264     */
2265    void STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2266               sp<Allocation> A, sp<Allocation> X, int incX);
2267
2268    /**
2269     * DTRSV solves one of the systems of equations
2270     * A*x = b   or   A**T*x = b
2271     *
2272     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
2273     *
2274     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2275     * @param TransA The type of transpose applied to matrix A.
2276     * @param Diag Specifies whether or not A is unit triangular.
2277     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2278     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2279     * @param incX The increment for the elements of vector x; must be greater than zero.
2280     */
2281    void DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2282               sp<Allocation> A, sp<Allocation> X, int incX);
2283
2284    /**
2285     * CTRSV solves one of the systems of equations
2286     * A*x = b   or   A**T*x = b   or   A**H*x = b
2287     *
2288     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
2289     *
2290     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2291     * @param TransA The type of transpose applied to matrix A.
2292     * @param Diag Specifies whether or not A is unit triangular.
2293     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2294     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2295     * @param incX The increment for the elements of vector x; must be greater than zero.
2296     */
2297    void CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2298               sp<Allocation> A, sp<Allocation> X, int incX);
2299
2300    /**
2301     * ZTRSV solves one of the systems of equations
2302     * A*x = b   or   A**T*x = b   or   A**H*x = b
2303     *
2304     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
2305     *
2306     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2307     * @param TransA The type of transpose applied to matrix A.
2308     * @param Diag Specifies whether or not A is unit triangular.
2309     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2310     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
2311     * @param incX The increment for the elements of vector x; must be greater than zero.
2312     */
2313    void ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2314               sp<Allocation> A, sp<Allocation> X, int incX);
2315
2316    /**
2317     * STBSV solves one of the systems of equations
2318     * A*x = b   or   A**T*x = b
2319     *
2320     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
2321     *
2322     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2323     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2324     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2325     *           for i in range(0, n):
2326     *              for j in range(i, min(i+k+1, n)):
2327     *                  b[i, j-i] = a[i, j]
2328     *
2329     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2330     * @param TransA The type of transpose applied to matrix A.
2331     * @param Diag Specifies whether or not A is unit triangular.
2332     * @param K The number of off-diagonals of the matrix A.
2333     * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2334     * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2335     * @param incX The increment for the elements of vector x; must be greater than zero.
2336     */
2337    void STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2338               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2339
2340    /**
2341     * DTBSV solves one of the systems of equations
2342     * A*x = b   or   A**T*x = b
2343     *
2344     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
2345     *
2346     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2347     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2348     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2349     *           for i in range(0, n):
2350     *              for j in range(i, min(i+k+1, n)):
2351     *                  b[i, j-i] = a[i, j]
2352     *
2353     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2354     * @param TransA The type of transpose applied to matrix A.
2355     * @param Diag Specifies whether or not A is unit triangular.
2356     * @param K The number of off-diagonals of the matrix A.
2357     * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2358     * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2359     * @param incX The increment for the elements of vector x; must be greater than zero.
2360     */
2361    void DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2362               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2363
2364    /**
2365     * CTBSV solves one of the systems of equations
2366     * A*x = b   or   A**T*x = b   or   A**H*x = b
2367     *
2368     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
2369     *
2370     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2371     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2372     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2373     *           for i in range(0, n):
2374     *              for j in range(i, min(i+k+1, n)):
2375     *                  b[i, j-i] = a[i, j]
2376     *
2377     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2378     * @param TransA The type of transpose applied to matrix A.
2379     * @param Diag Specifies whether or not A is unit triangular.
2380     * @param K The number of off-diagonals of the matrix A.
2381     * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2382     * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2383     * @param incX The increment for the elements of vector x; must be greater than zero.
2384     */
2385    void CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2386               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2387
2388    /**
2389     * ZTBSV solves one of the systems of equations
2390     * A*x = b   or   A**T*x = b   or   A**H*x = b
2391     *
2392     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
2393     *
2394     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2395     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2396     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2397     *           for i in range(0, n):
2398     *              for j in range(i, min(i+k+1, n)):
2399     *                  b[i, j-i] = a[i, j]
2400     *
2401     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2402     * @param TransA The type of transpose applied to matrix A.
2403     * @param Diag Specifies whether or not A is unit triangular.
2404     * @param K The number of off-diagonals of the matrix A.
2405     * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2406     * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
2407     * @param incX The increment for the elements of vector x; must be greater than zero.
2408     */
2409    void ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2410               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2411
2412    /**
2413     * STPSV solves one of the systems of equations
2414     * A*x = b   or   A**T*x = b
2415     *
2416     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
2417     *
2418     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2419     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2420     *       'a' to a packed matrix 'b'.
2421     *           k = 0
2422     *           for i in range(0, n):
2423     *              for j in range(i, n):
2424     *                  b[k++] = a[i, j]
2425     *
2426     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2427     * @param TransA The type of transpose applied to matrix A.
2428     * @param Diag Specifies whether or not A is unit triangular.
2429     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2430     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2431     * @param incX The increment for the elements of vector x, must be larger than zero.
2432     */
2433    void STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2434               sp<Allocation> Ap, sp<Allocation> X, int incX);
2435
2436    /**
2437     * DTPSV solves one of the systems of equations
2438     * A*x = b   or   A**T*x = b
2439     *
2440     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
2441     *
2442     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2443     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2444     *       'a' to a packed matrix 'b'.
2445     *           k = 0
2446     *           for i in range(0, n):
2447     *              for j in range(i, n):
2448     *                  b[k++] = a[i, j]
2449     *
2450     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2451     * @param TransA The type of transpose applied to matrix A.
2452     * @param Diag Specifies whether or not A is unit triangular.
2453     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2454     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2455     * @param incX The increment for the elements of vector x, must be larger than zero.
2456     */
2457    void DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2458               sp<Allocation> Ap, sp<Allocation> X, int incX);
2459
2460    /**
2461     * CTPSV solves one of the systems of equations
2462     * A*x = b   or   A**T*x = b   or   A**H*x = b
2463     *
2464     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
2465     *
2466     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2467     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2468     *       'a' to a packed matrix 'b'.
2469     *           k = 0
2470     *           for i in range(0, n):
2471     *              for j in range(i, n):
2472     *                  b[k++] = a[i, j]
2473     *
2474     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2475     * @param TransA The type of transpose applied to matrix A.
2476     * @param Diag Specifies whether or not A is unit triangular.
2477     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2478     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2479     * @param incX The increment for the elements of vector x, must be larger than zero.
2480     */
2481    void CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2482               sp<Allocation> Ap, sp<Allocation> X, int incX);
2483
2484    /**
2485     * ZTPSV solves one of the systems of equations
2486     * A*x = b   or   A**T*x = b   or   A**H*x = b
2487     *
2488     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
2489     *
2490     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2491     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2492     *       'a' to a packed matrix 'b'.
2493     *           k = 0
2494     *           for i in range(0, n):
2495     *              for j in range(i, n):
2496     *                  b[k++] = a[i, j]
2497     *
2498     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2499     * @param TransA The type of transpose applied to matrix A.
2500     * @param Diag Specifies whether or not A is unit triangular.
2501     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}.
2502     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2503     * @param incX The increment for the elements of vector x, must be larger than zero.
2504     */
2505    void ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2506               sp<Allocation> Ap, sp<Allocation> X, int incX);
2507
2508    /**
2509     * SSYMV performs the matrix-vector operation
2510     * y := alpha*A*x + beta*y
2511     *
2512     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
2513     *
2514     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2515     * @param alpha The scalar alpha.
2516     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2517     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2518     * @param incX The increment for the elements of vector x, must be larger than zero.
2519     * @param beta The scalar beta.
2520     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F32}.
2521     * @param incY The increment for the elements of vector y, must be larger than zero.
2522     */
2523    void SSYMV(RsBlasUplo Uplo, float alpha, sp<Allocation> A, sp<Allocation> X,
2524               int incX, float beta, sp<Allocation> Y, int incY);
2525
2526    /**
2527     * SSBMV performs the matrix-vector operation
2528     * y := alpha*A*x + beta*y
2529     *
2530     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
2531     *
2532     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2533     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2534     *       example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
2535     *           for i in range(0, n):
2536     *              for j in range(i, min(i+k+1, n)):
2537     *                  b[i, j-i] = a[i, j]
2538     *
2539     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2540     * @param K The number of off-diagonals of the matrix A.
2541     * @param alpha The scalar alpha.
2542     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2543     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2544     * @param incX The increment for the elements of vector x, must be larger than zero.
2545     * @param beta The scalar beta.
2546     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F32}.
2547     * @param incY The increment for the elements of vector y, must be larger than zero.
2548     */
2549    void SSBMV(RsBlasUplo Uplo, int K, float alpha, sp<Allocation> A, sp<Allocation> X,
2550               int incX, float beta, sp<Allocation> Y, int incY);
2551
2552    /**
2553     * SSPMV performs the matrix-vector operation
2554     * y := alpha*A*x + beta*y
2555     *
2556     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
2557     *
2558     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2559     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2560     *       'a' to a packed matrix 'b'.
2561     *           k = 0
2562     *           for i in range(0, n):
2563     *              for j in range(i, n):
2564     *                  b[k++] = a[i, j]
2565     *
2566     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2567     * @param alpha The scalar alpha.
2568     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2569     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2570     * @param incX The increment for the elements of vector x, must be larger than zero.
2571     * @param beta The scalar beta.
2572     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F32}.
2573     * @param incY The increment for the elements of vector y, must be larger than zero.
2574     */
2575    void SSPMV(RsBlasUplo Uplo, float alpha, sp<Allocation> Ap, sp<Allocation> X,
2576               int incX, float beta, sp<Allocation> Y, int incY);
2577
2578    /**
2579     * SGER performs the rank 1 operation
2580     * A := alpha*x*y**T + A
2581     *
2582     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
2583     *
2584     * @param alpha The scalar alpha.
2585     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2586     * @param incX The increment for the elements of vector x, must be larger than zero.
2587     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2588     * @param incY The increment for the elements of vector y, must be larger than zero.
2589     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32}.
2590     */
2591    void SGER(float alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2592
2593    /**
2594     * SSYR performs the rank 1 operation
2595     * A := alpha*x*x**T + A
2596     *
2597     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
2598     *
2599     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2600     * @param alpha The scalar alpha.
2601     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2602     * @param incX The increment for the elements of vector x, must be larger than zero.
2603     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32}.
2604     */
2605    void SSYR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2606
2607    /**
2608     * SSPR performs the rank 1 operation
2609     * A := alpha*x*x**T + A
2610     *
2611     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
2612     *
2613     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2614     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2615     *       'a' to a packed matrix 'b'.
2616     *           k = 0
2617     *           for i in range(0, n):
2618     *              for j in range(i, n):
2619     *                  b[k++] = a[i, j]
2620     *
2621     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2622     * @param alpha The scalar alpha.
2623     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2624     * @param incX The increment for the elements of vector x, must be larger than zero.
2625     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F32}.
2626     */
2627    void SSPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2628
2629    /**
2630     * SSYR2 performs the symmetric rank 2 operation
2631     * A := alpha*x*y**T + alpha*y*x**T + A
2632     *
2633     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
2634     *
2635     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2636     * @param alpha The scalar alpha.
2637     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2638     * @param incX The increment for the elements of vector x, must be larger than zero.
2639     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2640     * @param incY The increment for the elements of vector y, must be larger than zero.
2641     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32}.
2642     */
2643    void SSYR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2644               sp<Allocation> Y, int incY, sp<Allocation> A);
2645
2646    /**
2647     * SSPR2 performs the symmetric rank 2 operation
2648     * A := alpha*x*y**T + alpha*y*x**T + A
2649     *
2650     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
2651     *
2652     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2653     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2654     *       'a' to a packed matrix 'b'.
2655     *           k = 0
2656     *           for i in range(0, n):
2657     *              for j in range(i, n):
2658     *                  b[k++] = a[i, j]
2659     *
2660     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2661     * @param alpha The scalar alpha.
2662     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2663     * @param incX The increment for the elements of vector x, must be larger than zero.
2664     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
2665     * @param incY The increment for the elements of vector y, must be larger than zero.
2666     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F32}.
2667     */
2668    void SSPR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2669               sp<Allocation> Y, int incY, sp<Allocation> Ap);
2670
2671    /**
2672     * DSYMV performs the matrix-vector operation
2673     * y := alpha*A*x + beta*y
2674     *
2675     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
2676     *
2677     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2678     * @param alpha The scalar alpha.
2679     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2680     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2681     * @param incX The increment for the elements of vector x, must be larger than zero.
2682     * @param beta The scalar beta.
2683     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F64}.
2684     * @param incY The increment for the elements of vector y, must be larger than zero.
2685     */
2686    void DSYMV(RsBlasUplo Uplo, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2687               double beta, sp<Allocation> Y, int incY);
2688
2689    /**
2690     * DSBMV performs the matrix-vector operation
2691     * y := alpha*A*x + beta*y
2692     *
2693     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
2694     *
2695     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2696     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2697     *       example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
2698     *           for i in range(0, n):
2699     *              for j in range(i, min(i+k+1, n)):
2700     *                  b[i, j-i] = a[i, j]
2701     *
2702     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2703     * @param K The number of off-diagonals of the matrix A.
2704     * @param alpha The scalar alpha.
2705     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2706     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2707     * @param incX The increment for the elements of vector x, must be larger than zero.
2708     * @param beta The scalar beta.
2709     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F64}.
2710     * @param incY The increment for the elements of vector y, must be larger than zero.
2711     */
2712    void DSBMV(RsBlasUplo Uplo, int K, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2713               double beta, sp<Allocation> Y, int incY);
2714
2715    /**
2716     * DSPMV performs the matrix-vector operation
2717     * y := alpha*A*x + beta*y
2718     *
2719     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2720     *
2721     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2722     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2723     *       'a' to a packed matrix 'b'.
2724     *           k = 0
2725     *           for i in range(0, n):
2726     *              for j in range(i, n):
2727     *                  b[k++] = a[i, j]
2728     *
2729     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2730     * @param alpha The scalar alpha.
2731     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2732     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2733     * @param incX The increment for the elements of vector x, must be larger than zero.
2734     * @param beta The scalar beta.
2735     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F64}.
2736     * @param incY The increment for the elements of vector y, must be larger than zero.
2737     */
2738    void DSPMV(RsBlasUplo Uplo, double alpha, sp<Allocation> Ap, sp<Allocation> X, int incX,
2739               double beta, sp<Allocation> Y, int incY);
2740
2741    /**
2742     * DGER performs the rank 1 operation
2743     * A := alpha*x*y**T + A
2744     *
2745     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2746     *
2747     * @param alpha The scalar alpha.
2748     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2749     * @param incX The increment for the elements of vector x, must be larger than zero.
2750     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2751     * @param incY The increment for the elements of vector y, must be larger than zero.
2752     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64}.
2753     */
2754    void DGER(double alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2755
2756    /**
2757     * DSYR performs the rank 1 operation
2758     * A := alpha*x*x**T + A
2759     *
2760     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2761     *
2762     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2763     * @param alpha The scalar alpha.
2764     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2765     * @param incX The increment for the elements of vector x, must be larger than zero.
2766     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64}.
2767     */
2768    void DSYR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2769
2770    /**
2771     * DSPR performs the rank 1 operation
2772     * A := alpha*x*x**T + A
2773     *
2774     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2775     *
2776     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2777     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2778     *       'a' to a packed matrix 'b'.
2779     *           k = 0
2780     *           for i in range(0, n):
2781     *              for j in range(i, n):
2782     *                  b[k++] = a[i, j]
2783     *
2784     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2785     * @param alpha The scalar alpha.
2786     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2787     * @param incX The increment for the elements of vector x, must be larger than zero.
2788     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F64}.
2789     */
2790    void DSPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2791
2792    /**
2793     * DSYR2 performs the symmetric rank 2 operation
2794     * A := alpha*x*y**T + alpha*y*x**T + A
2795     *
2796     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2797     *
2798     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2799     * @param alpha The scalar alpha.
2800     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2801     * @param incX The increment for the elements of vector x, must be larger than zero.
2802     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2803     * @param incY The increment for the elements of vector y, must be larger than zero.
2804     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64}.
2805     */
2806    void DSYR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2807               sp<Allocation> Y, int incY, sp<Allocation> A);
2808
2809    /**
2810     * DSPR2 performs the symmetric rank 2 operation
2811     * A := alpha*x*y**T + alpha*y*x**T + A
2812     *
2813     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2814     *
2815     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2816     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2817     *       'a' to a packed matrix 'b'.
2818     *           k = 0
2819     *           for i in range(0, n):
2820     *              for j in range(i, n):
2821     *                  b[k++] = a[i, j]
2822     *
2823     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2824     * @param alpha The scalar alpha.
2825     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2826     * @param incX The increment for the elements of vector x, must be larger than zero.
2827     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
2828     * @param incY The increment for the elements of vector y, must be larger than zero.
2829     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F64}.
2830     */
2831    void DSPR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2832               sp<Allocation> Y, int incY, sp<Allocation> Ap);
2833
2834    /**
2835     * CHEMV performs the matrix-vector operation
2836     * y := alpha*A*x + beta*y
2837     *
2838     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2839     *
2840     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2841     * @param alpha The scalar alpha.
2842     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2843     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2844     * @param incX The increment for the elements of vector x, must be larger than zero.
2845     * @param beta The scalar beta.
2846     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F32_2}.
2847     * @param incY The increment for the elements of vector y, must be larger than zero.
2848     */
2849    void CHEMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2850               int incX, Float2 beta, sp<Allocation> Y, int incY);
2851
2852    /**
2853     * CHBMV performs the matrix-vector operation
2854     * y := alpha*A*x + beta*y
2855     *
2856     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2857     *
2858     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2859     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2860     *       example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
2861     *           for i in range(0, n):
2862     *              for j in range(i, min(i+k+1, n)):
2863     *                  b[i, j-i] = a[i, j]
2864     *
2865     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2866     * @param K The number of off-diagonals of the matrix A.
2867     * @param alpha The scalar alpha.
2868     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2869     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2870     * @param incX The increment for the elements of vector x, must be larger than zero.
2871     * @param beta The scalar beta.
2872     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F32_2}.
2873     * @param incY The increment for the elements of vector y, must be larger than zero.
2874     */
2875    void CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2876               int incX, Float2 beta, sp<Allocation> Y, int incY);
2877
2878    /**
2879     * CHPMV performs the matrix-vector operation
2880     * y := alpha*A*x + beta*y
2881     *
2882     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2883     *
2884     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2885     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2886     *       'a' to a packed matrix 'b'.
2887     *           k = 0
2888     *           for i in range(0, n):
2889     *              for j in range(i, n):
2890     *                  b[k++] = a[i, j]
2891     *
2892     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2893     * @param alpha The scalar alpha.
2894     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2895     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2896     * @param incX The increment for the elements of vector x, must be larger than zero.
2897     * @param beta The scalar beta.
2898     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F32_2}.
2899     * @param incY The increment for the elements of vector y, must be larger than zero.
2900     */
2901    void CHPMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> Ap, sp<Allocation> X,
2902               int incX, Float2 beta, sp<Allocation> Y, int incY);
2903
2904    /**
2905     * CGERU performs the rank 1 operation
2906     * A := alpha*x*y**T + A
2907     *
2908     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2909     *
2910     * @param alpha The scalar alpha.
2911     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2912     * @param incX The increment for the elements of vector x, must be larger than zero.
2913     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2914     * @param incY The increment for the elements of vector y, must be larger than zero.
2915     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32_2}.
2916     */
2917    void CGERU(Float2 alpha, sp<Allocation> X, int incX,
2918               sp<Allocation> Y, int incY, sp<Allocation> A);
2919
2920    /**
2921     * CGERC performs the rank 1 operation
2922     * A := alpha*x*y**H + A
2923     *
2924     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2925     *
2926     * @param alpha The scalar alpha.
2927     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2928     * @param incX The increment for the elements of vector x, must be larger than zero.
2929     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2930     * @param incY The increment for the elements of vector y, must be larger than zero.
2931     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32_2}.
2932     */
2933    void CGERC(Float2 alpha, sp<Allocation> X, int incX,
2934               sp<Allocation> Y, int incY, sp<Allocation> A);
2935
2936    /**
2937     * CHER performs the hermitian rank 1 operation
2938     * A := alpha*x*x**H + A
2939     *
2940     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2941     *
2942     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2943     * @param alpha The scalar alpha.
2944     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2945     * @param incX The increment for the elements of vector x, must be larger than zero.
2946     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32_2}.
2947     */
2948    void CHER(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2949
2950    /**
2951     * CHPR performs the hermitian rank 1 operation
2952     * A := alpha*x*x**H + A
2953     *
2954     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2955     *
2956     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2957     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2958     *       'a' to a packed matrix 'b'.
2959     *           k = 0
2960     *           for i in range(0, n):
2961     *              for j in range(i, n):
2962     *                  b[k++] = a[i, j]
2963     *
2964     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2965     * @param alpha The scalar alpha.
2966     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2967     * @param incX The increment for the elements of vector x, must be larger than zero.
2968     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F32_2}.
2969     */
2970    void CHPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2971
2972    /**
2973     * CHER2 performs the hermitian rank 2 operation
2974     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2975     *
2976     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2977     *
2978     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2979     * @param alpha The scalar alpha.
2980     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2981     * @param incX The increment for the elements of vector x, must be larger than zero.
2982     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2983     * @param incY The increment for the elements of vector y, must be larger than zero.
2984     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F32_2}.
2985     */
2986    void CHER2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
2987               sp<Allocation> Y, int incY, sp<Allocation> A);
2988
2989    /**
2990     * CHPR2 performs the hermitian rank 2 operation
2991     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2992     *
2993     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2994     *
2995     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2996     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2997     *       'a' to packed matrix 'b'.
2998     *           k = 0
2999     *           for i in range(0, n):
3000     *              for j in range(i, n):
3001     *                  b[k++] = a[i, j]
3002     *
3003     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3004     * @param alpha The scalar alpha.
3005     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
3006     * @param incX The increment for the elements of vector x, must be larger than zero.
3007     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
3008     * @param incY The increment for the elements of vector y, must be larger than zero.
3009     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F32_2}.
3010     */
3011    void CHPR2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
3012               sp<Allocation> Y, int incY, sp<Allocation> Ap);
3013
3014    /**
3015     * ZHEMV performs the matrix-vector operation
3016     * y := alpha*A*x + beta*y
3017     *
3018     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
3019     *
3020     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3021     * @param alpha The scalar alpha.
3022     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3023     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3024     * @param incX The increment for the elements of vector x, must be larger than zero.
3025     * @param beta The scalar beta.
3026     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F64_2}.
3027     * @param incY The increment for the elements of vector y, must be larger than zero.
3028     */
3029    void ZHEMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3030               int incX, Double2 beta, sp<Allocation> Y, int incY);
3031
3032    /**
3033     * ZHBMV performs the matrix-vector operation
3034     * y := alpha*A*x + beta*y
3035     *
3036     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
3037     *
3038     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
3039     *       but only the region N*(K+1) will be referenced. The following subroutine is an
3040     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
3041     *           for i in range(0, n):
3042     *              for j in range(i, min(i+k+1, n)):
3043     *                  b[i, j-i] = a[i, j]
3044     *
3045     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
3046     * @param K The number of off-diagonals of the matrix A.
3047     * @param alpha The scalar alpha.
3048     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3049     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3050     * @param incX The increment for the elements of vector x, must be larger than zero.
3051     * @param beta The scalar beta.
3052     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F64_2}.
3053     * @param incY The increment for the elements of vector y, must be larger than zero.
3054     */
3055    void ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3056               int incX, Double2 beta, sp<Allocation> Y, int incY);
3057
3058    /**
3059     * ZHPMV performs the matrix-vector operation
3060     * y := alpha*A*x + beta*y
3061     *
3062     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
3063     *
3064     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3065     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3066     *       'a' to packed matrix 'b'.
3067     *           k = 0
3068     *           for i in range(0, n):
3069     *              for j in range(i, n):
3070     *                  b[k++] = a[i, j]
3071     *
3072     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
3073     * @param alpha The scalar alpha.
3074     * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3075     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3076     * @param incX The increment for the elements of vector x, must be larger than zero.
3077     * @param beta The scalar beta.
3078     * @param Y The input/output allocation that contains vector y and receives the result, supported elements type: {Element#F64_2}.
3079     * @param incY The increment for the elements of vector y, must be larger than zero.
3080     */
3081    void ZHPMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> Ap, sp<Allocation> X,
3082               int incX, Double2 beta, sp<Allocation> Y, int incY);
3083
3084    /**
3085     * ZGERU performs the rank 1 operation
3086     * A := alpha*x*y**T + A
3087     *
3088     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
3089     *
3090     * @param alpha The scalar alpha.
3091     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3092     * @param incX The increment for the elements of vector x, must be larger than zero.
3093     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3094     * @param incY The increment for the elements of vector y, must be larger than zero.
3095     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64_2}.
3096     */
3097    void ZGERU(Double2 alpha, sp<Allocation> X, int incX,
3098               sp<Allocation> Y, int incY, sp<Allocation> A);
3099
3100    /**
3101     * ZGERC performs the rank 1 operation
3102     * A := alpha*x*y**H + A
3103     *
3104     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
3105     *
3106     * @param alpha The scalar alpha.
3107     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3108     * @param incX The increment for the elements of vector x, must be larger than zero.
3109     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3110     * @param incY The increment for the elements of vector y, must be larger than zero.
3111     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64_2}.
3112     */
3113    void ZGERC(Double2 alpha, sp<Allocation> X, int incX,
3114               sp<Allocation> Y, int incY, sp<Allocation> A);
3115
3116    /**
3117     * ZHER performs the rank 1 operation
3118     * A := alpha*x*x**H + A
3119     *
3120     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
3121     *
3122     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3123     * @param alpha The scalar alpha.
3124     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3125     * @param incX The increment for the elements of vector x, must be larger than zero.
3126     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64_2}.
3127     */
3128    void ZHER(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
3129
3130    /**
3131     * ZHPR performs the rank 1 operation
3132     * A := alpha*x*x**H + A
3133     *
3134     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
3135     *
3136     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3137     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3138     *       'a' to packed matrix 'b'.
3139     *           k = 0
3140     *           for i in range(0, n):
3141     *              for j in range(i, n):
3142     *                  b[k++] = a[i, j]
3143     *
3144     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3145     * @param alpha The scalar alpha.
3146     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3147     * @param incX The increment for the elements of vector x, must be larger than zero.
3148     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F64_2}.
3149     */
3150    void ZHPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
3151
3152    /**
3153     * ZHER2 performs the hermitian rank 2 operation
3154     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3155     *
3156     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
3157     *
3158     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3159     * @param alpha The scalar alpha.
3160     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3161     * @param incX The increment for the elements of vector x, must be larger than zero.
3162     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3163     * @param incY The increment for the elements of vector y, must be larger than zero.
3164     * @param A The input/output allocation that contains matrix A and receives the result, supported elements type: {Element#F64_2}.
3165     */
3166    void ZHER2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3167               sp<Allocation> Y, int incY, sp<Allocation> A);
3168
3169    /**
3170     * ZHPR2 performs the hermitian rank 2 operation
3171     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3172     *
3173     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
3174     *
3175     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3176     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3177     *       'a' to packed matrix 'b'.
3178     *           k = 0
3179     *           for i in range(0, n):
3180     *              for j in range(i, n):
3181     *                  b[k++] = a[i, j]
3182     *
3183     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3184     * @param alpha The scalar alpha.
3185     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3186     * @param incX The increment for the elements of vector x, must be larger than zero.
3187     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3188     * @param incY The increment for the elements of vector y, must be larger than zero.
3189     * @param Ap The input/output allocation that contains packed matrix A and receives the result, supported elements type: {Element#F64_2}.
3190     */
3191    void ZHPR2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3192               sp<Allocation> Y, int incY, sp<Allocation> Ap);
3193
3194    /**
3195     * SGEMM performs one of the matrix-matrix operations
3196     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3197     *
3198     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
3199     *
3200     * @param TransA The type of transpose applied to matrix A.
3201     * @param TransB The type of transpose applied to matrix B.
3202     * @param alpha The scalar alpha.
3203     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3204     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3205     * @param beta The scalar beta.
3206     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32}.
3207     */
3208    void SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, sp<Allocation> A,
3209                      sp<Allocation> B, float beta, sp<Allocation> C);
3210
3211
3212    /**
3213     * DGEMM performs one of the matrix-matrix operations
3214     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3215     *
3216     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
3217     *
3218     * @param TransA The type of transpose applied to matrix A.
3219     * @param TransB The type of transpose applied to matrix B.
3220     * @param alpha The scalar alpha.
3221     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3222     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3223     * @param beta The scalar beta.
3224     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64}.
3225     */
3226    void DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, sp<Allocation> A,
3227                      sp<Allocation> B, double beta, sp<Allocation> C);
3228
3229    /**
3230     * CGEMM performs one of the matrix-matrix operations
3231     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3232     *
3233     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3234     *
3235     * @param TransA The type of transpose applied to matrix A.
3236     * @param TransB The type of transpose applied to matrix B.
3237     * @param alpha The scalar alpha.
3238     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3239     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3240     * @param beta The scalar beta.
3241     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32_2}.
3242     */
3243    void CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, sp<Allocation> A,
3244                      sp<Allocation> B, Float2 beta, sp<Allocation> C);
3245
3246    /**
3247     * ZGEMM performs one of the matrix-matrix operations
3248     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3249     *
3250     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3251     *
3252     * @param TransA The type of transpose applied to matrix A.
3253     * @param TransB The type of transpose applied to matrix B.
3254     * @param alpha The scalar alpha.
3255     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3256     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3257     * @param beta The scalar beta.
3258     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64_2}.
3259     */
3260    void ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, sp<Allocation> A,
3261                      sp<Allocation> B, Double2 beta, sp<Allocation> C);
3262
3263    /**
3264     * SSYMM performs one of the matrix-matrix operations
3265     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3266     *
3267     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3268     *
3269     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3270     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3271     * @param alpha The scalar alpha.
3272     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3273     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3274     * @param beta The scalar beta.
3275     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32}.
3276     */
3277    void SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, sp<Allocation> A,
3278                      sp<Allocation> B, float beta, sp<Allocation> C);
3279
3280    /**
3281     * DSYMM performs one of the matrix-matrix operations
3282     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3283     *
3284     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3285     *
3286     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3287     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3288     * @param alpha The scalar alpha.
3289     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3290     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3291     * @param beta The scalar beta.
3292     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64}.
3293     */
3294    void DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, sp<Allocation> A,
3295                      sp<Allocation> B, double beta, sp<Allocation> C);
3296
3297    /**
3298     * CSYMM performs one of the matrix-matrix operations
3299     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3300     *
3301     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3302     *
3303     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3304     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3305     * @param alpha The scalar alpha.
3306     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3307     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3308     * @param beta The scalar beta.
3309     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32_2}.
3310     */
3311    void CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3312                      sp<Allocation> B, Float2 beta, sp<Allocation> C);
3313
3314    /**
3315     * ZSYMM performs one of the matrix-matrix operations
3316     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3317     *
3318     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3319     *
3320     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3321     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3322     * @param alpha The scalar alpha.
3323     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3324     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3325     * @param beta The scalar beta.
3326     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64_2}.
3327     */
3328    void ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3329                      sp<Allocation> B, Double2 beta, sp<Allocation> C);
3330
3331    /**
3332     * SSYRK performs one of the symmetric rank k operations
3333     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3334     *
3335     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3336     *
3337     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3338     * @param Trans The type of transpose applied to the operation.
3339     * @param alpha The scalar alpha.
3340     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3341     * @param beta The scalar beta.
3342     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32}.
3343     */
3344    void SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3345               sp<Allocation> A, float beta, sp<Allocation> C);
3346
3347    /**
3348     * DSYRK performs one of the symmetric rank k operations
3349     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3350     *
3351     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3352     *
3353     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3354     * @param Trans The type of transpose applied to the operation.
3355     * @param alpha The scalar alpha.
3356     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3357     * @param beta The scalar beta.
3358     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64}.
3359     */
3360    void DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3361               sp<Allocation> A, double beta, sp<Allocation> C);
3362
3363    /**
3364     * CSYRK performs one of the symmetric rank k operations
3365     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3366     *
3367     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3368     *
3369     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3370     * @param Trans The type of transpose applied to the operation.
3371     * @param alpha The scalar alpha.
3372     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3373     * @param beta The scalar beta.
3374     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32_2}.
3375     */
3376    void CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3377               sp<Allocation> A, Float2 beta, sp<Allocation> C);
3378
3379    /**
3380     * ZSYRK performs one of the symmetric rank k operations
3381     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3382     *
3383     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3384     *
3385     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3386     * @param Trans The type of transpose applied to the operation.
3387     * @param alpha The scalar alpha.
3388     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3389     * @param beta The scalar beta.
3390     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64_2}.
3391     */
3392    void ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3393               sp<Allocation> A, Double2 beta, sp<Allocation> C);
3394
3395    /**
3396     * SSYR2K performs one of the symmetric rank 2k operations
3397     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3398     *
3399     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3400     *
3401     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3402     * @param Trans The type of transpose applied to the operation.
3403     * @param alpha The scalar alpha.
3404     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3405     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3406     * @param beta The scalar beta.
3407     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32}.
3408     */
3409    void SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3410                sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C);
3411
3412    /**
3413     * DSYR2K performs one of the symmetric rank 2k operations
3414     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3415     *
3416     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3417     *
3418     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3419     * @param Trans The type of transpose applied to the operation.
3420     * @param alpha The scalar alpha.
3421     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3422     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3423     * @param beta The scalar beta.
3424     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64}.
3425     */
3426    void DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3427                sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C);
3428
3429    /**
3430     * CSYR2K performs one of the symmetric rank 2k operations
3431     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3432     *
3433     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3434     *
3435     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3436     * @param Trans The type of transpose applied to the operation.
3437     * @param alpha The scalar alpha.
3438     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3439     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3440     * @param beta The scalar beta.
3441     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F32_2}.
3442     */
3443    void CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3444                sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C);
3445
3446    /**
3447     * ZSYR2K performs one of the symmetric rank 2k operations
3448     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3449     *
3450     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3451     *
3452     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3453     * @param Trans The type of transpose applied to the operation.
3454     * @param alpha The scalar alpha.
3455     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3456     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3457     * @param beta The scalar beta.
3458     * @param C The input/output allocation that contains matrix C and receives the result, supported elements type: {Element#F64_2}.
3459     */
3460    void ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3461                sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C);
3462
3463    /**
3464     * STRMM performs one of the matrix-matrix operations
3465     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3466     * op(A) is one of  op(A) = A  or  op(A) = A**T
3467     *
3468     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3469     *
3470     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3471     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3472     * @param TransA The type of transpose applied to matrix A.
3473     * @param Diag Specifies whether or not A is unit triangular.
3474     * @param alpha The scalar alpha.
3475     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3476     * @param B The input/output allocation that contains matrix B and receives the result, supported elements type: {Element#F32}.
3477     */
3478    void STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA,
3479               RsBlasDiag Diag, float alpha, sp<Allocation> A, sp<Allocation> B);
3480
3481    /**
3482     * DTRMM performs one of the matrix-matrix operations
3483     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3484     * op(A) is one of  op(A) = A  or  op(A) = A**T
3485     *
3486     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3487     *
3488     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3489     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3490     * @param TransA The type of transpose applied to matrix A.
3491     * @param Diag Specifies whether or not A is unit triangular.
3492     * @param alpha The scalar alpha.
3493     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3494     * @param B The input/output allocation that contains matrix B and receives the result, supported elements type: {Element#F64}.
3495     */
3496    void DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3497               double alpha, sp<Allocation> A, sp<Allocation> B);
3498
3499    /**
3500     * CTRMM performs one of the matrix-matrix operations
3501     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3502     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3503     *
3504     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3505     *
3506     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3507     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3508     * @param TransA The type of transpose applied to matrix A.
3509     * @param Diag Specifies whether or not A is unit triangular.
3510     * @param alpha The scalar alpha.
3511     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3512     * @param B The input/output allocation that contains matrix B and receives the result, supported elements type: {Element#F32_2}.
3513     */
3514    void CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3515               Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3516
3517    /**
3518     * ZTRMM performs one of the matrix-matrix operations
3519     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3520     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3521     *
3522     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3523     *
3524     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3525     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3526     * @param TransA The type of transpose applied to matrix A.
3527     * @param Diag Specifies whether or not A is unit triangular.
3528     * @param alpha The scalar alpha.
3529     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3530     * @param B The input/output allocation that contains matrix B and receives the result, supported elements type: {Element#F64_2}.
3531     */
3532    void ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3533               Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3534
3535    /**
3536     * STRSM solves one of the matrix equations
3537     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3538     * op(A) is one of  op(A) = A  or  op(A) = A**T
     * (A**T denotes the transpose of A.)
     * Per the linked reference, the solution X is returned in B (B is overwritten).
3539     *
3540     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3541     *
3542     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3543     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3544     * @param TransA The type of transpose applied to matrix A.
3545     * @param Diag Specifies whether or not A is unit triangular.
3546     * @param alpha The scalar alpha.
3547     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3548     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3549     */
3550    void STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3551               float alpha, sp<Allocation> A, sp<Allocation> B);
3552
3553    /**
3554     * DTRSM solves one of the matrix equations
3555     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3556     * op(A) is one of  op(A) = A  or  op(A) = A**T
     * (A**T denotes the transpose of A.)
     * Per the linked reference, the solution X is returned in B (B is overwritten).
3557     *
3558     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3559     *
3560     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3561     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3562     * @param TransA The type of transpose applied to matrix A.
3563     * @param Diag Specifies whether or not A is unit triangular.
3564     * @param alpha The scalar alpha.
3565     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3566     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3567     */
3568    void DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3569               double alpha, sp<Allocation> A, sp<Allocation> B);
3570
3571    /**
3572     * CTRSM solves one of the matrix equations
3573     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3574     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
     * (A**T denotes the transpose of A, A**H its conjugate transpose.)
     * Per the linked reference, the solution X is returned in B (B is overwritten).
3575     *
3576     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3577     *
3578     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3579     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3580     * @param TransA The type of transpose applied to matrix A.
3581     * @param Diag Specifies whether or not A is unit triangular.
3582     * @param alpha The scalar alpha (complex, carried in a Float2).
3583     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3584     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3585     */
3586    void CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3587               Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3588
3589    /**
3590     * ZTRSM solves one of the matrix equations
3591     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3592     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
     * (A**T denotes the transpose of A, A**H its conjugate transpose.)
     * Per the linked reference, the solution X is returned in B (B is overwritten).
3593     *
3594     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3595     *
3596     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3597     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3598     * @param TransA The type of transpose applied to matrix A.
3599     * @param Diag Specifies whether or not A is unit triangular.
3600     * @param alpha The scalar alpha (complex, carried in a Double2).
3601     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3602     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3603     */
3604    void ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3605               Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3606
3607    /**
3608     * CHEMM performs one of the matrix-matrix operations
3609     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
     * where A is a Hermitian matrix (see the linked reference).
3610     *
3611     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3612     *
3613     * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3614     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3615     * @param alpha The scalar alpha (complex, carried in a Float2).
3616     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3617     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3618     * @param beta The scalar beta (complex, carried in a Float2).
3619     * @param C The allocation contains matrix C, read as an input and overwritten with the
     *          result (C := ...), supported elements type: {Element#F32_2}.
3620     */
3621    void CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3622               sp<Allocation> B, Float2 beta, sp<Allocation> C);
3623
3624    /**
3625     * ZHEMM performs one of the matrix-matrix operations
3626     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
     * where A is a Hermitian matrix (see the linked reference).
3627     *
3628     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3629     *
3630     * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3631     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3632     * @param alpha The scalar alpha (complex, carried in a Double2).
3633     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3634     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3635     * @param beta The scalar beta (complex, carried in a Double2).
3636     * @param C The allocation contains matrix C, read as an input and overwritten with the
     *          result (C := ...), supported elements type: {Element#F64_2}.
3637     */
3638    void ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3639               sp<Allocation> B, Double2 beta, sp<Allocation> C);
3640
3641    /**
3642     * CHERK performs one of the Hermitian rank k operations
3643     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
     * (A**H denotes the conjugate transpose of A.)
     * Unlike the matrix elements, alpha and beta are real scalars here.
3644     *
3645     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3646     *
3647     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3648     * @param Trans The type of transpose applied to the operation.
3649     * @param alpha The real scalar alpha.
3650     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3651     * @param beta The real scalar beta.
3652     * @param C The allocation contains matrix C, read as an input and overwritten with the
     *          result (C := ...), supported elements type: {Element#F32_2}.
3653     */
3654    void CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, sp<Allocation> A,
3655               float beta, sp<Allocation> C);
3656
3657    /**
3658     * ZHERK performs one of the Hermitian rank k operations
3659     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
     * (A**H denotes the conjugate transpose of A.)
     * Unlike the matrix elements, alpha and beta are real scalars here.
3660     *
3661     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3662     *
3663     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3664     * @param Trans The type of transpose applied to the operation.
3665     * @param alpha The real scalar alpha.
3666     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3667     * @param beta The real scalar beta.
3668     * @param C The allocation contains matrix C, read as an input and overwritten with the
     *          result (C := ...), supported elements type: {Element#F64_2}.
3669     */
3670    void ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, sp<Allocation> A,
3671               double beta, sp<Allocation> C);
3672
3673    /**
3674     * CHER2K performs one of the Hermitian rank 2k operations
3675     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
     * (X**H denotes the conjugate transpose of X, conjg( alpha ) the complex conjugate of alpha.)
     * Note that alpha is complex while beta is a real scalar.
3676     *
3677     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3678     *
3679     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3680     * @param Trans The type of transpose applied to the operation.
3681     * @param alpha The scalar alpha (complex, carried in a Float2).
3682     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3683     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3684     * @param beta The real scalar beta.
3685     * @param C The allocation contains matrix C, read as an input and overwritten with the
     *          result (C := ...), supported elements type: {Element#F32_2}.
3686     */
3687    void CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, sp<Allocation> A,
3688                sp<Allocation> B, float beta, sp<Allocation> C);
3689
3690    /**
3691     * ZHER2K performs one of the Hermitian rank 2k operations
3692     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
     * (X**H denotes the conjugate transpose of X, conjg( alpha ) the complex conjugate of alpha.)
     * Note that alpha is complex while beta is a real scalar.
3693     *
3694     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3695     *
3696     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3697     * @param Trans The type of transpose applied to the operation.
3698     * @param alpha The scalar alpha (complex, carried in a Double2).
3699     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3700     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3701     * @param beta The real scalar beta.
3702     * @param C The allocation contains matrix C, read as an input and overwritten with the
     *          result (C := ...), supported elements type: {Element#F64_2}.
3703     */
3704    void ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, sp<Allocation> A,
3705                sp<Allocation> B, double beta, sp<Allocation> C);
3706
3707    /**
3708     * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3709     * Calculations are done in 1.10.21 fixed-point format for the final output,
3710     * just before there's a shift down to drop the fractional parts. The output
3711     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3712     * gives some headroom to avoid wrapping around on small overflows.
3713     *
3714     * @param A The input allocation contains matrix A, supported elements type: {Element#U8}.
3715     * @param a_offset The offset for all values in matrix A, e.g. A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3716     * @param B The input allocation contains matrix B, supported elements type: {Element#U8}.
3717     * @param b_offset The offset for all values in matrix B, e.g. B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3718     * @param C The output allocation that receives matrix C (C = A * Transpose(B)), supported elements type: {Element#U8}.
3719     * @param c_offset The offset for all values in matrix C.
3720     * @param c_mult The multiplier for all values in matrix C, e.g. C[i,j] = (C[i,j] + c_offset) * c_mult.
3721     **/
3722    void BNNM(sp<Allocation> A, int a_offset, sp<Allocation> B, int b_offset, sp<Allocation> C,
3723              int c_offset, int c_mult);
3724};
3725
3726/**
3727 * Intrinsic kernel for blending two Allocations.
 *
 * Every forEach* method takes the same (in, out) pair of Allocations. In the
 * per-method formulas, "src" presumably denotes `in` and "dst" denotes `out`,
 * which also receives the result -- TODO confirm against the implementation.
3728 */
3729class ScriptIntrinsicBlend : public ScriptIntrinsic {
3730 private:
3731    ScriptIntrinsicBlend(sp<RS> rs, sp<const Element> e);
3732 public:
3733    /**
3734     * Creates a new blend intrinsic. Supported Element types are U8_4.
3735     * @param[in] rs RenderScript context
3736     * @param[in] e Element
3737     * @return new ScriptIntrinsicBlend
3738     */
3739    static sp<ScriptIntrinsicBlend> create(sp<RS> rs, sp<const Element> e);
3740    /**
3741     * Sets dst = {0, 0, 0, 0}
3742     * @param[in] in input Allocation
3743     * @param[in] out output Allocation
3744     */
3745    void forEachClear(sp<Allocation> in, sp<Allocation> out);
3746    /**
3747     * Sets dst = src
3748     * @param[in] in input Allocation
3749     * @param[in] out output Allocation
3750     */
3751    void forEachSrc(sp<Allocation> in, sp<Allocation> out);
3752    /**
3753     * Sets dst = dst (NOP)
3754     * @param[in] in input Allocation
3755     * @param[in] out output Allocation
3756     */
3757    void forEachDst(sp<Allocation> in, sp<Allocation> out);
3758    /**
3759     * Sets dst = src + dst * (1.0 - src.a)
3760     * @param[in] in input Allocation
3761     * @param[in] out output Allocation
3762     */
3763    void forEachSrcOver(sp<Allocation> in, sp<Allocation> out);
3764    /**
3765     * Sets dst = dst + src * (1.0 - dst.a)
3766     * @param[in] in input Allocation
3767     * @param[in] out output Allocation
3768     */
3769    void forEachDstOver(sp<Allocation> in, sp<Allocation> out);
3770    /**
3771     * Sets dst = src * dst.a
3772     * @param[in] in input Allocation
3773     * @param[in] out output Allocation
3774     */
3775    void forEachSrcIn(sp<Allocation> in, sp<Allocation> out);
3776    /**
3777     * Sets dst = dst * src.a
3778     * @param[in] in input Allocation
3779     * @param[in] out output Allocation
3780     */
3781    void forEachDstIn(sp<Allocation> in, sp<Allocation> out);
3782    /**
3783     * Sets dst = src * (1.0 - dst.a)
3784     * @param[in] in input Allocation
3785     * @param[in] out output Allocation
3786     */
3787    void forEachSrcOut(sp<Allocation> in, sp<Allocation> out);
3788    /**
3789     * Sets dst = dst * (1.0 - src.a)
3790     * @param[in] in input Allocation
3791     * @param[in] out output Allocation
3792     */
3793    void forEachDstOut(sp<Allocation> in, sp<Allocation> out);
3794    /**
3795     * Sets dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
3796     * @param[in] in input Allocation
3797     * @param[in] out output Allocation
3798     */
3799    void forEachSrcAtop(sp<Allocation> in, sp<Allocation> out);
3800    /**
3801     * Sets dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
3802     * @param[in] in input Allocation
3803     * @param[in] out output Allocation
3804     */
3805    void forEachDstAtop(sp<Allocation> in, sp<Allocation> out);
3806    /**
3807     * Sets dst = {src.r ^ dst.r, src.g ^ dst.g, src.b ^ dst.b, src.a ^ dst.a}
3808     * @param[in] in input Allocation
3809     * @param[in] out output Allocation
3810     */
3811    void forEachXor(sp<Allocation> in, sp<Allocation> out);
3812    /**
3813     * Sets dst = src * dst
3814     * @param[in] in input Allocation
3815     * @param[in] out output Allocation
3816     */
3817    void forEachMultiply(sp<Allocation> in, sp<Allocation> out);
3818    /**
3819     * Sets dst = min(src + dst, 1.0)
3820     * @param[in] in input Allocation
3821     * @param[in] out output Allocation
3822     */
3823    void forEachAdd(sp<Allocation> in, sp<Allocation> out);
3824    /**
3825     * Sets dst = max(dst - src, 0.0)
3826     * @param[in] in input Allocation
3827     * @param[in] out output Allocation
3828     */
3829    void forEachSubtract(sp<Allocation> in, sp<Allocation> out);
3830};
3831
3832/**
3833 * Intrinsic Gaussian blur filter. Applies a Gaussian blur of the specified
3834 * radius to all elements of an Allocation.
 *
 * The input is supplied separately through setInput(); forEach() only takes
 * the output Allocation.
3835 */
3836class ScriptIntrinsicBlur : public ScriptIntrinsic {
3837 private:
3838    ScriptIntrinsicBlur(sp<RS> rs, sp<const Element> e);
3839 public:
3840    /**
3841     * Creates a new blur intrinsic. Supported Element types are U8 and U8_4.
3842     * @param[in] rs RenderScript context
3843     * @param[in] e Element
3844     * @return new ScriptIntrinsicBlur
3845     */
3846    static sp<ScriptIntrinsicBlur> create(sp<RS> rs, sp<const Element> e);
3847    /**
3848     * Sets the input of the blur.
3849     * @param[in] in input Allocation
3850     */
3851    void setInput(sp<Allocation> in);
3852    /**
3853     * Runs the intrinsic.
3854     * @param[in] out output Allocation
3855     */
3856    void forEach(sp<Allocation> out);
3857    /**
3858     * Sets the radius of the blur. The supported range is 0 < radius <= 25.
3859     * @param[in] radius radius of the blur
3860     */
3861    void setRadius(float radius);
3862};
3863
3864/**
3865 * Intrinsic for applying a color matrix to allocations. This has the
3866 * same effect as loading each element and converting it to a
3867 * F32_N, multiplying the result by the 4x4 color matrix
3868 * as performed by rsMatrixMultiply() and writing it to the output
3869 * after conversion back to U8_N or F32_N.
3870 */
3871class ScriptIntrinsicColorMatrix : public ScriptIntrinsic {
3872 private:
3873    ScriptIntrinsicColorMatrix(sp<RS> rs, sp<const Element> e);
3874 public:
3875    /**
3876     * Creates a new intrinsic. Unlike the other intrinsics' factories, no
3877     * Element is passed in here.
     * @param[in] rs RenderScript context
3878     * @return new ScriptIntrinsicColorMatrix
3879     */
3880    static sp<ScriptIntrinsicColorMatrix> create(sp<RS> rs);
3881    /**
3882     * Applies the color matrix. Supported types are U8 and F32 with
3883     * vector lengths between 1 and 4.
3884     * @param[in] in input Allocation
3885     * @param[out] out output Allocation
3886     */
3887    void forEach(sp<Allocation> in, sp<Allocation> out);
3888    /**
3889     * Set the value to be added after the color matrix has been
3890     * applied. The default value is {0, 0, 0, 0}.
3891     * @param[in] add float[4] of values; must point to at least 4 floats
3892     */
3893    void setAdd(float* add);
3894
3895    /**
3896     * Set the color matrix which will be applied to each cell of the
3897     * image. The alpha channel will be copied.
3898     *
3899     * @param[in] m float[9] of values (a 3x3 matrix); must point to at least 9 floats
3900     */
3901    void setColorMatrix3(float* m);
3902    /**
3903     * Set the color matrix which will be applied to each cell of the
3904     * image.
3905     *
3906     * @param[in] m float[16] of values (a 4x4 matrix); must point to at least 16 floats
3907     */
3908    void setColorMatrix4(float* m);
3909    /**
3910     * Set a color matrix to convert from RGB to luminance. The alpha
3911     * channel will be a copy.
3912     */
3913    void setGreyscale();
3914    /**
3915     * Set the matrix to convert from RGB to YUV with a direct copy of
3916     * the 4th channel.
3917     */
3918    void setRGBtoYUV();
3919    /**
3920     * Set the matrix to convert from YUV to RGB with a direct copy of
3921     * the 4th channel.
3922     */
3923    void setYUVtoRGB();
3924};
3925
3926/**
3927 * Intrinsic for applying a 3x3 convolve to an allocation.
 *
 * The input is supplied separately through setInput(); forEach() only takes
 * the output Allocation.
3928 */
3929class ScriptIntrinsicConvolve3x3 : public ScriptIntrinsic {
3930 private:
3931    ScriptIntrinsicConvolve3x3(sp<RS> rs, sp<const Element> e);
3932 public:
3933    /**
3934     * Creates a new 3x3 convolve intrinsic.
     * Supported types U8 and F32 with vector lengths between 1 and
3935     * 4. The default convolution kernel is the identity.
3936     * @param[in] rs RenderScript context
3937     * @param[in] e Element
3938     * @return new ScriptIntrinsicConvolve3x3
3939     */
3940    static sp<ScriptIntrinsicConvolve3x3> create(sp<RS> rs, sp<const Element> e);
3941    /**
3942     * Sets input for intrinsic.
3943     * @param[in] in input Allocation
3944     */
3945    void setInput(sp<Allocation> in);
3946    /**
3947     * Launches the intrinsic.
3948     * @param[in] out output Allocation
3949     */
3950    void forEach(sp<Allocation> out);
3951    /**
3952     * Sets convolution kernel.
3953     * @param[in] v float[9] of values (the 3x3 kernel); must point to at least 9 floats
3954     */
3955    void setCoefficients(float* v);
3956};
3957
3958/**
3959 * Intrinsic for applying a 5x5 convolve to an allocation.
 *
 * The input is supplied separately through setInput(); forEach() only takes
 * the output Allocation.
3960 */
3961class ScriptIntrinsicConvolve5x5 : public ScriptIntrinsic {
3962 private:
3963    ScriptIntrinsicConvolve5x5(sp<RS> rs, sp<const Element> e);
3964 public:
3965    /**
3966     * Creates a new 5x5 convolve intrinsic.
     * Supported types U8 and F32 with vector lengths between 1 and
3967     * 4. The default convolution kernel is the identity.
3968     * @param[in] rs RenderScript context
3969     * @param[in] e Element
3970     * @return new ScriptIntrinsicConvolve5x5
3971     */
3972    static sp<ScriptIntrinsicConvolve5x5> create(sp<RS> rs, sp<const Element> e);
3973    /**
3974     * Sets input for intrinsic.
3975     * @param[in] in input Allocation
3976     */
3977    void setInput(sp<Allocation> in);
3978    /**
3979     * Launches the intrinsic.
3980     * @param[in] out output Allocation
3981     */
3982    void forEach(sp<Allocation> out);
3983    /**
3984     * Sets convolution kernel.
3985     * @param[in] v float[25] of values (the 5x5 kernel); must point to at least 25 floats
3986     */
3987    void setCoefficients(float* v);
3988};
3989
3990/**
3991 * Intrinsic for computing a histogram.
 *
 * The output Allocation is supplied separately through setOutput(); the
 * forEach* methods only take the input image.
3992 */
3993class ScriptIntrinsicHistogram : public ScriptIntrinsic {
3994 private:
3995    ScriptIntrinsicHistogram(sp<RS> rs, sp<const Element> e);
    // Output allocation; presumably the one handed to setOutput() -- TODO confirm.
3996    sp<Allocation> mOut;
3997 public:
3998    /**
3999     * Create an intrinsic for calculating the histogram of an uchar
4000     * or uchar4 image.
4001     *
4002     * Supported elements types are U8_4, U8_3, U8_2, and U8.
4003     *
4004     * @param[in] rs The RenderScript context
4005     * @param[in] e Element type for inputs
4006     *
4007     * @return ScriptIntrinsicHistogram
4008     */
4009    static sp<ScriptIntrinsicHistogram> create(sp<RS> rs, sp<const Element> e);
4010    /**
4011     * Set the output of the histogram.  32 bit integer types are
4012     * supported.
4013     *
4014     * @param[in] aout The output allocation
4015     */
4016    void setOutput(sp<Allocation> aout);
4017    /**
4018     * Set the coefficients used for the dot product calculation. The
4019     * default is {0.299f, 0.587f, 0.114f, 0.f}.
4020     *
4021     * Coefficients must be >= 0 and sum to 1.0 or less.
4022     *
4023     * @param[in] r Red coefficient
4024     * @param[in] g Green coefficient
4025     * @param[in] b Blue coefficient
4026     * @param[in] a Alpha coefficient
4027     */
4028    void setDotCoefficients(float r, float g, float b, float a);
4029    /**
4030     * Process an input buffer and place the histogram into the output
4031     * allocation. The output allocation may be a narrower vector size
4032     * than the input. In this case the vector size of the output is
4033     * used to determine how many of the input channels are used in
4034     * the computation. This is useful if you have an RGBA input
4035     * buffer but only want the histogram for RGB.
4036     *
4037     * 1D and 2D input allocations are supported.
4038     *
4039     * @param[in] ain The input image
4040     */
4041    void forEach(sp<Allocation> ain);
4042    /**
4043     * Process an input buffer and place the histogram into the output
4044     * allocation. The dot product of the input channel and the
4045     * coefficients from 'setDotCoefficients' are used to calculate
4046     * the output values.
4047     *
4048     * 1D and 2D input allocations are supported.
4049     *
4050     * @param[in] ain The input image
4051     */
4052    void forEach_dot(sp<Allocation> ain);
4053};
4054
4055/**
4056 * Intrinsic for applying a per-channel lookup table. Each channel of
4057 * the input has an independent lookup table. The tables are 256
4058 * entries in size and can cover the full value range of U8_4.
4059 **/
4060class ScriptIntrinsicLUT : public ScriptIntrinsic {
4061 private:
    // Allocation holding the lookup table -- presumably uploaded from mCache
    // when mDirty is set; TODO confirm against the implementation.
4062    sp<Allocation> LUT;
4063    bool mDirty;
    // 1024 bytes = 4 channels x 256 entries (matches the class description).
4064    unsigned char mCache[1024];
    // Presumably the shared helper behind setRed/setGreen/setBlue/setAlpha, with
    // `offset` selecting the channel's region of the table -- TODO confirm.
4065    void setTable(unsigned int offset, unsigned char base, unsigned int length, unsigned char* lutValues);
4066    ScriptIntrinsicLUT(sp<RS> rs, sp<const Element> e);
4067
4068 public:
4069    /**
4070     * Supported elements types are U8_4.
4071     *
4072     * The default tables are identity.
4073     *
4074     * @param[in] rs The RenderScript context
4075     * @param[in] e Element type for inputs and outputs
4076     *
4077     * @return ScriptIntrinsicLUT
4078     */
4079    static sp<ScriptIntrinsicLUT> create(sp<RS> rs, sp<const Element> e);
4080    /**
4081     * Invoke the kernel and apply the lookup to each cell of ain and
4082     * copy to aout.
4083     *
4084     * @param[in] ain Input allocation
4085     * @param[in] aout Output allocation
4086     */
4087    void forEach(sp<Allocation> ain, sp<Allocation> aout);
4088    /**
4089     * Sets entries in LUT for the red channel.
4090     * @param[in] base base of region to update
4091     * @param[in] length length of region to update
4092     * @param[in] lutValues LUT values to use
4093     */
4094    void setRed(unsigned char base, unsigned int length, unsigned char* lutValues);
4095    /**
4096     * Sets entries in LUT for the green channel.
4097     * @param[in] base base of region to update
4098     * @param[in] length length of region to update
4099     * @param[in] lutValues LUT values to use
4100     */
4101    void setGreen(unsigned char base, unsigned int length, unsigned char* lutValues);
4102    /**
4103     * Sets entries in LUT for the blue channel.
4104     * @param[in] base base of region to update
4105     * @param[in] length length of region to update
4106     * @param[in] lutValues LUT values to use
4107     */
4108    void setBlue(unsigned char base, unsigned int length, unsigned char* lutValues);
4109    /**
4110     * Sets entries in LUT for the alpha channel.
4111     * @param[in] base base of region to update
4112     * @param[in] length length of region to update
4113     * @param[in] lutValues LUT values to use
4114     */
4115    void setAlpha(unsigned char base, unsigned int length, unsigned char* lutValues);
4116    virtual ~ScriptIntrinsicLUT();
4117};
4118
4119/**
4120 * Intrinsic for performing a resize of a 2D allocation.
 *
 * The input is supplied separately through setInput(); forEach_bicubic()
 * only takes the output Allocation.
4121 */
4122class ScriptIntrinsicResize : public ScriptIntrinsic {
4123 private:
    // Input allocation; presumably the one handed to setInput() -- TODO confirm.
4124    sp<Allocation> mInput;
4125    ScriptIntrinsicResize(sp<RS> rs, sp<const Element> e);
4126 public:
4127    /**
4128     * Creates a new resize intrinsic.
     * NOTE(review): earlier text here claimed "Supported Element types are U8_4"
     * and mentioned a lookup table; that looks copied from another intrinsic --
     * confirm the supported Element types against the implementation.
4129     * @param[in] rs RenderScript context
4130     *            (no Element is passed in, unlike the private constructor)
4131     * @return new ScriptIntrinsicResize
4132     */
4133    static sp<ScriptIntrinsicResize> create(sp<RS> rs);
4134
4135    /**
4136     * Resize copy the input allocation to the output specified. The
4137     * Allocation is rescaled if necessary using bi-cubic
4138     * interpolation.
4139     * The input is not a parameter here; it is the Allocation set via setInput().
4140     * @param[in] aout output Allocation
4141     */
4142    void forEach_bicubic(sp<Allocation> aout);
4143
4144    /**
4145     * Set the input of the resize.
4146     * @param[in] ain input Allocation
4147     */
4148    void setInput(sp<Allocation> ain);
4149};
4150
4151/**
4152 * Intrinsic for converting an Android YUV buffer to RGB.
4153 *
4154 * The input allocation should be supplied in a supported YUV format
4155 * as a YUV element Allocation. The output is RGBA; the alpha channel
4156 * will be set to 255.
4157 */
4158class ScriptIntrinsicYuvToRGB : public ScriptIntrinsic {
4159 private:
4160    ScriptIntrinsicYuvToRGB(sp<RS> rs, sp<const Element> e);
4161 public:
4162    /**
4163     * Create an intrinsic for converting YUV to RGB.
4164     *
4165     * Supported elements types are U8_4.
4166     *
4167     * @param[in] rs The RenderScript context
4168     * @param[in] e Element type for output
4169     *
4170     * @return ScriptIntrinsicYuvToRGB
4171     */
4172    static sp<ScriptIntrinsicYuvToRGB> create(sp<RS> rs, sp<const Element> e);
4173    /**
4174     * Set the input YUV allocation.
4175     *
4176     * @param[in] in The input allocation.
4177     */
4178    void setInput(sp<Allocation> in);
4179
4180    /**
4181     * Convert the image to RGB.
4182     *
4183     * @param[in] out Output allocation. Must match creation element
4184     *                type.
4185     */
4186    void forEach(sp<Allocation> out);
4187
4188};
4189
4190/**
4191 * Sampler object that defines how Allocations can be read as textures
4192 * within a kernel. Samplers are used in conjunction with the rsSample
4193 * runtime function to return values from normalized coordinates.
4194 *
4195 * Any Allocation used with a Sampler must have been created with
4196 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE; using a Sampler on an
4197 * Allocation that was not created with
4198 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE is undefined.
4199 **/
4200 class Sampler : public BaseObj {
4201 private:
4202    Sampler(sp<RS> rs, void* id);
4203    Sampler(sp<RS> rs, void* id, RsSamplerValue min, RsSamplerValue mag,
4204            RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
    // Sampler settings; presumably the values returned by the getters below
    // (getMinification(), getMagnification(), getWrapS(), getWrapT(),
    // getAnisotropy()) -- TODO confirm against the implementation.
4205    RsSamplerValue mMin;
4206    RsSamplerValue mMag;
4207    RsSamplerValue mWrapS;
4208    RsSamplerValue mWrapT;
4209    float mAniso;
4210
4211 public:
4212    /**
4213     * Creates a non-standard Sampler.
4214     * @param[in] rs RenderScript context
4215     * @param[in] min minification
4216     * @param[in] mag magnification
4217     * @param[in] wrapS S wrapping mode
4218     * @param[in] wrapT T wrapping mode
4219     * @param[in] anisotropy anisotropy setting
     * @return new Sampler
4220     */
4221    static sp<Sampler> create(sp<RS> rs, RsSamplerValue min, RsSamplerValue mag, RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4222
4223    /**
4224     * @return minification setting for the sampler
4225     */
4226    RsSamplerValue getMinification();
4227    /**
4228     * @return magnification setting for the sampler
4229     */
4230    RsSamplerValue getMagnification();
4231    /**
4232     * @return S wrapping mode for the sampler
4233     */
4234    RsSamplerValue getWrapS();
4235    /**
4236     * @return T wrapping mode for the sampler
4237     */
4238    RsSamplerValue getWrapT();
4239    /**
4240     * @return anisotropy setting for the sampler
4241     */
4242    float getAnisotropy();
4243
4244    /**
4245     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4246     * clamp.
4247     *
4248     * @param rs Context to which the sampler will belong.
4249     *
4250     * @return Sampler
4251     */
4252    static sp<const Sampler> CLAMP_NEAREST(sp<RS> rs);
4253    /**
4254     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4255     * clamp.
4256     *
4257     * @param rs Context to which the sampler will belong.
4258     *
4259     * @return Sampler
4260     */
4261    static sp<const Sampler> CLAMP_LINEAR(sp<RS> rs);
4262    /**
4263     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4264     * wrap modes set to clamp.
4265     *
4266     * @param rs Context to which the sampler will belong.
4267     *
4268     * @return Sampler
4269     */
4270    static sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR(sp<RS> rs);
4271    /**
4272     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4273     * wrap.
4274     *
4275     * @param rs Context to which the sampler will belong.
4276     *
4277     * @return Sampler
4278     */
4279    static sp<const Sampler> WRAP_NEAREST(sp<RS> rs);
4280    /**
4281     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4282     * wrap.
4283     *
4284     * @param rs Context to which the sampler will belong.
4285     *
4286     * @return Sampler
4287     */
4288    static sp<const Sampler> WRAP_LINEAR(sp<RS> rs);
4289    /**
4290     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4291     * wrap modes set to wrap.
4292     *
4293     * @param rs Context to which the sampler will belong.
4294     *
4295     * @return Sampler
4296     */
4297    static sp<const Sampler> WRAP_LINEAR_MIP_LINEAR(sp<RS> rs);
4298    /**
4299     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4300     * mirrored repeat.
4301     *
4302     * @param rs Context to which the sampler will belong.
4303     *
4304     * @return Sampler
4305     */
4306    static sp<const Sampler> MIRRORED_REPEAT_NEAREST(sp<RS> rs);
4307    /**
4308     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4309     * mirrored repeat.
4310     *
4311     * @param rs Context to which the sampler will belong.
4312     *
4313     * @return Sampler
4314     */
4315    static sp<const Sampler> MIRRORED_REPEAT_LINEAR(sp<RS> rs);
4316    /**
4317     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4318     * wrap modes set to mirrored repeat (by analogy with the CLAMP/WRAP
     * *_LINEAR_MIP_LINEAR samplers above -- TODO confirm against the implementation).
4319     *
4320     * @param rs Context to which the sampler will belong.
4321     *
4322     * @return Sampler
4323     */
4324    static sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR(sp<RS> rs);
4325
4326};
4327
4328}
4329
4330}
4331
4332#endif
4333