// rsCppStructs.h revision c6f437422fe6b0093b5b32df2f145c5ce23e5a13
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ANDROID_RSCPPSTRUCTS_H
18#define ANDROID_RSCPPSTRUCTS_H
19
20#include "rsDefines.h"
21#include "util/RefBase.h"
22
23#include <pthread.h>
24
25
/**
 * Every row in an RS allocation is guaranteed to be aligned by this amount, and
 * every row in a user-backed allocation must be aligned by this amount.
 */
#define RS_CPU_ALLOCATION_ALIGNMENT 16
31
32struct dispatchTable;
33
34namespace android {
35class Surface;
36
37namespace RSC {
38
39
/** Signature of the per-context error handler: receives an error number and its text. */
typedef void (*ErrorHandlerFunc_t)(uint32_t errorNum, const char *errorText);
/** Signature of the per-context message handler: receives a message number, payload and payload length. */
typedef void (*MessageHandlerFunc_t)(uint32_t msgNum, const void *msgData, size_t msgLen);

// Forward declarations of the API classes that refer to one another below.
class RS;
class BaseObj;
class Element;
class Type;
class Allocation;
class Script;
class ScriptC;
class Sampler;
51
/**
 * Possible error codes used by RenderScript. Once a status other than RS_SUCCESS
 * is returned, the RenderScript context is considered dead and cannot perform any
 * additional work.
 */
enum RSError {
    RS_SUCCESS = 0,                 ///< No error
    RS_ERROR_INVALID_PARAMETER = 1, ///< An invalid parameter was passed to a function
    RS_ERROR_RUNTIME_ERROR = 2,     ///< The RenderScript driver returned an error; this is
                                    ///< often indicative of a kernel that crashed
    RS_ERROR_INVALID_ELEMENT = 3,   ///< An invalid Element was passed to a function
    RS_ERROR_MAX = 9999             ///< Largest value in this enumeration
};
66
/**
 * Flags that can control RenderScript behavior on a per-context level.
 * The values are distinct bits so that several flags can be OR-ed together.
 */
enum RSInitFlags {
    RS_INIT_SYNCHRONOUS = 1,     ///< All RenderScript calls will be synchronous. May reduce latency.
    RS_INIT_LOW_LATENCY = 2,     ///< Prefer low latency devices over potentially higher throughput devices.
    // Bitflag 4 is reserved for the context flag low power
    RS_INIT_WAIT_FOR_ATTACH = 8, ///< Kernel execution will hold to give time for a debugger to be attached
    RS_INIT_OPT_LEVEL_0 = 16,    ///< Use the -O0 option to set the optimization level to zero when calling the bcc compiler.
    RS_INIT_MAX = 32
};
78
79
/**
 * Two-component vector of int8_t values (x, y).
 */
class Byte2 {
 public:
  int8_t x, y;

  /** Builds a vector from explicit component values. */
  Byte2(int8_t initX, int8_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Byte2() : x(0), y(0) {}
};
88
/**
 * Three-component vector of int8_t values (x, y, z).
 */
class Byte3 {
 public:
  int8_t x, y, z;

  /** Builds a vector from explicit component values. */
  Byte3(int8_t initX, int8_t initY, int8_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Byte3() : x(0), y(0), z(0) {}
};
97
/**
 * Four-component vector of int8_t values (x, y, z, w).
 */
class Byte4 {
 public:
  int8_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Byte4() : x(0), y(0), z(0), w(0) {}
};
106
/**
 * Two-component vector of uint8_t values (x, y).
 */
class UByte2 {
 public:
  uint8_t x, y;

  /** Builds a vector from explicit component values. */
  UByte2(uint8_t initX, uint8_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  UByte2() : x(0), y(0) {}
};
115
/**
 * Three-component vector of uint8_t values (x, y, z).
 */
class UByte3 {
 public:
  uint8_t x, y, z;

  /** Builds a vector from explicit component values. */
  UByte3(uint8_t initX, uint8_t initY, uint8_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  UByte3() : x(0), y(0), z(0) {}
};
124
/**
 * Four-component vector of uint8_t values (x, y, z, w).
 */
class UByte4 {
 public:
  uint8_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  UByte4() : x(0), y(0), z(0), w(0) {}
};
133
/**
 * Two-component vector of short values (x, y).
 */
class Short2 {
 public:
  short x, y;

  /** Builds a vector from explicit component values. */
  Short2(short initX, short initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Short2() : x(0), y(0) {}
};
142
/**
 * Three-component vector of short values (x, y, z).
 */
class Short3 {
 public:
  short x, y, z;

  /** Builds a vector from explicit component values. */
  Short3(short initX, short initY, short initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Short3() : x(0), y(0), z(0) {}
};
151
/**
 * Four-component vector of short values (x, y, z, w).
 */
class Short4 {
 public:
  short x, y, z, w;

  /** Builds a vector from explicit component values. */
  Short4(short initX, short initY, short initZ, short initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Short4() : x(0), y(0), z(0), w(0) {}
};
160
/**
 * Two-component vector of uint16_t values (x, y).
 */
class UShort2 {
 public:
  uint16_t x, y;

  /** Builds a vector from explicit component values. */
  UShort2(uint16_t initX, uint16_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  UShort2() : x(0), y(0) {}
};
169
/**
 * Three-component vector of uint16_t values (x, y, z).
 */
class UShort3 {
 public:
  uint16_t x, y, z;

  /** Builds a vector from explicit component values. */
  UShort3(uint16_t initX, uint16_t initY, uint16_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  UShort3() : x(0), y(0), z(0) {}
};
178
/**
 * Four-component vector of uint16_t values (x, y, z, w).
 */
class UShort4 {
 public:
  uint16_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  UShort4() : x(0), y(0), z(0), w(0) {}
};
187
/**
 * Two-component vector of int values (x, y).
 */
class Int2 {
 public:
  int x, y;

  /** Builds a vector from explicit component values. */
  Int2(int initX, int initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Int2() : x(0), y(0) {}
};
196
/**
 * Three-component vector of int values (x, y, z).
 */
class Int3 {
 public:
  int x, y, z;

  /** Builds a vector from explicit component values. */
  Int3(int initX, int initY, int initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Int3() : x(0), y(0), z(0) {}
};
205
/**
 * Four-component vector of int values (x, y, z, w).
 */
class Int4 {
 public:
  int x, y, z, w;

  /** Builds a vector from explicit component values. */
  Int4(int initX, int initY, int initZ, int initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Int4() : x(0), y(0), z(0), w(0) {}
};
214
/**
 * Two-component vector of uint32_t values (x, y).
 */
class UInt2 {
 public:
  uint32_t x, y;

  /** Builds a vector from explicit component values. */
  UInt2(uint32_t initX, uint32_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  UInt2() : x(0), y(0) {}
};
223
/**
 * Three-component vector of uint32_t values (x, y, z).
 */
class UInt3 {
 public:
  uint32_t x, y, z;

  /** Builds a vector from explicit component values. */
  UInt3(uint32_t initX, uint32_t initY, uint32_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  UInt3() : x(0), y(0), z(0) {}
};
232
/**
 * Four-component vector of uint32_t values (x, y, z, w).
 */
class UInt4 {
 public:
  uint32_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  UInt4() : x(0), y(0), z(0), w(0) {}
};
241
/**
 * Two-component vector of int64_t values (x, y).
 */
class Long2 {
 public:
  int64_t x, y;

  /** Builds a vector from explicit component values. */
  Long2(int64_t initX, int64_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Long2() : x(0), y(0) {}
};
250
/**
 * Three-component vector of int64_t values (x, y, z).
 */
class Long3 {
 public:
  int64_t x, y, z;

  /** Builds a vector from explicit component values. */
  Long3(int64_t initX, int64_t initY, int64_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Long3() : x(0), y(0), z(0) {}
};
259
/**
 * Four-component vector of int64_t values (x, y, z, w).
 */
class Long4 {
 public:
  int64_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Long4() : x(0), y(0), z(0), w(0) {}
};
268
/**
 * Two-component vector of uint64_t values (x, y).
 */
class ULong2 {
 public:
  uint64_t x, y;

  /** Builds a vector from explicit component values. */
  ULong2(uint64_t initX, uint64_t initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  ULong2() : x(0), y(0) {}
};
277
/**
 * Three-component vector of uint64_t values (x, y, z).
 */
class ULong3 {
 public:
  uint64_t x, y, z;

  /** Builds a vector from explicit component values. */
  ULong3(uint64_t initX, uint64_t initY, uint64_t initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  ULong3() : x(0), y(0), z(0) {}
};
286
/**
 * Four-component vector of uint64_t values (x, y, z, w).
 */
class ULong4 {
 public:
  uint64_t x, y, z, w;

  /** Builds a vector from explicit component values. */
  ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  ULong4() : x(0), y(0), z(0), w(0) {}
};
295
/**
 * Two-component vector of float values (x, y).
 */
class Float2 {
 public:
  float x, y;

  /** Builds a vector from explicit component values. */
  Float2(float initX, float initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Float2() : x(0.f), y(0.f) {}  // float literals, matching Float3 and Float4
};
304
/**
 * Three-component vector of float values (x, y, z).
 */
class Float3 {
 public:
  float x, y, z;

  /** Builds a vector from explicit component values. */
  Float3(float initX, float initY, float initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Float3() : x(0.f), y(0.f), z(0.f) {}
};
313
/**
 * Four-component vector of float values (x, y, z, w).
 */
class Float4 {
 public:
  float x, y, z, w;

  /** Builds a vector from explicit component values. */
  Float4(float initX, float initY, float initZ, float initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Float4() : x(0.f), y(0.f), z(0.f), w(0.f) {}
};
322
/**
 * Two-component vector of double values (x, y).
 */
class Double2 {
 public:
  double x, y;

  /** Builds a vector from explicit component values. */
  Double2(double initX, double initY)
    : x(initX), y(initY) {}
  /** Builds a vector with every component set to zero. */
  Double2() : x(0), y(0) {}
};
331
/**
 * Three-component vector of double values (x, y, z).
 */
class Double3 {
 public:
  double x, y, z;

  /** Builds a vector from explicit component values. */
  Double3(double initX, double initY, double initZ)
    : x(initX), y(initY), z(initZ) {}
  /** Builds a vector with every component set to zero. */
  Double3() : x(0), y(0), z(0) {}
};
340
/**
 * Four-component vector of double values (x, y, z, w).
 */
class Double4 {
 public:
  double x, y, z, w;

  /** Builds a vector from explicit component values. */
  Double4(double initX, double initY, double initZ, double initW)
    : x(initX), y(initY), z(initZ), w(initW) {}
  /** Builds a vector with every component set to zero. */
  Double4() : x(0), y(0), z(0), w(0) {}
};
349
350 /**
351  * The RenderScript context. This class controls initialization, resource management, and teardown.
352  */
353 class RS : public android::RSC::LightRefBase<RS> {
354
355 public:
356    RS();
357    virtual ~RS();
358
359    /**
360     * Initializes a RenderScript context. A context must be initialized before it can be used.
361     * @param[in] name Directory name to be used by this context. This should be equivalent to
362     * Context.getCacheDir().
363     * @param[in] flags Optional flags for this context.
364     * @param[in] targetApi Optional target RS API level. (Default 0: Using the latest SDK/Platform API).
365     * @return true on success
366     */
367    bool init(const char * name, uint32_t flags = 0, int targetApi = 0);
368
369    /**
370     * Sets the error handler function for this context. This error handler is
371     * called whenever an error is set.
372     *
373     * @param[in] func Error handler function
374     */
375    void setErrorHandler(ErrorHandlerFunc_t func);
376
377    /**
378     * Returns the current error handler function for this context.
379     *
380     * @return pointer to current error handler function or NULL if not set
381     */
382    ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; }
383
384    /**
385     * Sets the message handler function for this context. This message handler
386     * is called whenever a message is sent from a RenderScript kernel.
387     *
388     *  @param[in] func Message handler function
389     */
390    void setMessageHandler(MessageHandlerFunc_t func);
391
392    /**
393     * Returns the current message handler function for this context.
394     *
395     * @return pointer to current message handler function or NULL if not set
396     */
397    MessageHandlerFunc_t getMessageHandler() { return mMessageFunc; }
398
399    /**
400     * Returns current status for the context.
401     *
402     * @return current error
403     */
404    RSError getError();
405
406    /**
407     * Waits for any currently running asynchronous operations to finish. This
408     * should only be used for performance testing and timing.
409     */
410    void finish();
411
412    RsContext getContext() { return mContext; }
413    void throwError(RSError error, const char *errMsg);
414
415    static dispatchTable* dispatch;
416
417 private:
418    static bool usingNative;
419    static bool initDispatch(int targetApi);
420
421    static void * threadProc(void *);
422
423    static bool gInitialized;
424    static pthread_mutex_t gInitMutex;
425
426    pthread_t mMessageThreadId;
427    pid_t mNativeMessageThreadId;
428    bool mMessageRun;
429
430    RsDevice mDev;
431    RsContext mContext;
432    RSError mCurrentError;
433
434    ErrorHandlerFunc_t mErrorFunc;
435    MessageHandlerFunc_t mMessageFunc;
436    bool mInit;
437
438    char mCacheDir[PATH_MAX+1];
439    uint32_t mCacheDirLen;
440
441    struct {
442        sp<const Element> U8;
443        sp<const Element> U8_2;
444        sp<const Element> U8_3;
445        sp<const Element> U8_4;
446        sp<const Element> I8;
447        sp<const Element> I8_2;
448        sp<const Element> I8_3;
449        sp<const Element> I8_4;
450        sp<const Element> U16;
451        sp<const Element> U16_2;
452        sp<const Element> U16_3;
453        sp<const Element> U16_4;
454        sp<const Element> I16;
455        sp<const Element> I16_2;
456        sp<const Element> I16_3;
457        sp<const Element> I16_4;
458        sp<const Element> U32;
459        sp<const Element> U32_2;
460        sp<const Element> U32_3;
461        sp<const Element> U32_4;
462        sp<const Element> I32;
463        sp<const Element> I32_2;
464        sp<const Element> I32_3;
465        sp<const Element> I32_4;
466        sp<const Element> U64;
467        sp<const Element> U64_2;
468        sp<const Element> U64_3;
469        sp<const Element> U64_4;
470        sp<const Element> I64;
471        sp<const Element> I64_2;
472        sp<const Element> I64_3;
473        sp<const Element> I64_4;
474        sp<const Element> F32;
475        sp<const Element> F32_2;
476        sp<const Element> F32_3;
477        sp<const Element> F32_4;
478        sp<const Element> F64;
479        sp<const Element> F64_2;
480        sp<const Element> F64_3;
481        sp<const Element> F64_4;
482        sp<const Element> BOOLEAN;
483
484        sp<const Element> ELEMENT;
485        sp<const Element> TYPE;
486        sp<const Element> ALLOCATION;
487        sp<const Element> SAMPLER;
488        sp<const Element> SCRIPT;
489        sp<const Element> MESH;
490        sp<const Element> PROGRAM_FRAGMENT;
491        sp<const Element> PROGRAM_VERTEX;
492        sp<const Element> PROGRAM_RASTER;
493        sp<const Element> PROGRAM_STORE;
494
495        sp<const Element> A_8;
496        sp<const Element> RGB_565;
497        sp<const Element> RGB_888;
498        sp<const Element> RGBA_5551;
499        sp<const Element> RGBA_4444;
500        sp<const Element> RGBA_8888;
501
502        sp<const Element> YUV;
503
504        sp<const Element> MATRIX_4X4;
505        sp<const Element> MATRIX_3X3;
506        sp<const Element> MATRIX_2X2;
507    } mElements;
508
509    struct {
510        sp<const Sampler> CLAMP_NEAREST;
511        sp<const Sampler> CLAMP_LINEAR;
512        sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR;
513        sp<const Sampler> WRAP_NEAREST;
514        sp<const Sampler> WRAP_LINEAR;
515        sp<const Sampler> WRAP_LINEAR_MIP_LINEAR;
516        sp<const Sampler> MIRRORED_REPEAT_NEAREST;
517        sp<const Sampler> MIRRORED_REPEAT_LINEAR;
518        sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR;
519    } mSamplers;
520    friend class Sampler;
521    friend class Element;
522    friend class ScriptC;
523};
524
525 /**
526  * Base class for all RenderScript objects. Not for direct use by developers.
527  */
528class BaseObj : public android::RSC::LightRefBase<BaseObj> {
529public:
530    void * getID() const;
531    virtual ~BaseObj();
532    virtual void updateFromNative();
533    virtual bool equals(sp<const BaseObj> obj);
534
535protected:
536    void *mID;
537    RS* mRS;
538    const char * mName;
539
540    BaseObj(void *id, sp<RS> rs);
541    void checkValid();
542
543    static void * getObjID(sp<const BaseObj> o);
544
545};
546
547 /**
548  * This class provides the primary method through which data is passed to and
549  * from RenderScript kernels. An Allocation provides the backing store for a
550  * given Type.
551  *
552  * An Allocation also contains a set of usage flags that denote how the
553  * Allocation could be used. For example, an Allocation may have usage flags
554  * specifying that it can be used from a script as well as input to a
555  * Sampler. A developer must synchronize across these different usages using
556  * syncAll(int) in order to ensure that different users of the Allocation have
557  * a consistent view of memory. For example, in the case where an Allocation is
558  * used as the output of one kernel and as Sampler input in a later kernel, a
559  * developer must call syncAll(RS_ALLOCATION_USAGE_SCRIPT) prior to launching the
560  * second kernel to ensure correctness.
561  */
562class Allocation : public BaseObj {
563protected:
564    sp<const Type> mType;
565    uint32_t mUsage;
566    sp<Allocation> mAdaptedAllocation;
567
568    bool mConstrainedLOD;
569    bool mConstrainedFace;
570    bool mConstrainedY;
571    bool mConstrainedZ;
572    bool mReadAllowed;
573    bool mWriteAllowed;
574    bool mAutoPadding;
575    uint32_t mSelectedY;
576    uint32_t mSelectedZ;
577    uint32_t mSelectedLOD;
578    RsAllocationCubemapFace mSelectedFace;
579
580    uint32_t mCurrentDimX;
581    uint32_t mCurrentDimY;
582    uint32_t mCurrentDimZ;
583    uint32_t mCurrentCount;
584
585    void * getIDSafe() const;
586    void updateCacheInfo(sp<const Type> t);
587
588    Allocation(void *id, sp<RS> rs, sp<const Type> t, uint32_t usage);
589
590    void validateIsInt64();
591    void validateIsInt32();
592    void validateIsInt16();
593    void validateIsInt8();
594    void validateIsFloat32();
595    void validateIsFloat64();
596    void validateIsObject();
597
598    virtual void updateFromNative();
599
600    void validate2DRange(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h);
601    void validate3DRange(uint32_t xoff, uint32_t yoff, uint32_t zoff,
602                         uint32_t w, uint32_t h, uint32_t d);
603
604public:
605
606    /**
607     * Return Type for the allocation.
608     * @return pointer to underlying Type
609     */
610    sp<const Type> getType() const {
611        return mType;
612    }
613
614    /**
615     * Enable/Disable AutoPadding for Vec3 elements.
616     *
617     * @param useAutoPadding True: enable AutoPadding; flase: disable AutoPadding
618     *
619     */
620    void setAutoPadding(bool useAutoPadding) {
621        mAutoPadding = useAutoPadding;
622    }
623
624    /**
625     * Propagate changes from one usage of the Allocation to other usages of the Allocation.
626     * @param[in] srcLocation source location with changes to propagate elsewhere
627     */
628    void syncAll(RsAllocationUsageType srcLocation);
629
630    /**
631     * Send a buffer to the output stream.  The contents of the Allocation will
632     * be undefined after this operation. This operation is only valid if
633     * USAGE_IO_OUTPUT is set on the Allocation.
634     */
635    void ioSendOutput();
636
637    /**
638     * Receive the latest input into the Allocation. This operation
639     * is only valid if USAGE_IO_INPUT is set on the Allocation.
640     */
641    void ioGetInput();
642
643#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
644    /**
645     * Returns the handle to a raw buffer that is being managed by the screen
646     * compositor. This operation is only valid for Allocations with USAGE_IO_INPUT.
647     * @return Surface associated with allocation
648     */
649    sp<Surface> getSurface();
650
651    /**
652     * Associate a Surface with this Allocation. This
653     * operation is only valid for Allocations with USAGE_IO_OUTPUT.
654     * @param[in] s Surface to associate with allocation
655     */
656    void setSurface(sp<Surface> s);
657#endif
658
659    /**
660     * Generate a mipmap chain. This is only valid if the Type of the Allocation
661     * includes mipmaps. This function will generate a complete set of mipmaps
662     * from the top level LOD and place them into the script memory space. If
663     * the Allocation is also using other memory spaces, a call to
664     * syncAll(Allocation.USAGE_SCRIPT) is required.
665     */
666    void generateMipmaps();
667
668    /**
669     * Copy an array into part of this Allocation.
670     * @param[in] off offset of first Element to be overwritten
671     * @param[in] count number of Elements to copy
672     * @param[in] data array from which to copy
673     */
674    void copy1DRangeFrom(uint32_t off, size_t count, const void *data);
675
676    /**
677     * Copy part of an Allocation into part of this Allocation.
678     * @param[in] off offset of first Element to be overwritten
679     * @param[in] count number of Elements to copy
680     * @param[in] data Allocation from which to copy
681     * @param[in] dataOff offset of first Element in data to copy
682     */
683    void copy1DRangeFrom(uint32_t off, size_t count, sp<const Allocation> data, uint32_t dataOff);
684
685    /**
686     * Copy an array into part of this Allocation.
687     * @param[in] off offset of first Element to be overwritten
688     * @param[in] count number of Elements to copy
689     * @param[in] data array from which to copy
690     */
691    void copy1DRangeTo(uint32_t off, size_t count, void *data);
692
693    /**
694     * Copy entire array to an Allocation.
695     * @param[in] data array from which to copy
696     */
697    void copy1DFrom(const void* data);
698
699    /**
700     * Copy entire Allocation to an array.
701     * @param[in] data destination array
702     */
703    void copy1DTo(void* data);
704
705    /**
706     * Copy from an array into a rectangular region in this Allocation. The
707     * array is assumed to be tightly packed.
708     * @param[in] xoff X offset of region to update in this Allocation
709     * @param[in] yoff Y offset of region to update in this Allocation
710     * @param[in] w Width of region to update
711     * @param[in] h Height of region to update
712     * @param[in] data Array from which to copy
713     */
714    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
715                         const void *data);
716
717    /**
718     * Copy from this Allocation into a rectangular region in an array. The
719     * array is assumed to be tightly packed.
720     * @param[in] xoff X offset of region to copy from this Allocation
721     * @param[in] yoff Y offset of region to copy from this Allocation
722     * @param[in] w Width of region to update
723     * @param[in] h Height of region to update
724     * @param[in] data destination array
725     */
726    void copy2DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
727                       void *data);
728
729    /**
730     * Copy from an Allocation into a rectangular region in this Allocation.
731     * @param[in] xoff X offset of region to update in this Allocation
732     * @param[in] yoff Y offset of region to update in this Allocation
733     * @param[in] w Width of region to update
734     * @param[in] h Height of region to update
735     * @param[in] data Allocation from which to copy
736     * @param[in] dataXoff X offset of region to copy from in data
737     * @param[in] dataYoff Y offset of region to copy from in data
738     */
739    void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
740                         sp<const Allocation> data, uint32_t dataXoff, uint32_t dataYoff);
741
742    /**
743     * Copy from a strided array into a rectangular region in this Allocation.
744     * @param[in] xoff X offset of region to update in this Allocation
745     * @param[in] yoff Y offset of region to update in this Allocation
746     * @param[in] w Width of region to update
747     * @param[in] h Height of region to update
748     * @param[in] data array from which to copy
749     * @param[in] stride stride of data in bytes
750     */
751    void copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
752                           const void *data, size_t stride);
753
754    /**
755     * Copy from a strided array into this Allocation.
756     * @param[in] data array from which to copy
757     * @param[in] stride stride of data in bytes
758     */
759    void copy2DStridedFrom(const void *data, size_t stride);
760
761    /**
762     * Copy from a rectangular region in this Allocation into a strided array.
763     * @param[in] xoff X offset of region to update in this Allocation
764     * @param[in] yoff Y offset of region to update in this Allocation
765     * @param[in] w Width of region to update
766     * @param[in] h Height of region to update
767     * @param[in] data destination array
768     * @param[in] stride stride of data in bytes
769     */
770    void copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
771                         void *data, size_t stride);
772
773    /**
774     * Copy this Allocation into a strided array.
775     * @param[in] data destination array
776     * @param[in] stride stride of data in bytes
777     */
778    void copy2DStridedTo(void *data, size_t stride);
779
780
781    /**
782     * Copy from an array into a 3D region in this Allocation. The
783     * array is assumed to be tightly packed.
784     * @param[in] xoff X offset of region to update in this Allocation
785     * @param[in] yoff Y offset of region to update in this Allocation
786     * @param[in] zoff Z offset of region to update in this Allocation
787     * @param[in] w Width of region to update
788     * @param[in] h Height of region to update
789     * @param[in] d Depth of region to update
790     * @param[in] data Array from which to copy
791     */
792    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
793                         uint32_t h, uint32_t d, const void* data);
794
795    /**
796     * Copy from an Allocation into a 3D region in this Allocation.
797     * @param[in] xoff X offset of region to update in this Allocation
798     * @param[in] yoff Y offset of region to update in this Allocation
799     * @param[in] zoff Z offset of region to update in this Allocation
800     * @param[in] w Width of region to update
801     * @param[in] h Height of region to update
802     * @param[in] d Depth of region to update
803     * @param[in] data Allocation from which to copy
804     * @param[in] dataXoff X offset of region in data to copy from
805     * @param[in] dataYoff Y offset of region in data to copy from
806     * @param[in] dataZoff Z offset of region in data to copy from
807     */
808    void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff,
809                         uint32_t w, uint32_t h, uint32_t d,
810                         sp<const Allocation> data,
811                         uint32_t dataXoff, uint32_t dataYoff, uint32_t dataZoff);
812
813    /**
814     * Copy a 3D region in this Allocation into an array. The
815     * array is assumed to be tightly packed.
816     * @param[in] xoff X offset of region to update in this Allocation
817     * @param[in] yoff Y offset of region to update in this Allocation
818     * @param[in] zoff Z offset of region to update in this Allocation
819     * @param[in] w Width of region to update
820     * @param[in] h Height of region to update
821     * @param[in] d Depth of region to update
822     * @param[in] data Array from which to copy
823     */
824    void copy3DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
825                         uint32_t h, uint32_t d, void* data);
826
827    /**
828     * Creates an Allocation for use by scripts with a given Type.
829     * @param[in] rs Context to which the Allocation will belong
830     * @param[in] type Type of the Allocation
831     * @param[in] mipmaps desired mipmap behavior for the Allocation
832     * @param[in] usage usage for the Allocation
833     * @return new Allocation
834     */
835    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
836                                   RsAllocationMipmapControl mipmaps, uint32_t usage);
837
838    /**
839     * Creates an Allocation for use by scripts with a given Type and a backing pointer. For use
840     * with RS_ALLOCATION_USAGE_SHARED.
841     * @param[in] rs Context to which the Allocation will belong
842     * @param[in] type Type of the Allocation
843     * @param[in] mipmaps desired mipmap behavior for the Allocation
844     * @param[in] usage usage for the Allocation
845     * @param[in] pointer existing backing store to use for this Allocation if possible
846     * @return new Allocation
847     */
848    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
849                                   RsAllocationMipmapControl mipmaps, uint32_t usage, void * pointer);
850
851    /**
852     * Creates an Allocation for use by scripts with a given Type with no mipmaps.
853     * @param[in] rs Context to which the Allocation will belong
854     * @param[in] type Type of the Allocation
855     * @param[in] usage usage for the Allocation
856     * @return new Allocation
857     */
858    static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
859                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
860    /**
861     * Creates an Allocation with a specified number of given elements.
862     * @param[in] rs Context to which the Allocation will belong
863     * @param[in] e Element used in the Allocation
864     * @param[in] count Number of elements of the Allocation
865     * @param[in] usage usage for the Allocation
866     * @return new Allocation
867     */
868    static sp<Allocation> createSized(sp<RS> rs, sp<const Element> e, size_t count,
869                                   uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
870
871    /**
872     * Creates a 2D Allocation with a specified number of given elements.
873     * @param[in] rs Context to which the Allocation will belong
874     * @param[in] e Element used in the Allocation
875     * @param[in] x Width in Elements of the Allocation
876     * @param[in] y Height of the Allocation
877     * @param[in] usage usage for the Allocation
878     * @return new Allocation
879     */
880    static sp<Allocation> createSized2D(sp<RS> rs, sp<const Element> e,
881                                        size_t x, size_t y,
882                                        uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
883
884
885    /**
886     * Get the backing pointer for a USAGE_SHARED allocation.
887     * @param[in] stride optional parameter. when non-NULL, will contain
888     *   stride in bytes of a 2D Allocation
889     * @return pointer to data
890     */
891    void * getPointer(size_t *stride = NULL);
892};
893
/**
 * An Element represents one item within an Allocation. An Element is roughly
 * equivalent to a C type in a RenderScript kernel. Elements may be basic
 * or complex. Some basic elements are:
 *
 * - A single float value (equivalent to a float in a kernel)
 * - A four-element float vector (equivalent to a float4 in a kernel)
 * - An unsigned 32-bit integer (equivalent to an unsigned int in a kernel)
 * - A single signed 8-bit integer (equivalent to a char in a kernel)
 *
 * Basic Elements are composed of an Element.DataType and an
 * Element.DataKind. The DataType encodes C type information of an Element,
 * while the DataKind encodes how that Element should be interpreted by a
 * Sampler. Note that Allocation objects with DataKind USER cannot be used as
 * input for a Sampler. In general, Allocation objects that are intended for
 * use with a Sampler should use bitmap-derived Elements such as
 * Element::RGBA_8888.
 */
912
913
914class Element : public BaseObj {
915public:
916    bool isComplex();
917
918    /**
919     * Elements could be simple, such as an int or a float, or a structure with
920     * multiple sub-elements, such as a collection of floats, float2,
921     * float4. This function returns zero for simple elements or the number of
922     * sub-elements otherwise.
923     * @return number of sub-elements
924     */
925    size_t getSubElementCount() {
926        return mVisibleElementMapSize;
927    }
928
929    /**
930     * For complex Elements, this returns the sub-element at a given index.
931     * @param[in] index index of sub-element
932     * @return sub-element
933     */
934    sp<const Element> getSubElement(uint32_t index);
935
936    /**
937     * For complex Elements, this returns the name of the sub-element at a given
938     * index.
939     * @param[in] index index of sub-element
940     * @return name of sub-element
941     */
942    const char * getSubElementName(uint32_t index);
943
944    /**
945     * For complex Elements, this returns the size of the sub-element at a given
946     * index.
947     * @param[in] index index of sub-element
948     * @return size of sub-element
949     */
950    size_t getSubElementArraySize(uint32_t index);
951
952    /**
953     * Returns the location of a sub-element within a complex Element.
954     * @param[in] index index of sub-element
955     * @return offset in bytes
956     */
957    uint32_t getSubElementOffsetBytes(uint32_t index);
958
959    /**
960     * Returns the data type used for the Element.
961     * @return data type
962     */
963    RsDataType getDataType() const {
964        return mType;
965    }
966
967    /**
968     * Returns the data kind used for the Element.
969     * @return data kind
970     */
971    RsDataKind getDataKind() const {
972        return mKind;
973    }
974
975    /**
976     * Returns the size in bytes of the Element.
977     * @return size in bytes
978     */
979    size_t getSizeBytes() const {
980        return mSizeBytes;
981    }
982
983    /**
984     * Returns the number of vector components for this Element.
985     * @return number of vector components
986     */
987    uint32_t getVectorSize() const {
988        return mVectorSize;
989    }
990
991    /**
992     * Utility function for returning an Element containing a single bool.
993     * @param[in] rs RenderScript context
994     * @return Element
995     */
996    static sp<const Element> BOOLEAN(sp<RS> rs);
997    /**
998     * Utility function for returning an Element containing a single unsigned char.
999     * @param[in] rs RenderScript context
1000     * @return Element
1001     */
1002    static sp<const Element> U8(sp<RS> rs);
1003    /**
1004     * Utility function for returning an Element containing a single signed char.
1005     * @param[in] rs RenderScript context
1006     * @return Element
1007     */
1008    static sp<const Element> I8(sp<RS> rs);
1009    /**
1010     * Utility function for returning an Element containing a single unsigned short.
1011     * @param[in] rs RenderScript context
1012     * @return Element
1013     */
1014    static sp<const Element> U16(sp<RS> rs);
1015    /**
1016     * Utility function for returning an Element containing a single signed short.
1017     * @param[in] rs RenderScript context
1018     * @return Element
1019     */
1020    static sp<const Element> I16(sp<RS> rs);
1021    /**
1022     * Utility function for returning an Element containing a single unsigned int.
1023     * @param[in] rs RenderScript context
1024     * @return Element
1025     */
1026    static sp<const Element> U32(sp<RS> rs);
1027    /**
1028     * Utility function for returning an Element containing a single signed int.
1029     * @param[in] rs RenderScript context
1030     * @return Element
1031     */
1032    static sp<const Element> I32(sp<RS> rs);
1033    /**
1034     * Utility function for returning an Element containing a single unsigned long long.
1035     * @param[in] rs RenderScript context
1036     * @return Element
1037     */
1038    static sp<const Element> U64(sp<RS> rs);
1039    /**
1040     * Utility function for returning an Element containing a single signed long long.
1041     * @param[in] rs RenderScript context
1042     * @return Element
1043     */
1044    static sp<const Element> I64(sp<RS> rs);
1045    /**
1046     * Utility function for returning an Element containing a single float.
1047     * @param[in] rs RenderScript context
1048     * @return Element
1049     */
1050    static sp<const Element> F32(sp<RS> rs);
1051    /**
1052     * Utility function for returning an Element containing a single double.
1053     * @param[in] rs RenderScript context
1054     * @return Element
1055     */
1056    static sp<const Element> F64(sp<RS> rs);
1057    /**
1058     * Utility function for returning an Element containing a single Element.
1059     * @param[in] rs RenderScript context
1060     * @return Element
1061     */
1062    static sp<const Element> ELEMENT(sp<RS> rs);
1063    /**
1064     * Utility function for returning an Element containing a single Type.
1065     * @param[in] rs RenderScript context
1066     * @return Element
1067     */
1068    static sp<const Element> TYPE(sp<RS> rs);
1069    /**
1070     * Utility function for returning an Element containing a single Allocation.
1071     * @param[in] rs RenderScript context
1072     * @return Element
1073     */
1074    static sp<const Element> ALLOCATION(sp<RS> rs);
1075    /**
1076     * Utility function for returning an Element containing a single Sampler.
1077     * @param[in] rs RenderScript context
1078     * @return Element
1079     */
1080    static sp<const Element> SAMPLER(sp<RS> rs);
1081    /**
1082     * Utility function for returning an Element containing a single Script.
1083     * @param[in] rs RenderScript context
1084     * @return Element
1085     */
1086    static sp<const Element> SCRIPT(sp<RS> rs);
1087    /**
1088     * Utility function for returning an Element containing an ALPHA_8 pixel.
1089     * @param[in] rs RenderScript context
1090     * @return Element
1091     */
1092    static sp<const Element> A_8(sp<RS> rs);
1093    /**
1094     * Utility function for returning an Element containing an RGB_565 pixel.
1095     * @param[in] rs RenderScript context
1096     * @return Element
1097     */
1098    static sp<const Element> RGB_565(sp<RS> rs);
1099    /**
1100     * Utility function for returning an Element containing an RGB_888 pixel.
1101     * @param[in] rs RenderScript context
1102     * @return Element
1103     */
1104    static sp<const Element> RGB_888(sp<RS> rs);
1105    /**
1106     * Utility function for returning an Element containing an RGBA_5551 pixel.
1107     * @param[in] rs RenderScript context
1108     * @return Element
1109     */
1110    static sp<const Element> RGBA_5551(sp<RS> rs);
1111    /**
1112     * Utility function for returning an Element containing an RGBA_4444 pixel.
1113     * @param[in] rs RenderScript context
1114     * @return Element
1115     */
1116    static sp<const Element> RGBA_4444(sp<RS> rs);
1117    /**
1118     * Utility function for returning an Element containing an RGBA_8888 pixel.
1119     * @param[in] rs RenderScript context
1120     * @return Element
1121     */
1122    static sp<const Element> RGBA_8888(sp<RS> rs);
1123
1124    /**
1125     * Utility function for returning an Element containing a float2.
1126     * @param[in] rs RenderScript context
1127     * @return Element
1128     */
1129    static sp<const Element> F32_2(sp<RS> rs);
1130    /**
1131     * Utility function for returning an Element containing a float3.
1132     * @param[in] rs RenderScript context
1133     * @return Element
1134     */
1135    static sp<const Element> F32_3(sp<RS> rs);
1136    /**
1137     * Utility function for returning an Element containing a float4.
1138     * @param[in] rs RenderScript context
1139     * @return Element
1140     */
1141    static sp<const Element> F32_4(sp<RS> rs);
1142    /**
1143     * Utility function for returning an Element containing a double2.
1144     * @param[in] rs RenderScript context
1145     * @return Element
1146     */
1147    static sp<const Element> F64_2(sp<RS> rs);
1148    /**
1149     * Utility function for returning an Element containing a double3.
1150     * @param[in] rs RenderScript context
1151     * @return Element
1152     */
1153    static sp<const Element> F64_3(sp<RS> rs);
1154    /**
1155     * Utility function for returning an Element containing a double4.
1156     * @param[in] rs RenderScript context
1157     * @return Element
1158     */
1159    static sp<const Element> F64_4(sp<RS> rs);
1160    /**
1161     * Utility function for returning an Element containing a uchar2.
1162     * @param[in] rs RenderScript context
1163     * @return Element
1164     */
1165    static sp<const Element> U8_2(sp<RS> rs);
1166    /**
1167     * Utility function for returning an Element containing a uchar3.
1168     * @param[in] rs RenderScript context
1169     * @return Element
1170     */
1171    static sp<const Element> U8_3(sp<RS> rs);
1172    /**
1173     * Utility function for returning an Element containing a uchar4.
1174     * @param[in] rs RenderScript context
1175     * @return Element
1176     */
1177    static sp<const Element> U8_4(sp<RS> rs);
1178    /**
1179     * Utility function for returning an Element containing a char2.
1180     * @param[in] rs RenderScript context
1181     * @return Element
1182     */
1183    static sp<const Element> I8_2(sp<RS> rs);
1184    /**
1185     * Utility function for returning an Element containing a char3.
1186     * @param[in] rs RenderScript context
1187     * @return Element
1188     */
1189    static sp<const Element> I8_3(sp<RS> rs);
1190    /**
1191     * Utility function for returning an Element containing a char4.
1192     * @param[in] rs RenderScript context
1193     * @return Element
1194     */
1195    static sp<const Element> I8_4(sp<RS> rs);
1196    /**
1197     * Utility function for returning an Element containing a ushort2.
1198     * @param[in] rs RenderScript context
1199     * @return Element
1200     */
1201    static sp<const Element> U16_2(sp<RS> rs);
1202    /**
1203     * Utility function for returning an Element containing a ushort3.
1204     * @param[in] rs RenderScript context
1205     * @return Element
1206     */
1207    static sp<const Element> U16_3(sp<RS> rs);
1208    /**
1209     * Utility function for returning an Element containing a ushort4.
1210     * @param[in] rs RenderScript context
1211     * @return Element
1212     */
1213    static sp<const Element> U16_4(sp<RS> rs);
1214    /**
1215     * Utility function for returning an Element containing a short2.
1216     * @param[in] rs RenderScript context
1217     * @return Element
1218     */
1219    static sp<const Element> I16_2(sp<RS> rs);
1220    /**
1221     * Utility function for returning an Element containing a short3.
1222     * @param[in] rs RenderScript context
1223     * @return Element
1224     */
1225    static sp<const Element> I16_3(sp<RS> rs);
1226    /**
1227     * Utility function for returning an Element containing a short4.
1228     * @param[in] rs RenderScript context
1229     * @return Element
1230     */
1231    static sp<const Element> I16_4(sp<RS> rs);
1232    /**
1233     * Utility function for returning an Element containing a uint2.
1234     * @param[in] rs RenderScript context
1235     * @return Element
1236     */
1237    static sp<const Element> U32_2(sp<RS> rs);
1238    /**
1239     * Utility function for returning an Element containing a uint3.
1240     * @param[in] rs RenderScript context
1241     * @return Element
1242     */
1243    static sp<const Element> U32_3(sp<RS> rs);
1244    /**
1245     * Utility function for returning an Element containing a uint4.
1246     * @param[in] rs RenderScript context
1247     * @return Element
1248     */
1249    static sp<const Element> U32_4(sp<RS> rs);
1250    /**
1251     * Utility function for returning an Element containing an int2.
1252     * @param[in] rs RenderScript context
1253     * @return Element
1254     */
1255    static sp<const Element> I32_2(sp<RS> rs);
1256    /**
1257     * Utility function for returning an Element containing an int3.
1258     * @param[in] rs RenderScript context
1259     * @return Element
1260     */
1261    static sp<const Element> I32_3(sp<RS> rs);
1262    /**
1263     * Utility function for returning an Element containing an int4.
1264     * @param[in] rs RenderScript context
1265     * @return Element
1266     */
1267    static sp<const Element> I32_4(sp<RS> rs);
1268    /**
1269     * Utility function for returning an Element containing a ulong2.
1270     * @param[in] rs RenderScript context
1271     * @return Element
1272     */
1273    static sp<const Element> U64_2(sp<RS> rs);
1274    /**
1275     * Utility function for returning an Element containing a ulong3.
1276     * @param[in] rs RenderScript context
1277     * @return Element
1278     */
1279    static sp<const Element> U64_3(sp<RS> rs);
1280    /**
1281     * Utility function for returning an Element containing a ulong4.
1282     * @param[in] rs RenderScript context
1283     * @return Element
1284     */
1285    static sp<const Element> U64_4(sp<RS> rs);
1286    /**
1287     * Utility function for returning an Element containing a long2.
1288     * @param[in] rs RenderScript context
1289     * @return Element
1290     */
1291    static sp<const Element> I64_2(sp<RS> rs);
1292    /**
1293     * Utility function for returning an Element containing a long3.
1294     * @param[in] rs RenderScript context
1295     * @return Element
1296     */
1297    static sp<const Element> I64_3(sp<RS> rs);
1298    /**
1299     * Utility function for returning an Element containing a long4.
1300     * @param[in] rs RenderScript context
1301     * @return Element
1302     */
1303    static sp<const Element> I64_4(sp<RS> rs);
1304    /**
1305     * Utility function for returning an Element containing a YUV pixel.
1306     * @param[in] rs RenderScript context
1307     * @return Element
1308     */
1309    static sp<const Element> YUV(sp<RS> rs);
1310    /**
1311     * Utility function for returning an Element containing an rs_matrix_4x4.
1312     * @param[in] rs RenderScript context
1313     * @return Element
1314     */
1315    static sp<const Element> MATRIX_4X4(sp<RS> rs);
1316    /**
1317     * Utility function for returning an Element containing an rs_matrix_3x3.
1318     * @param[in] rs RenderScript context
1319     * @return Element
1320     */
1321    static sp<const Element> MATRIX_3X3(sp<RS> rs);
1322    /**
1323     * Utility function for returning an Element containing an rs_matrix_2x2.
1324     * @param[in] rs RenderScript context
1325     * @return Element
1326     */
1327    static sp<const Element> MATRIX_2X2(sp<RS> rs);
1328
1329    void updateFromNative();
1330
1331    /**
1332     * Create an Element with a given DataType.
1333     * @param[in] rs RenderScript context
1334     * @param[in] dt data type
1335     * @return Element
1336     */
1337    static sp<const Element> createUser(sp<RS> rs, RsDataType dt);
1338    /**
1339     * Create a vector Element with the given DataType
1340     * @param[in] rs RenderScript
1341     * @param[in] dt DataType
1342     * @param[in] size vector size
1343     * @return Element
1344     */
1345    static sp<const Element> createVector(sp<RS> rs, RsDataType dt, uint32_t size);
1346    /**
1347     * Create an Element with a given DataType and DataKind.
1348     * @param[in] rs RenderScript context
1349     * @param[in] dt DataType
1350     * @param[in] dk DataKind
1351     * @return Element
1352     */
1353    static sp<const Element> createPixel(sp<RS> rs, RsDataType dt, RsDataKind dk);
1354
1355    /**
1356     * Returns true if the Element can interoperate with this Element.
1357     * @param[in] e Element to compare
1358     * @return true if Elements can interoperate
1359     */
1360    bool isCompatible(sp<const Element>e) const;
1361
1362    /**
1363     * Builder class for producing complex elements with matching field and name
1364     * pairs. The builder starts empty. The order in which elements are added is
1365     * retained for the layout in memory.
1366     */
1367    class Builder {
1368    private:
1369        RS* mRS;
1370        size_t mElementsCount;
1371        size_t mElementsVecSize;
1372        sp<const Element> * mElements;
1373        char ** mElementNames;
1374        size_t * mElementNameLengths;
1375        uint32_t * mArraySizes;
1376        bool mSkipPadding;
1377
1378    public:
1379        Builder(sp<RS> rs);
1380        ~Builder();
1381        void add(sp<const Element> e, const char * name, uint32_t arraySize = 1);
1382        sp<const Element> create();
1383    };
1384
1385protected:
1386    friend class Type;
1387    Element(void *id, sp<RS> rs,
1388            sp<const Element> * elements,
1389            size_t elementCount,
1390            const char ** elementNames,
1391            size_t * elementNameLengths,
1392            uint32_t * arraySizes);
1393    Element(void *id, sp<RS> rs, RsDataType dt, RsDataKind dk, bool norm, uint32_t size);
1394    Element(void *id, sp<RS> rs);
1395    Element(sp<RS> rs);
1396    virtual ~Element();
1397
1398private:
1399    void updateVisibleSubElements();
1400
1401    size_t mElementsCount;
1402    size_t mVisibleElementMapSize;
1403
1404    sp<const Element> * mElements;
1405    char ** mElementNames;
1406    size_t * mElementNameLengths;
1407    uint32_t * mArraySizes;
1408    uint32_t * mVisibleElementMap;
1409    uint32_t * mOffsetInBytes;
1410
1411    RsDataType mType;
1412    RsDataKind mKind;
1413    bool mNormalized;
1414    size_t mSizeBytes;
1415    size_t mVectorSize;
1416};
1417
1418class FieldPacker {
1419protected:
1420    unsigned char* mData;
1421    size_t mPos;
1422    size_t mLen;
1423
1424public:
1425    FieldPacker(size_t len)
1426        : mPos(0), mLen(len) {
1427            mData = new unsigned char[len];
1428        }
1429
1430    virtual ~FieldPacker() {
1431        delete [] mData;
1432    }
1433
1434    void align(size_t v) {
1435        if ((v & (v - 1)) != 0) {
1436            //            ALOGE("Non-power-of-two alignment: %zu", v);
1437            return;
1438        }
1439
1440        while ((mPos & (v - 1)) != 0) {
1441            mData[mPos++] = 0;
1442        }
1443    }
1444
1445    void reset() {
1446        mPos = 0;
1447    }
1448
1449    void reset(size_t i) {
1450        if (i >= mLen) {
1451            //            ALOGE("Out of bounds: i (%zu) >= len (%zu)", i, mLen);
1452            return;
1453        }
1454        mPos = i;
1455    }
1456
1457    void skip(size_t i) {
1458        size_t res = mPos + i;
1459        if (res > mLen) {
1460            //            ALOGE("Exceeded buffer length: i (%zu) > len (%zu)", i, mLen);
1461            return;
1462        }
1463        mPos = res;
1464    }
1465
1466    void* getData() const {
1467        return mData;
1468    }
1469
1470    size_t getLength() const {
1471        return mLen;
1472    }
1473
1474    template <typename T>
1475        void add(T t) {
1476        align(sizeof(t));
1477        if (mPos + sizeof(t) <= mLen) {
1478            memcpy(&mData[mPos], &t, sizeof(t));
1479            mPos += sizeof(t);
1480        }
1481    }
1482
1483    /*
1484      void add(rs_matrix4x4 m) {
1485      for (size_t i = 0; i < 16; i++) {
1486      add(m.m[i]);
1487      }
1488      }
1489
1490      void add(rs_matrix3x3 m) {
1491      for (size_t i = 0; i < 9; i++) {
1492      add(m.m[i]);
1493      }
1494      }
1495
1496      void add(rs_matrix2x2 m) {
1497      for (size_t i = 0; i < 4; i++) {
1498      add(m.m[i]);
1499      }
1500      }
1501    */
1502
1503    void add(sp<BaseObj> obj) {
1504        if (obj != NULL) {
1505            add((uint32_t) (uintptr_t) obj->getID());
1506        } else {
1507            add((uint32_t) 0);
1508        }
1509    }
1510};
1511
1512/**
1513 * A Type describes the Element and dimensions used for an Allocation or a
1514 * parallel operation.
1515 *
1516 * A Type always includes an Element and an X dimension. A Type may be
1517 * multidimensional, up to three dimensions. A nonzero value in the Y or Z
1518 * dimensions indicates that the dimension is present. Note that a Type with
1519 * only a given X dimension and a Type with the same X dimension but Y = 1 are
1520 * not equivalent.
1521 *
1522 * A Type also supports inclusion of level of detail (LOD) or cube map
1523 * faces. LOD and cube map faces are booleans to indicate present or not
1524 * present.
1525 *
1526 * A Type also supports YUV format information to support an Allocation in a YUV
1527 * format. The YUV formats supported are RS_YUV_YV12 and RS_YUV_NV21.
1528 */
1529class Type : public BaseObj {
1530protected:
1531    friend class Allocation;
1532
1533    uint32_t mDimX;
1534    uint32_t mDimY;
1535    uint32_t mDimZ;
1536    RsYuvFormat mYuvFormat;
1537    bool mDimMipmaps;
1538    bool mDimFaces;
1539    size_t mElementCount;
1540    sp<const Element> mElement;
1541
1542    Type(void *id, sp<RS> rs);
1543
1544    void calcElementCount();
1545    virtual void updateFromNative();
1546
1547public:
1548
1549    /**
1550     * Returns the YUV format.
1551     * @return YUV format of the Allocation
1552     */
1553    RsYuvFormat getYuvFormat() const {
1554        return mYuvFormat;
1555    }
1556
1557    /**
1558     * Returns the Element of the Allocation.
1559     * @return YUV format of the Allocation
1560     */
1561    sp<const Element> getElement() const {
1562        return mElement;
1563    }
1564
1565    /**
1566     * Returns the X dimension of the Allocation.
1567     * @return X dimension of the allocation
1568     */
1569    uint32_t getX() const {
1570        return mDimX;
1571    }
1572
1573    /**
1574     * Returns the Y dimension of the Allocation.
1575     * @return Y dimension of the allocation
1576     */
1577    uint32_t getY() const {
1578        return mDimY;
1579    }
1580
1581    /**
1582     * Returns the Z dimension of the Allocation.
1583     * @return Z dimension of the allocation
1584     */
1585    uint32_t getZ() const {
1586        return mDimZ;
1587    }
1588
1589    /**
1590     * Returns true if the Allocation has mipmaps.
1591     * @return true if the Allocation has mipmaps
1592     */
1593    bool hasMipmaps() const {
1594        return mDimMipmaps;
1595    }
1596
1597    /**
1598     * Returns true if the Allocation is a cube map
1599     * @return true if the Allocation is a cube map
1600     */
1601    bool hasFaces() const {
1602        return mDimFaces;
1603    }
1604
1605    /**
1606     * Returns number of accessible Elements in the Allocation
1607     * @return number of accessible Elements in the Allocation
1608     */
1609    size_t getCount() const {
1610        return mElementCount;
1611    }
1612
1613    /**
1614     * Returns size in bytes of all Elements in the Allocation
1615     * @return size in bytes of all Elements in the Allocation
1616     */
1617    size_t getSizeBytes() const {
1618        return mElementCount * mElement->getSizeBytes();
1619    }
1620
1621    /**
1622     * Creates a new Type with the given Element and dimensions.
1623     * @param[in] rs RenderScript context
1624     * @param[in] e Element
1625     * @param[in] dimX X dimension
1626     * @param[in] dimY Y dimension
1627     * @param[in] dimZ Z dimension
1628     * @return new Type
1629     */
1630    static sp<const Type> create(sp<RS> rs, sp<const Element> e, uint32_t dimX, uint32_t dimY, uint32_t dimZ);
1631
1632    class Builder {
1633    protected:
1634        RS* mRS;
1635        uint32_t mDimX;
1636        uint32_t mDimY;
1637        uint32_t mDimZ;
1638        RsYuvFormat mYuvFormat;
1639        bool mDimMipmaps;
1640        bool mDimFaces;
1641        sp<const Element> mElement;
1642
1643    public:
1644        Builder(sp<RS> rs, sp<const Element> e);
1645
1646        void setX(uint32_t value);
1647        void setY(uint32_t value);
1648        void setZ(uint32_t value);
1649        void setYuvFormat(RsYuvFormat format);
1650        void setMipmaps(bool value);
1651        void setFaces(bool value);
1652        sp<const Type> create();
1653    };
1654
1655};
1656
1657/**
1658 * The parent class for all executable Scripts. This should not be used by applications.
1659 */
1660class Script : public BaseObj {
1661private:
1662
1663protected:
1664    Script(void *id, sp<RS> rs);
1665    void forEach(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1666            const void *v, size_t) const;
1667    void reduce(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1668                const RsScriptCall *sc) const;
1669    void bindAllocation(sp<Allocation> va, uint32_t slot) const;
1670    void setVar(uint32_t index, const void *, size_t len) const;
1671    void setVar(uint32_t index, sp<const BaseObj> o) const;
1672    void invoke(uint32_t slot, const void *v, size_t len) const;
1673
1674
1675    void invoke(uint32_t slot) const {
1676        invoke(slot, NULL, 0);
1677    }
1678    void setVar(uint32_t index, float v) const {
1679        setVar(index, &v, sizeof(v));
1680    }
1681    void setVar(uint32_t index, double v) const {
1682        setVar(index, &v, sizeof(v));
1683    }
1684    void setVar(uint32_t index, int32_t v) const {
1685        setVar(index, &v, sizeof(v));
1686    }
1687    void setVar(uint32_t index, uint32_t v) const {
1688        setVar(index, &v, sizeof(v));
1689    }
1690    void setVar(uint32_t index, int64_t v) const {
1691        setVar(index, &v, sizeof(v));
1692    }
1693    void setVar(uint32_t index, bool v) const {
1694        setVar(index, &v, sizeof(v));
1695    }
1696
1697public:
1698    class FieldBase {
1699    protected:
1700        sp<const Element> mElement;
1701        sp<Allocation> mAllocation;
1702
1703        void init(sp<RS> rs, uint32_t dimx, uint32_t usages = 0);
1704
1705    public:
1706        sp<const Element> getElement() {
1707            return mElement;
1708        }
1709
1710        sp<const Type> getType() {
1711            return mAllocation->getType();
1712        }
1713
1714        sp<const Allocation> getAllocation() {
1715            return mAllocation;
1716        }
1717
1718        //void updateAllocation();
1719    };
1720};
1721
1722/**
1723 * The parent class for all user-defined scripts. This is intended to be used by auto-generated code only.
1724 */
1725class ScriptC : public Script {
1726protected:
1727    ScriptC(sp<RS> rs,
1728            const void *codeTxt, size_t codeLength,
1729            const char *cachedName, size_t cachedNameLength,
1730            const char *cacheDir, size_t cacheDirLength);
1731
1732};
1733
1734/**
1735 * The parent class for all script intrinsics. Intrinsics provide highly optimized implementations of
1736 * basic functions. This is not intended to be used directly.
1737 */
1738class ScriptIntrinsic : public Script {
1739 protected:
1740    sp<const Element> mElement;
1741    ScriptIntrinsic(sp<RS> rs, int id, sp<const Element> e);
1742    virtual ~ScriptIntrinsic();
1743};
1744
1745/**
1746 * Intrinsic for converting RGB to RGBA by using a 3D lookup table. The incoming
 * r,g,b values are used as normalized x,y,z coordinates into a 3D
1748 * allocation. The 8 nearest values are sampled and linearly interpolated. The
1749 * result is placed in the output.
1750 */
1751class ScriptIntrinsic3DLUT : public ScriptIntrinsic {
1752 private:
1753    ScriptIntrinsic3DLUT(sp<RS> rs, sp<const Element> e);
1754 public:
1755    /**
1756     * Supported Element types are U8_4. Default lookup table is identity.
1757     * @param[in] rs RenderScript context
1758     * @param[in] e Element
1759     * @return new ScriptIntrinsic
1760     */
1761    static sp<ScriptIntrinsic3DLUT> create(sp<RS> rs, sp<const Element> e);
1762
1763    /**
1764     * Launch the intrinsic.
1765     * @param[in] ain input Allocation
1766     * @param[in] aout output Allocation
1767     */
1768    void forEach(sp<Allocation> ain, sp<Allocation> aout);
1769
1770    /**
1771     * Sets the lookup table. The lookup table must use the same Element as the
1772     * intrinsic.
1773     * @param[in] lut new lookup table
1774     */
1775    void setLUT(sp<Allocation> lut);
1776};
1777
1778
1779/**
1780 * Intrinsic kernel provides high performance RenderScript APIs to BLAS.
1781 *
1782 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
1783 * building blocks for performing basic vector and matrix operations.
1784 *
1785 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
1786 *
1787 **/
1788class ScriptIntrinsicBLAS : public ScriptIntrinsic {
1789 private:
1790    ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e);
1791 public:
1792    /**
1793     * Create an intrinsic to access BLAS subroutines.
1794     *
1795     * @param rs The RenderScript context
1796     * @return ScriptIntrinsicBLAS
1797     */
1798    static sp<ScriptIntrinsicBLAS> create(sp<RS> rs);
1799
1800    /**
1801     * SGEMV performs one of the matrix-vector operations
1802     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1803     *
1804     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
1805     *
1806     * @param TransA The type of transpose applied to matrix A.
1807     * @param alpha The scalar alpha.
1808     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
1809     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1810     * @param incX The increment for the elements of vector x, must be larger than zero.
1811     * @param beta The scalar beta.
1812     * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
1813     * @param incY The increment for the elements of vector y, must be larger than zero.
1814     */
1815    void SGEMV(RsBlasTranspose TransA,
1816               float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1817               float beta, sp<Allocation> Y, int incY);
1818
1819    /**
1820     * DGEMV performs one of the matrix-vector operations
1821     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1822     *
1823     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
1824     *
1825     * @param TransA The type of transpose applied to matrix A.
1826     * @param alpha The scalar alpha.
1827     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
1828     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
1829     * @param incX The increment for the elements of vector x, must be larger than zero.
1830     * @param beta The scalar beta.
1831     * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
1832     * @param incY The increment for the elements of vector y, must be larger than zero.
1833     */
1834    void DGEMV(RsBlasTranspose TransA,
1835               double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1836               double beta, sp<Allocation> Y, int incY);
1837
1838    /**
1839     * CGEMV performs one of the matrix-vector operations
1840     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1841     *
1842     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
1843     *
1844     * @param TransA The type of transpose applied to matrix A.
1845     * @param alpha The scalar alpha.
1846     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
1847     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
1848     * @param incX The increment for the elements of vector x, must be larger than zero.
1849     * @param beta The scalar beta.
1850     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
1851     * @param incY The increment for the elements of vector y, must be larger than zero.
1852     */
1853    void CGEMV(RsBlasTranspose TransA,
1854               Float2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1855               Float2 beta, sp<Allocation> Y, int incY);
1856
1857    /**
1858     * ZGEMV performs one of the matrix-vector operations
1859     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1860     *
1861     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
1862     *
1863     * @param TransA The type of transpose applied to matrix A.
1864     * @param alpha The scalar alpha.
1865     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
1866     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
1867     * @param incX The increment for the elements of vector x, must be larger than zero.
1868     * @param beta The scalar beta.
1869     * @param Y The input/output allocation contains vector y, overwritten with the result; supported elements type: {Element#F64_2}.
1870     * @param incY The increment for the elements of vector y, must be larger than zero.
1871     */
1872    void ZGEMV(RsBlasTranspose TransA,
1873               Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1874               Double2 beta, sp<Allocation> Y, int incY);
1875
1876    /**
1877     * SGBMV performs one of the matrix-vector operations
1878     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1879     *
1880     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
1881     *
1882     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1883     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1884     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1885     *           for i in range(0, m):
1886     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1887     *                  b[i, j-i+kl] = a[i, j]
1888     *
1889     * @param TransA The type of transpose applied to matrix A.
1890     * @param KL The number of sub-diagonals of the matrix A.
1891     * @param KU The number of super-diagonals of the matrix A.
1892     * @param alpha The scalar alpha.
1893     * @param A The input allocation contains the band matrix A, supported elements type: {Element#F32}.
1894     * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1895     * @param incX The increment for the elements of vector x, must be larger than zero.
1896     * @param beta The scalar beta.
1897     * @param Y The input/output allocation contains vector y, overwritten with the result; supported elements type: {Element#F32}.
1898     * @param incY The increment for the elements of vector y, must be larger than zero.
1899     */
1900    void SGBMV(RsBlasTranspose TransA,
1901               int KL, int KU, float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1902               float beta, sp<Allocation> Y, int incY);
1903
1904    /**
1905     * DGBMV performs one of the matrix-vector operations
1906     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1907     *
1908     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
1909     *
1910     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1911     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1912     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1913     *           for i in range(0, m):
1914     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1915     *                  b[i, j-i+kl] = a[i, j]
1916     *
1917     * @param TransA The type of transpose applied to matrix A.
1918     * @param KL The number of sub-diagonals of the matrix A.
1919     * @param KU The number of super-diagonals of the matrix A.
1920     * @param alpha The scalar alpha.
1921     * @param A The input allocation contains the band matrix A, supported elements type: {Element#F64}.
1922     * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
1923     * @param incX The increment for the elements of vector x, must be larger than zero.
1924     * @param beta The scalar beta.
1925     * @param Y The input/output allocation contains vector y, overwritten with the result; supported elements type: {Element#F64}.
1926     * @param incY The increment for the elements of vector y, must be larger than zero.
1927     */
1928    void DGBMV(RsBlasTranspose TransA,
1929               int KL, int KU, double alpha, sp<Allocation> A, sp<Allocation> X,
1930               int incX, double beta, sp<Allocation> Y, int incY);
1931
1932    /**
1933     * CGBMV performs one of the matrix-vector operations
1934     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1935     *
1936     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
1937     *
1938     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1939     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1940     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1941     *           for i in range(0, m):
1942     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1943     *                  b[i, j-i+kl] = a[i, j]
1944     *
1945     * @param TransA The type of transpose applied to matrix A.
1946     * @param KL The number of sub-diagonals of the matrix A.
1947     * @param KU The number of super-diagonals of the matrix A.
1948     * @param alpha The scalar alpha.
1949     * @param A The input allocation contains the band matrix A, supported elements type: {Element#F32_2}.
1950     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
1951     * @param incX The increment for the elements of vector x, must be larger than zero.
1952     * @param beta The scalar beta.
1953     * @param Y The input/output allocation contains vector y, overwritten with the result; supported elements type: {Element#F32_2}.
1954     * @param incY The increment for the elements of vector y, must be larger than zero.
1955     */
1956    void CGBMV(RsBlasTranspose TransA,
1957               int KL, int KU, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
1958               int incX, Float2 beta, sp<Allocation> Y, int incY);
1959
1960    /**
1961     * ZGBMV performs one of the matrix-vector operations
1962     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1963     *
1964     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
1965     *
1966     * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1967     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1968     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1969     *           for i in range(0, m):
1970     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1971     *                  b[i, j-i+kl] = a[i, j]
1972     *
1973     * @param TransA The type of transpose applied to matrix A.
1974     * @param KL The number of sub-diagonals of the matrix A.
1975     * @param KU The number of super-diagonals of the matrix A.
1976     * @param alpha The scalar alpha.
1977     * @param A The input allocation contains the band matrix A, supported elements type: {Element#F64_2}.
1978     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
1979     * @param incX The increment for the elements of vector x, must be larger than zero.
1980     * @param beta The scalar beta.
1981     * @param Y The input/output allocation contains vector y, overwritten with the result; supported elements type: {Element#F64_2}.
1982     * @param incY The increment for the elements of vector y, must be larger than zero.
1983     */
1984    void ZGBMV(RsBlasTranspose TransA,
1985               int KL, int KU, Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1986               Double2 beta, sp<Allocation> Y, int incY);
1987
1988    /**
1989     * STRMV performs one of the matrix-vector operations
1990     * x := A*x   or   x := A**T*x
1991     *
1992     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
1993     *
1994     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1995     * @param TransA The type of transpose applied to matrix A.
1996     * @param Diag Specifies whether or not A is unit triangular.
1997     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
1998     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F32}.
1999     * @param incX The increment for the elements of vector x, must be larger than zero.
2000     */
2001    void STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2002               sp<Allocation> A, sp<Allocation> X, int incX);
2003
2004    /**
2005     * DTRMV performs one of the matrix-vector operations
2006     * x := A*x   or   x := A**T*x
2007     *
2008     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
2009     *
2010     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2011     * @param TransA The type of transpose applied to matrix A.
2012     * @param Diag Specifies whether or not A is unit triangular.
2013     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2014     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F64}.
2015     * @param incX The increment for the elements of vector x, must be larger than zero.
2016     */
2017    void DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2018               sp<Allocation> A, sp<Allocation> X, int incX);
2019
2020    /**
2021     * CTRMV performs one of the matrix-vector operations
2022     * x := A*x   or   x := A**T*x   or   x := A**H*x
2023     *
2024     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
2025     *
2026     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2027     * @param TransA The type of transpose applied to matrix A.
2028     * @param Diag Specifies whether or not A is unit triangular.
2029     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2030     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F32_2}.
2031     * @param incX The increment for the elements of vector x, must be larger than zero.
2032     */
2033    void CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2034               sp<Allocation> A, sp<Allocation> X, int incX);
2035
2036    /**
2037     * ZTRMV performs one of the matrix-vector operations
2038     * x := A*x   or   x := A**T*x   or   x := A**H*x
2039     *
2040     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
2041     *
2042     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2043     * @param TransA The type of transpose applied to matrix A.
2044     * @param Diag Specifies whether or not A is unit triangular.
2045     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2046     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F64_2}.
2047     * @param incX The increment for the elements of vector x, must be larger than zero.
2048     */
2049    void ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2050               sp<Allocation> A, sp<Allocation> X, int incX);
2051
2052    /**
2053     * STBMV performs one of the matrix-vector operations
2054     * x := A*x   or   x := A**T*x
2055     *
2056     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
2057     *
2058     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2059     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2060     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2061     *           for i in range(0, n):
2062     *              for j in range(i, min(i+k+1, n)):
2063     *                  b[i, j-i] = a[i, j]
2064     *
2065     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2066     * @param TransA The type of transpose applied to matrix A.
2067     * @param Diag Specifies whether or not A is unit triangular.
2068     * @param K The number of off-diagonals of the matrix A.
2069     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2070     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F32}.
2071     * @param incX The increment for the elements of vector x, must be larger than zero.
2072     */
2073    void STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2074               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2075
2076    /**
2077     * DTBMV performs one of the matrix-vector operations
2078     * x := A*x   or   x := A**T*x
2079     *
2080     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
2081     *
2082     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2083     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2084     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2085     *           for i in range(0, n):
2086     *              for j in range(i, min(i+k+1, n)):
2087     *                  b[i, j-i] = a[i, j]
2088     *
2089     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2090     * @param TransA The type of transpose applied to matrix A.
2091     * @param Diag Specifies whether or not A is unit triangular.
2092     * @param K The number of off-diagonals of the matrix A.
2093     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2094     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F64}.
2095     * @param incX The increment for the elements of vector x, must be larger than zero.
2096     */
2097    void DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2098               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2099
2100    /**
2101     * CTBMV performs one of the matrix-vector operations
2102     * x := A*x   or   x := A**T*x   or   x := A**H*x
2103     *
2104     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
2105     *
2106     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2107     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2108     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2109     *           for i in range(0, n):
2110     *              for j in range(i, min(i+k+1, n)):
2111     *                  b[i, j-i] = a[i, j]
2112     *
2113     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2114     * @param TransA The type of transpose applied to matrix A.
2115     * @param Diag Specifies whether or not A is unit triangular.
2116     * @param K The number of off-diagonals of the matrix A.
2117     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2118     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F32_2}.
2119     * @param incX The increment for the elements of vector x, must be larger than zero.
2120     */
2121    void CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2122               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2123
2124    /**
2125     * ZTBMV performs one of the matrix-vector operations
2126     * x := A*x   or   x := A**T*x   or   x := A**H*x
2127     *
2128     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
2129     *
2130     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2131     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2132     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2133     *           for i in range(0, n):
2134     *              for j in range(i, min(i+k+1, n)):
2135     *                  b[i, j-i] = a[i, j]
2136     *
2137     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2138     * @param TransA The type of transpose applied to matrix A.
2139     * @param Diag Specifies whether or not A is unit triangular.
2140     * @param K The number of off-diagonals of the matrix A.
2141     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2142     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F64_2}.
2143     * @param incX The increment for the elements of vector x, must be larger than zero.
2144     */
2145    void ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2146               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2147
2148    /**
2149     * STPMV performs one of the matrix-vector operations
2150     * x := A*x   or   x := A**T*x
2151     *
2152     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
2153     *
2154     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2155     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2156     *       'a' to packed matrix 'b'.
2157     *           k = 0
2158     *           for i in range(0, n):
2159     *              for j in range(i, n):
2160     *                  b[k++] = a[i, j]
2161     *
2162     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2163     * @param TransA The type of transpose applied to matrix A.
2164     * @param Diag Specifies whether or not A is unit triangular.
2165     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2166     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F32}.
2167     * @param incX The increment for the elements of vector x, must be larger than zero.
2168     */
2169    void STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2170               sp<Allocation> Ap, sp<Allocation> X, int incX);
2171
2172    /**
2173     * DTPMV performs one of the matrix-vector operations
2174     * x := A*x   or   x := A**T*x
2175     *
2176     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
2177     *
2178     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2179     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2180     *       'a' to packed matrix 'b'.
2181     *           k = 0
2182     *           for i in range(0, n):
2183     *              for j in range(i, n):
2184     *                  b[k++] = a[i, j]
2185     *
2186     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2187     * @param TransA The type of transpose applied to matrix A.
2188     * @param Diag Specifies whether or not A is unit triangular.
2189     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2190     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F64}.
2191     * @param incX The increment for the elements of vector x, must be larger than zero.
2192     */
2193    void DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2194               sp<Allocation> Ap, sp<Allocation> X, int incX);
2195
2196    /**
2197     * CTPMV performs one of the matrix-vector operations
2198     * x := A*x   or   x := A**T*x   or   x := A**H*x
2199     *
2200     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
2201     *
2202     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2203     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2204     *       'a' to packed matrix 'b'.
2205     *           k = 0
2206     *           for i in range(0, n):
2207     *              for j in range(i, n):
2208     *                  b[k++] = a[i, j]
2209     *
2210     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2211     * @param TransA The type of transpose applied to matrix A.
2212     * @param Diag Specifies whether or not A is unit triangular.
2213     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2214     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F32_2}.
2215     * @param incX The increment for the elements of vector x, must be larger than zero.
2216     */
2217    void CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2218               sp<Allocation> Ap, sp<Allocation> X, int incX);
2219
2220    /**
2221     * ZTPMV performs one of the matrix-vector operations
2222     * x := A*x   or   x := A**T*x   or   x := A**H*x
2223     *
2224     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
2225     *
2226     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2227     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2228     *       'a' to packed matrix 'b'.
2229     *           k = 0
2230     *           for i in range(0, n):
2231     *              for j in range(i, n):
2232     *                  b[k++] = a[i, j]
2233     *
2234     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2235     * @param TransA The type of transpose applied to matrix A.
2236     * @param Diag Specifies whether or not A is unit triangular.
2237     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}.
2238     * @param X The input/output allocation contains vector x, overwritten with the result; supported elements type: {Element#F64_2}.
2239     * @param incX The increment for the elements of vector x, must be larger than zero.
2240     */
2241    void ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2242               sp<Allocation> Ap, sp<Allocation> X, int incX);
2243
2244    /**
2245     * STRSV solves one of the systems of equations
2246     * A*x = b   or   A**T*x = b
2247     *
2248     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
2249     *
2250     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2251     * @param TransA The type of transpose applied to matrix A.
2252     * @param Diag Specifies whether or not A is unit triangular.
2253     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2254     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F32}.
2255     * @param incX The increment for the elements of vector x, must be larger than zero.
2256     */
2257    void STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2258               sp<Allocation> A, sp<Allocation> X, int incX);
2259
2260    /**
2261     * DTRSV solves one of the systems of equations
2262     * A*x = b   or   A**T*x = b
2263     *
2264     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
2265     *
2266     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2267     * @param TransA The type of transpose applied to matrix A.
2268     * @param Diag Specifies whether or not A is unit triangular.
2269     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2270     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F64}.
2271     * @param incX The increment for the elements of vector x, must be larger than zero.
2272     */
2273    void DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2274               sp<Allocation> A, sp<Allocation> X, int incX);
2275
2276    /**
2277     * CTRSV solves one of the systems of equations
2278     * A*x = b   or   A**T*x = b   or   A**H*x = b
2279     *
2280     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
2281     *
2282     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2283     * @param TransA The type of transpose applied to matrix A.
2284     * @param Diag Specifies whether or not A is unit triangular.
2285     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2286     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F32_2}.
2287     * @param incX The increment for the elements of vector x, must be larger than zero.
2288     */
2289    void CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2290               sp<Allocation> A, sp<Allocation> X, int incX);
2291
2292    /**
2293     * ZTRSV solves one of the systems of equations
2294     * A*x = b   or   A**T*x = b   or   A**H*x = b
2295     *
2296     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
2297     *
2298     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2299     * @param TransA The type of transpose applied to matrix A.
2300     * @param Diag Specifies whether or not A is unit triangular.
2301     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2302     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F64_2}.
2303     * @param incX The increment for the elements of vector x, must be larger than zero.
2304     */
2305    void ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2306               sp<Allocation> A, sp<Allocation> X, int incX);
2307
2308    /**
2309     * STBSV solves one of the systems of equations
2310     * A*x = b   or   A**T*x = b
2311     *
2312     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
2313     *
2314     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2315     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2316     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2317     *           for i in range(0, n):
2318     *              for j in range(i, min(i+k+1, n)):
2319     *                  b[i, j-i] = a[i, j]
2320     *
2321     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2322     * @param TransA The type of transpose applied to matrix A.
2323     * @param Diag Specifies whether or not A is unit triangular.
2324     * @param K The number of off-diagonals of the matrix A.
2325     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2326     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F32}.
2327     * @param incX The increment for the elements of vector x, must be larger than zero.
2328     */
2329    void STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2330               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2331
2332    /**
2333     * DTBSV solves one of the systems of equations
2334     * A*x = b   or   A**T*x = b
2335     *
2336     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
2337     *
2338     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2339     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2340     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2341     *           for i in range(0, n):
2342     *              for j in range(i, min(i+k+1, n)):
2343     *                  b[i, j-i] = a[i, j]
2344     *
2345     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2346     * @param TransA The type of transpose applied to matrix A.
2347     * @param Diag Specifies whether or not A is unit triangular.
2348     * @param K The number of off-diagonals of the matrix A.
2349     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2350     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F64}.
2351     * @param incX The increment for the elements of vector x, must be larger than zero.
2352     */
2353    void DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2354               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2355
2356    /**
2357     * CTBSV solves one of the systems of equations
2358     * A*x = b   or   A**T*x = b   or   A**H*x = b
2359     *
2360     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
2361     *
2362     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2363     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2364     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2365     *           for i in range(0, n):
2366     *              for j in range(i, min(i+k+1, n)):
2367     *                  b[i, j-i] = a[i, j]
2368     *
2369     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2370     * @param TransA The type of transpose applied to matrix A.
2371     * @param Diag Specifies whether or not A is unit triangular.
2372     * @param K The number of off-diagonals of the matrix A.
2373     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2374     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F32_2}.
2375     * @param incX The increment for the elements of vector x, must be larger than zero.
2376     */
2377    void CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2378               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2379
2380    /**
2381     * ZTBSV solves one of the systems of equations
2382     * A*x = b   or   A**T*x = b   or   A**H*x = b
2383     *
2384     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
2385     *
2386     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2387     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2388     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2389     *           for i in range(0, n):
2390     *              for j in range(i, min(i+k+1, n)):
2391     *                  b[i, j-i] = a[i, j]
2392     *
2393     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2394     * @param TransA The type of transpose applied to matrix A.
2395     * @param Diag Specifies whether or not A is unit triangular.
2396     * @param K The number of off-diagonals of the matrix A.
2397     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2398     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F64_2}.
2399     * @param incX The increment for the elements of vector x, must be larger than zero.
2400     */
2401    void ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2402               int K, sp<Allocation> A, sp<Allocation> X, int incX);
2403
2404    /**
2405     * STPSV solves one of the systems of equations
2406     * A*x = b   or   A**T*x = b
2407     *
2408     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
2409     *
2410     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2411     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2412     *       'a' to packed matrix 'b'.
2413     *           k = 0
2414     *           for i in range(0, n):
2415     *              for j in range(i, n):
2416     *                  b[k++] = a[i, j]
2417     *
2418     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2419     * @param TransA The type of transpose applied to matrix A.
2420     * @param Diag Specifies whether or not A is unit triangular.
2421     * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2422     * @param X The input allocation contains vector x (presumably holds b on entry and the solution on exit, as in reference BLAS - confirm), supported elements type: {Element#F32}.
2423     * @param incX The increment for the elements of vector x, must be larger than zero.
2424     */
2425    void STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2426               sp<Allocation> Ap, sp<Allocation> X, int incX);
2427
2428    /**
2429     * DTPSV solves one of the systems of equations
2430     * A*x = b   or   A**T*x = b
2431     *
2432     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
2433     *
2434     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2435     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2436     *       'a' to packed matrix 'b'.
2437     *           k = 0
2438     *           for i in range(0, n):
2439     *              for j in range(i, n):
2440     *                  b[k++] = a[i, j]
2441     *
2442     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2443     * @param TransA The type of transpose applied to matrix A.
2444     * @param Diag Specifies whether or not A is unit triangular.
2445     * @param Ap The input allocation containing packed matrix A, supported elements type: {Element#F64}.
2446     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2447     * @param incX The increment for the elements of vector x, must be larger than zero.
2448     */
2449    void DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2450               sp<Allocation> Ap, sp<Allocation> X, int incX);
2451
2452    /**
2453     * CTPSV solves one of the systems of equations
2454     * A*x = b   or   A**T*x = b   or   A**H*x = b
2455     *
2456     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
2457     *
2458     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2459     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2460     *       'a' to packed matrix 'b'.
2461     *           k = 0
2462     *           for i in range(0, n):
2463     *              for j in range(i, n):
2464     *                  b[k++] = a[i, j]
2465     *
2466     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2467     * @param TransA The type of transpose applied to matrix A.
2468     * @param Diag Specifies whether or not A is unit triangular.
2469     * @param Ap The input allocation containing packed matrix A, supported elements type: {Element#F32_2}.
2470     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2471     * @param incX The increment for the elements of vector x, must be larger than zero.
2472     */
2473    void CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2474               sp<Allocation> Ap, sp<Allocation> X, int incX);
2475
2476    /**
2477     * ZTPSV solves one of the systems of equations
2478     * A*x = b   or   A**T*x = b   or   A**H*x = b
2479     *
2480     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
2481     *
2482     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2483     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2484     *       'a' to packed matrix 'b'.
2485     *           k = 0
2486     *           for i in range(0, n):
2487     *              for j in range(i, n):
2488     *                  b[k++] = a[i, j]
2489     *
2490     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2491     * @param TransA The type of transpose applied to matrix A.
2492     * @param Diag Specifies whether or not A is unit triangular.
2493     * @param Ap The input allocation containing packed matrix A, supported elements type: {Element#F64_2}.
2494     * @param X The input allocation containing vector x, supported elements type: {Element#F64_2}.
2495     * @param incX The increment for the elements of vector x, must be larger than zero.
2496     */
2497    void ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2498               sp<Allocation> Ap, sp<Allocation> X, int incX);
2499
2500    /**
2501     * SSYMV performs the matrix-vector operation
2502     * y := alpha*A*x + beta*y
2503     *
2504     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
2505     *
2506     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2507     * @param alpha The scalar alpha.
2508     * @param A The input allocation containing matrix A, supported elements type: {Element#F32}.
2509     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2510     * @param incX The increment for the elements of vector x, must be larger than zero.
2511     * @param beta The scalar beta.
2512     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F32}.
2513     * @param incY The increment for the elements of vector y, must be larger than zero.
2514     */
2515    void SSYMV(RsBlasUplo Uplo, float alpha, sp<Allocation> A, sp<Allocation> X,
2516               int incX, float beta, sp<Allocation> Y, int incY);
2517
2518    /**
2519     * SSBMV performs the matrix-vector operation
2520     * y := alpha*A*x + beta*y
2521     *
2522     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
2523     *
2524     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2525     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2526     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2527     *           for i in range(0, n):
2528     *              for j in range(i, min(i+k+1, n)):
2529     *                  b[i, j-i] = a[i, j]
2530     *
2531     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2532     * @param K The number of off-diagonals of the matrix A.
2533     * @param alpha The scalar alpha.
2534     * @param A The input allocation containing matrix A, supported elements type: {Element#F32}.
2535     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2536     * @param incX The increment for the elements of vector x, must be larger than zero.
2537     * @param beta The scalar beta.
2538     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F32}.
2539     * @param incY The increment for the elements of vector y, must be larger than zero.
2540     */
2541    void SSBMV(RsBlasUplo Uplo, int K, float alpha, sp<Allocation> A, sp<Allocation> X,
2542               int incX, float beta, sp<Allocation> Y, int incY);
2543
2544    /**
2545     * SSPMV performs the matrix-vector operation
2546     * y := alpha*A*x + beta*y
2547     *
2548     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
2549     *
2550     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2551     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2552     *       'a' to packed matrix 'b'.
2553     *           k = 0
2554     *           for i in range(0, n):
2555     *              for j in range(i, n):
2556     *                  b[k++] = a[i, j]
2557     *
2558     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2559     * @param alpha The scalar alpha.
2560     * @param Ap The input allocation containing matrix A, supported elements type: {Element#F32}.
2561     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2562     * @param incX The increment for the elements of vector x, must be larger than zero.
2563     * @param beta The scalar beta.
2564     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F32}.
2565     * @param incY The increment for the elements of vector y, must be larger than zero.
2566     */
2567    void SSPMV(RsBlasUplo Uplo, float alpha, sp<Allocation> Ap, sp<Allocation> X,
2568               int incX, float beta, sp<Allocation> Y, int incY);
2569
2570    /**
2571     * SGER performs the rank 1 operation
2572     * A := alpha*x*y**T + A
2573     *
2574     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
2575     *
2576     * @param alpha The scalar alpha.
2577     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2578     * @param incX The increment for the elements of vector x, must be larger than zero.
2579     * @param Y The input allocation containing vector y, supported elements type: {Element#F32}.
2580     * @param incY The increment for the elements of vector y, must be larger than zero.
2581     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32}.
2582     */
2583    void SGER(float alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2584
2585    /**
2586     * SSYR performs the rank 1 operation
2587     * A := alpha*x*x**T + A
2588     *
2589     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
2590     *
2591     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2592     * @param alpha The scalar alpha.
2593     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2594     * @param incX The increment for the elements of vector x, must be larger than zero.
2595     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32}.
2596     */
2597    void SSYR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2598
2599    /**
2600     * SSPR performs the rank 1 operation
2601     * A := alpha*x*x**T + A
2602     *
2603     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
2604     *
2605     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2606     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2607     *       'a' to packed matrix 'b'.
2608     *           k = 0
2609     *           for i in range(0, n):
2610     *              for j in range(i, n):
2611     *                  b[k++] = a[i, j]
2612     *
2613     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2614     * @param alpha The scalar alpha.
2615     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2616     * @param incX The increment for the elements of vector x, must be larger than zero.
2617     * @param Ap The input/output allocation containing matrix A, supported elements type: {Element#F32}.
2618     */
2619    void SSPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2620
2621    /**
2622     * SSYR2 performs the symmetric rank 2 operation
2623     * A := alpha*x*y**T + alpha*y*x**T + A
2624     *
2625     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
2626     *
2627     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2628     * @param alpha The scalar alpha.
2629     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2630     * @param incX The increment for the elements of vector x, must be larger than zero.
2631     * @param Y The input allocation containing vector y, supported elements type: {Element#F32}.
2632     * @param incY The increment for the elements of vector y, must be larger than zero.
2633     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32}.
2634     */
2635    void SSYR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2636               sp<Allocation> Y, int incY, sp<Allocation> A);
2637
2638    /**
2639     * SSPR2 performs the symmetric rank 2 operation
2640     * A := alpha*x*y**T + alpha*y*x**T + A
2641     *
2642     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
2643     *
2644     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2645     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2646     *       'a' to packed matrix 'b'.
2647     *           k = 0
2648     *           for i in range(0, n):
2649     *              for j in range(i, n):
2650     *                  b[k++] = a[i, j]
2651     *
2652     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2653     * @param alpha The scalar alpha.
2654     * @param X The input allocation containing vector x, supported elements type: {Element#F32}.
2655     * @param incX The increment for the elements of vector x, must be larger than zero.
2656     * @param Y The input allocation containing vector y, supported elements type: {Element#F32}.
2657     * @param incY The increment for the elements of vector y, must be larger than zero.
2658     * @param Ap The input/output allocation containing matrix A, supported elements type: {Element#F32}.
2659     */
2660    void SSPR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2661               sp<Allocation> Y, int incY, sp<Allocation> Ap);
2662
2663    /**
2664     * DSYMV performs the matrix-vector operation
2665     * y := alpha*A*x + beta*y
2666     *
2667     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
2668     *
2669     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2670     * @param alpha The scalar alpha.
2671     * @param A The input allocation containing matrix A, supported elements type: {Element#F64}.
2672     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2673     * @param incX The increment for the elements of vector x, must be larger than zero.
2674     * @param beta The scalar beta.
2675     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F64}.
2676     * @param incY The increment for the elements of vector y, must be larger than zero.
2677     */
2678    void DSYMV(RsBlasUplo Uplo, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2679               double beta, sp<Allocation> Y, int incY);
2680
2681    /**
2682     * DSBMV performs the matrix-vector operation
2683     * y := alpha*A*x + beta*y
2684     *
2685     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
2686     *
2687     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2688     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2689     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2690     *           for i in range(0, n):
2691     *              for j in range(i, min(i+k+1, n)):
2692     *                  b[i, j-i] = a[i, j]
2693     *
2694     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2695     * @param K The number of off-diagonals of the matrix A.
2696     * @param alpha The scalar alpha.
2697     * @param A The input allocation containing matrix A, supported elements type: {Element#F64}.
2698     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2699     * @param incX The increment for the elements of vector x, must be larger than zero.
2700     * @param beta The scalar beta.
2701     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F64}.
2702     * @param incY The increment for the elements of vector y, must be larger than zero.
2703     */
2704    void DSBMV(RsBlasUplo Uplo, int K, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2705               double beta, sp<Allocation> Y, int incY);
2706
2707    /**
2708     * DSPMV performs the matrix-vector operation
2709     * y := alpha*A*x + beta*y
2710     *
2711     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2712     *
2713     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2714     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2715     *       'a' to packed matrix 'b'.
2716     *           k = 0
2717     *           for i in range(0, n):
2718     *              for j in range(i, n):
2719     *                  b[k++] = a[i, j]
2720     *
2721     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2722     * @param alpha The scalar alpha.
2723     * @param Ap The input allocation containing matrix A, supported elements type: {Element#F64}.
2724     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2725     * @param incX The increment for the elements of vector x, must be larger than zero.
2726     * @param beta The scalar beta.
2727     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F64}.
2728     * @param incY The increment for the elements of vector y, must be larger than zero.
2729     */
2730    void DSPMV(RsBlasUplo Uplo, double alpha, sp<Allocation> Ap, sp<Allocation> X, int incX,
2731               double beta, sp<Allocation> Y, int incY);
2732
2733    /**
2734     * DGER performs the rank 1 operation
2735     * A := alpha*x*y**T + A
2736     *
2737     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2738     *
2739     * @param alpha The scalar alpha.
2740     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2741     * @param incX The increment for the elements of vector x, must be larger than zero.
2742     * @param Y The input allocation containing vector y, supported elements type: {Element#F64}.
2743     * @param incY The increment for the elements of vector y, must be larger than zero.
2744     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F64}.
2745     */
2746    void DGER(double alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2747
2748    /**
2749     * DSYR performs the rank 1 operation
2750     * A := alpha*x*x**T + A
2751     *
2752     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2753     *
2754     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2755     * @param alpha The scalar alpha.
2756     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2757     * @param incX The increment for the elements of vector x, must be larger than zero.
2758     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F64}.
2759     */
2760    void DSYR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2761
2762    /**
2763     * DSPR performs the rank 1 operation
2764     * A := alpha*x*x**T + A
2765     *
2766     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2767     *
2768     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2769     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2770     *       'a' to packed matrix 'b'.
2771     *           k = 0
2772     *           for i in range(0, n):
2773     *              for j in range(i, n):
2774     *                  b[k++] = a[i, j]
2775     *
2776     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2777     * @param alpha The scalar alpha.
2778     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2779     * @param incX The increment for the elements of vector x, must be larger than zero.
2780     * @param Ap The input/output allocation containing matrix A, supported elements type: {Element#F64}.
2781     */
2782    void DSPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2783
2784    /**
2785     * DSYR2 performs the symmetric rank 2 operation
2786     * A := alpha*x*y**T + alpha*y*x**T + A
2787     *
2788     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2789     *
2790     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2791     * @param alpha The scalar alpha.
2792     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2793     * @param incX The increment for the elements of vector x, must be larger than zero.
2794     * @param Y The input allocation containing vector y, supported elements type: {Element#F64}.
2795     * @param incY The increment for the elements of vector y, must be larger than zero.
2796     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F64}.
2797     */
2798    void DSYR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2799               sp<Allocation> Y, int incY, sp<Allocation> A);
2800
2801    /**
2802     * DSPR2 performs the symmetric rank 2 operation
2803     * A := alpha*x*y**T + alpha*y*x**T + A
2804     *
2805     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2806     *
2807     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2808     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2809     *       'a' to packed matrix 'b'.
2810     *           k = 0
2811     *           for i in range(0, n):
2812     *              for j in range(i, n):
2813     *                  b[k++] = a[i, j]
2814     *
2815     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2816     * @param alpha The scalar alpha.
2817     * @param X The input allocation containing vector x, supported elements type: {Element#F64}.
2818     * @param incX The increment for the elements of vector x, must be larger than zero.
2819     * @param Y The input allocation containing vector y, supported elements type: {Element#F64}.
2820     * @param incY The increment for the elements of vector y, must be larger than zero.
2821     * @param Ap The input/output allocation containing matrix A, supported elements type: {Element#F64}.
2822     */
2823    void DSPR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2824               sp<Allocation> Y, int incY, sp<Allocation> Ap);
2825
2826    /**
2827     * CHEMV performs the matrix-vector operation
2828     * y := alpha*A*x + beta*y
2829     *
2830     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2831     *
2832     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2833     * @param alpha The scalar alpha.
2834     * @param A The input allocation containing matrix A, supported elements type: {Element#F32_2}.
2835     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2836     * @param incX The increment for the elements of vector x, must be larger than zero.
2837     * @param beta The scalar beta.
2838     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F32_2}.
2839     * @param incY The increment for the elements of vector y, must be larger than zero.
2840     */
2841    void CHEMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2842               int incX, Float2 beta, sp<Allocation> Y, int incY);
2843
2844    /**
2845     * CHBMV performs the matrix-vector operation
2846     * y := alpha*A*x + beta*y
2847     *
2848     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2849     *
2850     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2851     *       but only the region N*(K+1) will be referenced. The following subroutine is an
2852     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2853     *           for i in range(0, n):
2854     *              for j in range(i, min(i+k+1, n)):
2855     *                  b[i, j-i] = a[i, j]
2856     *
2857     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2858     * @param K The number of off-diagonals of the matrix A.
2859     * @param alpha The scalar alpha.
2860     * @param A The input allocation containing matrix A, supported elements type: {Element#F32_2}.
2861     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2862     * @param incX The increment for the elements of vector x, must be larger than zero.
2863     * @param beta The scalar beta.
2864     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F32_2}.
2865     * @param incY The increment for the elements of vector y, must be larger than zero.
2866     */
2867    void CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2868               int incX, Float2 beta, sp<Allocation> Y, int incY);
2869
2870    /**
2871     * CHPMV performs the matrix-vector operation
2872     * y := alpha*A*x + beta*y
2873     *
2874     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2875     *
2876     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2877     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2878     *       'a' to packed matrix 'b'.
2879     *           k = 0
2880     *           for i in range(0, n):
2881     *              for j in range(i, n):
2882     *                  b[k++] = a[i, j]
2883     *
2884     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2885     * @param alpha The scalar alpha.
2886     * @param Ap The input allocation containing matrix A, supported elements type: {Element#F32_2}.
2887     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2888     * @param incX The increment for the elements of vector x, must be larger than zero.
2889     * @param beta The scalar beta.
2890     * @param Y The input/output allocation containing vector y, supported elements type: {Element#F32_2}.
2891     * @param incY The increment for the elements of vector y, must be larger than zero.
2892     */
2893    void CHPMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> Ap, sp<Allocation> X,
2894               int incX, Float2 beta, sp<Allocation> Y, int incY);
2895
2896    /**
2897     * CGERU performs the rank 1 operation
2898     * A := alpha*x*y**T + A
2899     *
2900     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2901     *
2902     * @param alpha The scalar alpha.
2903     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2904     * @param incX The increment for the elements of vector x, must be larger than zero.
2905     * @param Y The input allocation containing vector y, supported elements type: {Element#F32_2}.
2906     * @param incY The increment for the elements of vector y, must be larger than zero.
2907     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32_2}.
2908     */
2909    void CGERU(Float2 alpha, sp<Allocation> X, int incX,
2910               sp<Allocation> Y, int incY, sp<Allocation> A);
2911
2912    /**
2913     * CGERC performs the rank 1 operation
2914     * A := alpha*x*y**H + A
2915     *
2916     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2917     *
2918     * @param alpha The scalar alpha.
2919     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2920     * @param incX The increment for the elements of vector x, must be larger than zero.
2921     * @param Y The input allocation containing vector y, supported elements type: {Element#F32_2}.
2922     * @param incY The increment for the elements of vector y, must be larger than zero.
2923     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32_2}.
2924     */
2925    void CGERC(Float2 alpha, sp<Allocation> X, int incX,
2926               sp<Allocation> Y, int incY, sp<Allocation> A);
2927
2928    /**
2929     * CHER performs the rank 1 operation
2930     * A := alpha*x*x**H + A
2931     *
2932     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2933     *
2934     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2935     * @param alpha The scalar alpha.
2936     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2937     * @param incX The increment for the elements of vector x, must be larger than zero.
2938     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32_2}.
2939     */
2940    void CHER(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2941
2942    /**
2943     * CHPR performs the rank 1 operation
2944     * A := alpha*x*x**H + A
2945     *
2946     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2947     *
2948     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2949     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2950     *       'a' to packed matrix 'b'.
2951     *           k = 0
2952     *           for i in range(0, n):
2953     *              for j in range(i, n):
2954     *                  b[k++] = a[i, j]
2955     *
2956     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2957     * @param alpha The scalar alpha.
2958     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2959     * @param incX The increment for the elements of vector x, must be larger than zero.
2960     * @param Ap The input/output allocation containing matrix A, supported elements type: {Element#F32_2}.
2961     */
2962    void CHPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2963
2964    /**
2965     * CHER2 performs the hermitian rank 2 operation
2966     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A   (as defined by the reference below)
2967     *
2968     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2969     *
2970     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2971     * @param alpha The scalar alpha.
2972     * @param X The input allocation containing vector x, supported elements type: {Element#F32_2}.
2973     * @param incX The increment for the elements of vector x, must be larger than zero.
2974     * @param Y The input allocation containing vector y, supported elements type: {Element#F32_2}.
2975     * @param incY The increment for the elements of vector y, must be larger than zero.
2976     * @param A The input/output allocation containing matrix A, supported elements type: {Element#F32_2}.
2977     */
2978    void CHER2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
2979               sp<Allocation> Y, int incY, sp<Allocation> A);
2980
2981    /**
2982     * CHPR2 performs the Hermitian rank 2 operation
2983     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2984     *
2985     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2986     *
2987     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2988     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2989     *       'a' to packed matrix 'b'.
2990     *           k = 0
2991     *           for i in range(0, n):
2992     *              for j in range(i, n):
2993     *                  b[k++] = a[i, j]
2994     *
2995     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2996     * @param alpha The scalar alpha.
2997     * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2998     * @param incX The increment for the elements of vector x, must be larger than zero.
2999     * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
3000     * @param incY The increment for the elements of vector y, must be larger than zero.
3001     * @param Ap The input/output allocation contains packed matrix A (overwritten with the result), supported elements type: {Element#F32_2}.
3002     */
3003    void CHPR2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
3004               sp<Allocation> Y, int incY, sp<Allocation> Ap);
3005
3006    /**
3007     * ZHEMV performs the matrix-vector operation
3008     * y := alpha*A*x + beta*y
3009     *
3010     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
3011     *
3012     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3013     * @param alpha The scalar alpha.
3014     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3015     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3016     * @param incX The increment for the elements of vector x, must be larger than zero.
3017     * @param beta The scalar beta.
3018     * @param Y The input/output allocation contains vector y (overwritten with the result), supported elements type: {Element#F64_2}.
3019     * @param incY The increment for the elements of vector y, must be larger than zero.
3020     */
3021    void ZHEMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3022               int incX, Double2 beta, sp<Allocation> Y, int incY);
3023
3024    /**
3025     * ZHBMV performs the matrix-vector operation
3026     * y := alpha*A*x + beta*y
3027     *
3028     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
3029     *
3030     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
3031     *       but only the region N*(K+1) will be referenced. The following subroutine is an
3032     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
3033     *           for i in range(0, n):
3034     *              for j in range(i, min(i+k+1, n)):
3035     *                  b[i, j-i] = a[i, j]
3036     *
3037     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
3038     * @param K The number of off-diagonals of the matrix A.
3039     * @param alpha The scalar alpha.
3040     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3041     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3042     * @param incX The increment for the elements of vector x, must be larger than zero.
3043     * @param beta The scalar beta.
3044     * @param Y The input/output allocation contains vector y (overwritten with the result), supported elements type: {Element#F64_2}.
3045     * @param incY The increment for the elements of vector y, must be larger than zero.
3046     */
3047    void ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3048               int incX, Double2 beta, sp<Allocation> Y, int incY);
3049
3050    /**
3051     * ZHPMV performs the matrix-vector operation
3052     * y := alpha*A*x + beta*y
3053     *
3054     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
3055     *
3056     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3057     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3058     *       'a' to packed matrix 'b'.
3059     *           k = 0
3060     *           for i in range(0, n):
3061     *              for j in range(i, n):
3062     *                  b[k++] = a[i, j]
3063     *
3064     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
3065     * @param alpha The scalar alpha.
3066     * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3067     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3068     * @param incX The increment for the elements of vector x, must be larger than zero.
3069     * @param beta The scalar beta.
3070     * @param Y The input/output allocation contains vector y (overwritten with the result), supported elements type: {Element#F64_2}.
3071     * @param incY The increment for the elements of vector y, must be larger than zero.
3072     */
3073    void ZHPMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> Ap, sp<Allocation> X,
3074               int incX, Double2 beta, sp<Allocation> Y, int incY);
3075
3076    /**
3077     * ZGERU performs the rank 1 operation
3078     * A := alpha*x*y**T + A
3079     *
3080     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
3081     *
3082     * @param alpha The scalar alpha.
3083     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3084     * @param incX The increment for the elements of vector x, must be larger than zero.
3085     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3086     * @param incY The increment for the elements of vector y, must be larger than zero.
3087     * @param A The input/output allocation contains matrix A (overwritten with the result), supported elements type: {Element#F64_2}.
3088     */
3089    void ZGERU(Double2 alpha, sp<Allocation> X, int incX,
3090               sp<Allocation> Y, int incY, sp<Allocation> A);
3091
3092    /**
3093     * ZGERC performs the rank 1 operation
3094     * A := alpha*x*y**H + A
3095     *
3096     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
3097     *
3098     * @param alpha The scalar alpha.
3099     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3100     * @param incX The increment for the elements of vector x, must be larger than zero.
3101     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3102     * @param incY The increment for the elements of vector y, must be larger than zero.
3103     * @param A The input/output allocation contains matrix A (overwritten with the result), supported elements type: {Element#F64_2}.
3104     */
3105    void ZGERC(Double2 alpha, sp<Allocation> X, int incX,
3106               sp<Allocation> Y, int incY, sp<Allocation> A);
3107
3108    /**
3109     * ZHER performs the Hermitian rank 1 operation
3110     * A := alpha*x*x**H + A
3111     *
3112     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
3113     *
3114     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3115     * @param alpha The scalar alpha.
3116     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3117     * @param incX The increment for the elements of vector x, must be larger than zero.
3118     * @param A The input/output allocation contains matrix A (overwritten with the result), supported elements type: {Element#F64_2}.
3119     */
3120    void ZHER(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
3121
3122    /**
3123     * ZHPR performs the Hermitian rank 1 operation
3124     * A := alpha*x*x**H + A
3125     *
3126     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
3127     *
3128     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3129     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3130     *       'a' to packed matrix 'b'.
3131     *           k = 0
3132     *           for i in range(0, n):
3133     *              for j in range(i, n):
3134     *                  b[k++] = a[i, j]
3135     *
3136     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3137     * @param alpha The scalar alpha.
3138     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3139     * @param incX The increment for the elements of vector x, must be larger than zero.
3140     * @param Ap The input/output allocation contains packed matrix A (overwritten with the result), supported elements type: {Element#F64_2}.
3141     */
3142    void ZHPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
3143
3144    /**
3145     * ZHER2 performs the Hermitian rank 2 operation
3146     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3147     *
3148     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
3149     *
3150     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3151     * @param alpha The scalar alpha.
3152     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3153     * @param incX The increment for the elements of vector x, must be larger than zero.
3154     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3155     * @param incY The increment for the elements of vector y, must be larger than zero.
3156     * @param A The input/output allocation contains matrix A (overwritten with the result), supported elements type: {Element#F64_2}.
3157     */
3158    void ZHER2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3159               sp<Allocation> Y, int incY, sp<Allocation> A);
3160
3161    /**
3162     * ZHPR2 performs the Hermitian rank 2 operation
3163     * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3164     *
3165     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
3166     *
3167     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3168     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
3169     *       'a' to packed matrix 'b'.
3170     *           k = 0
3171     *           for i in range(0, n):
3172     *              for j in range(i, n):
3173     *                  b[k++] = a[i, j]
3174     *
3175     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3176     * @param alpha The scalar alpha.
3177     * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3178     * @param incX The increment for the elements of vector x, must be larger than zero.
3179     * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3180     * @param incY The increment for the elements of vector y, must be larger than zero.
3181     * @param Ap The input/output allocation contains packed matrix A (overwritten with the result), supported elements type: {Element#F64_2}.
3182     */
3183    void ZHPR2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3184               sp<Allocation> Y, int incY, sp<Allocation> Ap);
3185
3186    /**
3187     * SGEMM performs one of the matrix-matrix operations
3188     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3189     *
3190     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
3191     *
3192     * @param TransA The type of transpose applied to matrix A.
3193     * @param TransB The type of transpose applied to matrix B.
3194     * @param alpha The scalar alpha.
3195     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3196     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3197     * @param beta The scalar beta.
3198     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32}.
3199     */
3200    void SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, sp<Allocation> A,
3201                      sp<Allocation> B, float beta, sp<Allocation> C);
3202
3203
3204    /**
3205     * DGEMM performs one of the matrix-matrix operations
3206     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3207     *
3208     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
3209     *
3210     * @param TransA The type of transpose applied to matrix A.
3211     * @param TransB The type of transpose applied to matrix B.
3212     * @param alpha The scalar alpha.
3213     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3214     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3215     * @param beta The scalar beta.
3216     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64}.
3217     */
3218    void DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, sp<Allocation> A,
3219                      sp<Allocation> B, double beta, sp<Allocation> C);
3220
3221    /**
3222     * CGEMM performs one of the matrix-matrix operations
3223     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3224     *
3225     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3226     *
3227     * @param TransA The type of transpose applied to matrix A.
3228     * @param TransB The type of transpose applied to matrix B.
3229     * @param alpha The scalar alpha.
3230     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3231     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3232     * @param beta The scalar beta.
3233     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3234     */
3235    void CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, sp<Allocation> A,
3236                      sp<Allocation> B, Float2 beta, sp<Allocation> C);
3237
3238    /**
3239     * ZGEMM performs one of the matrix-matrix operations
3240     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3241     *
3242     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3243     *
3244     * @param TransA The type of transpose applied to matrix A.
3245     * @param TransB The type of transpose applied to matrix B.
3246     * @param alpha The scalar alpha.
3247     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3248     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3249     * @param beta The scalar beta.
3250     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3251     */
3252    void ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, sp<Allocation> A,
3253                      sp<Allocation> B, Double2 beta, sp<Allocation> C);
3254
3255    /**
3256     * SSYMM performs one of the matrix-matrix operations
3257     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3258     *
3259     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3260     *
3261     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3262     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3263     * @param alpha The scalar alpha.
3264     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3265     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3266     * @param beta The scalar beta.
3267     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32}.
3268     */
3269    void SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, sp<Allocation> A,
3270                      sp<Allocation> B, float beta, sp<Allocation> C);
3271
3272    /**
3273     * DSYMM performs one of the matrix-matrix operations
3274     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3275     *
3276     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3277     *
3278     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3279     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3280     * @param alpha The scalar alpha.
3281     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3282     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3283     * @param beta The scalar beta.
3284     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64}.
3285     */
3286    void DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, sp<Allocation> A,
3287                      sp<Allocation> B, double beta, sp<Allocation> C);
3288
3289    /**
3290     * CSYMM performs one of the matrix-matrix operations
3291     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3292     *
3293     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3294     *
3295     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3296     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3297     * @param alpha The scalar alpha.
3298     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3299     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3300     * @param beta The scalar beta.
3301     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3302     */
3303    void CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3304                      sp<Allocation> B, Float2 beta, sp<Allocation> C);
3305
3306    /**
3307     * ZSYMM performs one of the matrix-matrix operations
3308     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3309     *
3310     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3311     *
3312     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3313     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3314     * @param alpha The scalar alpha.
3315     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3316     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3317     * @param beta The scalar beta.
3318     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3319     */
3320    void ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3321                      sp<Allocation> B, Double2 beta, sp<Allocation> C);
3322
3323    /**
3324     * SSYRK performs one of the symmetric rank k operations
3325     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3326     *
3327     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3328     *
3329     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3330     * @param Trans The type of transpose applied to the operation.
3331     * @param alpha The scalar alpha.
3332     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3333     * @param beta The scalar beta.
3334     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32}.
3335     */
3336    void SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3337               sp<Allocation> A, float beta, sp<Allocation> C);
3338
3339    /**
3340     * DSYRK performs one of the symmetric rank k operations
3341     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3342     *
3343     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3344     *
3345     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3346     * @param Trans The type of transpose applied to the operation.
3347     * @param alpha The scalar alpha.
3348     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3349     * @param beta The scalar beta.
3350     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64}.
3351     */
3352    void DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3353               sp<Allocation> A, double beta, sp<Allocation> C);
3354
3355    /**
3356     * CSYRK performs one of the symmetric rank k operations
3357     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3358     *
3359     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3360     *
3361     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3362     * @param Trans The type of transpose applied to the operation.
3363     * @param alpha The scalar alpha.
3364     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3365     * @param beta The scalar beta.
3366     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3367     */
3368    void CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3369               sp<Allocation> A, Float2 beta, sp<Allocation> C);
3370
3371    /**
3372     * ZSYRK performs one of the symmetric rank k operations
3373     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3374     *
3375     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3376     *
3377     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3378     * @param Trans The type of transpose applied to the operation.
3379     * @param alpha The scalar alpha.
3380     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3381     * @param beta The scalar beta.
3382     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3383     */
3384    void ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3385               sp<Allocation> A, Double2 beta, sp<Allocation> C);
3386
3387    /**
3388     * SSYR2K performs one of the symmetric rank 2k operations
3389     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3390     *
3391     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3392     *
3393     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3394     * @param Trans The type of transpose applied to the operation.
3395     * @param alpha The scalar alpha.
3396     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3397     * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3398     * @param beta The scalar beta.
3399     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32}.
3400     */
3401    void SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3402                sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C);
3403
3404    /**
3405     * DSYR2K performs one of the symmetric rank 2k operations
3406     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3407     *
3408     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3409     *
3410     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3411     * @param Trans The type of transpose applied to the operation.
3412     * @param alpha The scalar alpha.
3413     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3414     * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3415     * @param beta The scalar beta.
3416     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64}.
3417     */
3418    void DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3419                sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C);
3420
3421    /**
3422     * CSYR2K performs one of the symmetric rank 2k operations
3423     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3424     *
3425     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3426     *
3427     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3428     * @param Trans The type of transpose applied to the operation.
3429     * @param alpha The scalar alpha.
3430     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3431     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3432     * @param beta The scalar beta.
3433     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3434     */
3435    void CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3436                sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C);
3437
3438    /**
3439     * ZSYR2K performs one of the symmetric rank 2k operations
3440     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3441     *
3442     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3443     *
3444     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3445     * @param Trans The type of transpose applied to the operation.
3446     * @param alpha The scalar alpha.
3447     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3448     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3449     * @param beta The scalar beta.
3450     * @param C The input/output allocation contains matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3451     */
3452    void ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3453                sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C);
3454
3455    /**
3456     * STRMM performs one of the matrix-matrix operations
3457     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3458     * op(A) is one of  op(A) = A  or  op(A) = A**T
3459     *
3460     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3461     *
3462     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3463     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3464     * @param TransA The type of transpose applied to matrix A.
3465     * @param Diag Specifies whether or not A is unit triangular.
3466     * @param alpha The scalar alpha.
3467     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3468     * @param B The input/output allocation contains matrix B (overwritten with the result), supported elements type: {Element#F32}.
3469     */
3470    void STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA,
3471               RsBlasDiag Diag, float alpha, sp<Allocation> A, sp<Allocation> B);
3472
3473    /**
3474     * DTRMM performs one of the matrix-matrix operations
3475     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3476     * op(A) is one of  op(A) = A  or  op(A) = A**T
3477     *
3478     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3479     *
3480     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3481     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3482     * @param TransA The type of transpose applied to matrix A.
3483     * @param Diag Specifies whether or not A is unit triangular.
3484     * @param alpha The scalar alpha.
3485     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3486     * @param B The input/output allocation contains matrix B (overwritten with the result), supported elements type: {Element#F64}.
3487     */
3488    void DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3489               double alpha, sp<Allocation> A, sp<Allocation> B);
3490
3491    /**
3492     * CTRMM performs one of the matrix-matrix operations
3493     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3494     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3495     *
3496     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3497     *
3498     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3499     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3500     * @param TransA The type of transpose applied to matrix A.
3501     * @param Diag Specifies whether or not A is unit triangular.
3502     * @param alpha The scalar alpha.
3503     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3504     * @param B The input/output allocation contains matrix B (overwritten with the result), supported elements type: {Element#F32_2}.
3505     */
3506    void CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3507               Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3508
3509    /**
3510     * ZTRMM performs one of the matrix-matrix operations
3511     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3512     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3513     *
3514     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3515     *
3516     * @param Side Specifies whether the triangular matrix A appears on the left or right.
3517     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3518     * @param TransA The type of transpose applied to matrix A.
3519     * @param Diag Specifies whether or not A is unit triangular.
3520     * @param alpha The scalar alpha.
3521     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3522     * @param B The input/output allocation contains matrix B (overwritten with the result), supported elements type: {Element#F64_2}.
3523     */
3524    void ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3525               Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3526
3527    /**
3528     * STRSM solves one of the triangular matrix equations for X
3529     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3530     * op(A) is one of  op(A) = A  or  op(A) = A**T
3531     *
3532     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3533     *
3534     * @param Side Specifies whether the triangular matrix A appears on the left or right of X.
3535     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3536     * @param TransA The type of transpose applied to matrix A.
3537     * @param Diag Specifies whether or not A is unit triangular.
3538     * @param alpha The scalar alpha.
3539     * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3540     * @param B The input allocation contains matrix B; per the referenced BLAS routine it is overwritten with the solution X. Supported elements type: {Element#F32}.
3541     */
3542    void STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3543               float alpha, sp<Allocation> A, sp<Allocation> B);
3544
3545    /**
3546     * DTRSM solves one of the triangular matrix equations for X
3547     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3548     * op(A) is one of  op(A) = A  or  op(A) = A**T
3549     *
3550     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3551     *
3552     * @param Side Specifies whether the triangular matrix A appears on the left or right of X.
3553     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3554     * @param TransA The type of transpose applied to matrix A.
3555     * @param Diag Specifies whether or not A is unit triangular.
3556     * @param alpha The scalar alpha.
3557     * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3558     * @param B The input allocation contains matrix B; per the referenced BLAS routine it is overwritten with the solution X. Supported elements type: {Element#F64}.
3559     */
3560    void DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3561               double alpha, sp<Allocation> A, sp<Allocation> B);
3562
3563    /**
3564     * CTRSM solves one of the triangular matrix equations for X
3565     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3566     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3567     *
3568     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3569     *
3570     * @param Side Specifies whether the triangular matrix A appears on the left or right of X.
3571     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3572     * @param TransA The type of transpose applied to matrix A.
3573     * @param Diag Specifies whether or not A is unit triangular.
3574     * @param alpha The scalar alpha (passed as a Float2).
3575     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3576     * @param B The input allocation contains matrix B; per the referenced BLAS routine it is overwritten with the solution X. Supported elements type: {Element#F32_2}.
3577     */
3578    void CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3579               Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3580
3581    /**
3582     * ZTRSM solves one of the triangular matrix equations for X
3583     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3584     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3585     *
3586     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3587     *
3588     * @param Side Specifies whether the triangular matrix A appears on the left or right of X.
3589     * @param Uplo Specifies whether matrix A is upper or lower triangular.
3590     * @param TransA The type of transpose applied to matrix A.
3591     * @param Diag Specifies whether or not A is unit triangular.
3592     * @param alpha The scalar alpha (passed as a Double2).
3593     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3594     * @param B The input allocation contains matrix B; per the referenced BLAS routine it is overwritten with the solution X. Supported elements type: {Element#F64_2}.
3595     */
3596    void ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3597               Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3598
3599    /**
3600     * CHEMM performs one of the Hermitian matrix-matrix operations
3601     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3602     *
3603     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3604     *
3605     * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3606     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3607     * @param alpha The scalar alpha (passed as a Float2).
3608     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3609     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3610     * @param beta The scalar beta (passed as a Float2).
3611     * @param C The input/output allocation containing matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3612     */
3613    void CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3614               sp<Allocation> B, Float2 beta, sp<Allocation> C);
3615
3616    /**
3617     * ZHEMM performs one of the Hermitian matrix-matrix operations
3618     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3619     *
3620     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3621     *
3622     * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3623     * @param Uplo Specifies whether the upper or lower triangular part of A is to be referenced.
3624     * @param alpha The scalar alpha (passed as a Double2).
3625     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3626     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3627     * @param beta The scalar beta (passed as a Double2).
3628     * @param C The input/output allocation containing matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3629     */
3630    void ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3631               sp<Allocation> B, Double2 beta, sp<Allocation> C);
3632
3633    /**
3634     * CHERK performs one of the Hermitian rank k operations
3635     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3636     *
3637     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3638     *
3639     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3640     * @param Trans The type of transpose applied to the operation.
3641     * @param alpha The scalar alpha (a plain float in this signature).
3642     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3643     * @param beta The scalar beta (a plain float in this signature).
3644     * @param C The input/output allocation containing matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3645     */
3646    void CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, sp<Allocation> A,
3647               float beta, sp<Allocation> C);
3648
3649    /**
3650     * ZHERK performs one of the Hermitian rank k operations
3651     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3652     *
3653     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3654     *
3655     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3656     * @param Trans The type of transpose applied to the operation.
3657     * @param alpha The scalar alpha (a plain double in this signature).
3658     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3659     * @param beta The scalar beta (a plain double in this signature).
3660     * @param C The input/output allocation containing matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3661     */
3662    void ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, sp<Allocation> A,
3663               double beta, sp<Allocation> C);
3664
3665    /**
3666     * CHER2K performs one of the Hermitian rank 2k operations
3667     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3668     *
3669     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3670     *
3671     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3672     * @param Trans The type of transpose applied to the operation.
3673     * @param alpha The scalar alpha (passed as a Float2).
3674     * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3675     * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3676     * @param beta The scalar beta (a plain float in this signature).
3677     * @param C The input/output allocation containing matrix C (overwritten with the result), supported elements type: {Element#F32_2}.
3678     */
3679    void CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, sp<Allocation> A,
3680                sp<Allocation> B, float beta, sp<Allocation> C);
3681
3682    /**
3683     * ZHER2K performs one of the Hermitian rank 2k operations
3684     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3685     *
3686     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3687     *
3688     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3689     * @param Trans The type of transpose applied to the operation.
3690     * @param alpha The scalar alpha (passed as a Double2).
3691     * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3692     * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3693     * @param beta The scalar beta (a plain double in this signature).
3694     * @param C The input/output allocation containing matrix C (overwritten with the result), supported elements type: {Element#F64_2}.
3695     */
3696    void ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, sp<Allocation> A,
3697                sp<Allocation> B, double beta, sp<Allocation> C);
3698
3699    /**
3700     * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3701     * Calculations are done in 1.10.21 fixed-point format for the final output,
3702     * just before there's a shift down to drop the fractional parts. The output
3703     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3704     * gives some headroom to avoid wrapping around on small overflows.
3705     *
3706     * @param A The input allocation contains matrix A, supported elements type: {Element#U8}.
3707     * @param a_offset The offset for all values in matrix A, e.g. A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3708     * @param B The input allocation contains matrix B, supported elements type: {Element#U8}.
3709     * @param b_offset The offset for all values in matrix B, e.g. B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3710     * @param C The output allocation that receives matrix C, supported elements type: {Element#U8}.
3711     * @param c_offset The offset for all values in matrix C.
3712     * @param c_mult The multiplier for all values in matrix C, e.g. C[i,j] = (C[i,j] + c_offset) * c_mult.
3713     **/
3714    void BNNM(sp<Allocation> A, int a_offset, sp<Allocation> B, int b_offset, sp<Allocation> C,
3715              int c_offset, int c_mult);
3716};
3717
3718/**
3719 * Intrinsic kernel for blending two Allocations. The per-method formulas are written in terms of src (the in Allocation) and dst (the out Allocation).
3720 */
3721class ScriptIntrinsicBlend : public ScriptIntrinsic {
3722 private:
3723    ScriptIntrinsicBlend(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
3724 public:
3725    /**
3726     * Creates a blend intrinsic. Supported Element types are U8_4.
3727     * @param[in] rs RenderScript context
3728     * @param[in] e Element
3729     * @return new ScriptIntrinsicBlend
3730     */
3731    static sp<ScriptIntrinsicBlend> create(sp<RS> rs, sp<const Element> e);
3732    /**
3733     * Sets dst = {0, 0, 0, 0}
3734     * @param[in] in input Allocation (src)
3735     * @param[out] out output Allocation (dst)
3736     */
3737    void forEachClear(sp<Allocation> in, sp<Allocation> out);
3738    /**
3739     * Sets dst = src
3740     * @param[in] in input Allocation (src)
3741     * @param[out] out output Allocation (dst)
3742     */
3743    void forEachSrc(sp<Allocation> in, sp<Allocation> out);
3744    /**
3745     * Sets dst = dst (NOP)
3746     * @param[in] in input Allocation (src)
3747     * @param[in,out] out output Allocation (dst)
3748     */
3749    void forEachDst(sp<Allocation> in, sp<Allocation> out);
3750    /**
3751     * Sets dst = src + dst * (1.0 - src.a)
3752     * @param[in] in input Allocation (src)
3753     * @param[in,out] out output Allocation (dst)
3754     */
3755    void forEachSrcOver(sp<Allocation> in, sp<Allocation> out);
3756    /**
3757     * Sets dst = dst + src * (1.0 - dst.a)
3758     * @param[in] in input Allocation (src)
3759     * @param[in,out] out output Allocation (dst)
3760     */
3761    void forEachDstOver(sp<Allocation> in, sp<Allocation> out);
3762    /**
3763     * Sets dst = src * dst.a
3764     * @param[in] in input Allocation (src)
3765     * @param[in,out] out output Allocation (dst)
3766     */
3767    void forEachSrcIn(sp<Allocation> in, sp<Allocation> out);
3768    /**
3769     * Sets dst = dst * src.a
3770     * @param[in] in input Allocation (src)
3771     * @param[in,out] out output Allocation (dst)
3772     */
3773    void forEachDstIn(sp<Allocation> in, sp<Allocation> out);
3774    /**
3775     * Sets dst = src * (1.0 - dst.a)
3776     * @param[in] in input Allocation (src)
3777     * @param[in,out] out output Allocation (dst)
3778     */
3779    void forEachSrcOut(sp<Allocation> in, sp<Allocation> out);
3780    /**
3781     * Sets dst = dst * (1.0 - src.a)
3782     * @param[in] in input Allocation (src)
3783     * @param[in,out] out output Allocation (dst)
3784     */
3785    void forEachDstOut(sp<Allocation> in, sp<Allocation> out);
3786    /**
3787     * Sets dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
3788     * @param[in] in input Allocation (src)
3789     * @param[in,out] out output Allocation (dst)
3790     */
3791    void forEachSrcAtop(sp<Allocation> in, sp<Allocation> out);
3792    /**
3793     * Sets dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
3794     * @param[in] in input Allocation (src)
3795     * @param[in,out] out output Allocation (dst)
3796     */
3797    void forEachDstAtop(sp<Allocation> in, sp<Allocation> out);
3798    /**
3799     * Sets dst = {src.r ^ dst.r, src.g ^ dst.g, src.b ^ dst.b, src.a ^ dst.a}
3800     * @param[in] in input Allocation (src)
3801     * @param[in,out] out output Allocation (dst)
3802     */
3803    void forEachXor(sp<Allocation> in, sp<Allocation> out);
3804    /**
3805     * Sets dst = src * dst
3806     * @param[in] in input Allocation (src)
3807     * @param[in,out] out output Allocation (dst)
3808     */
3809    void forEachMultiply(sp<Allocation> in, sp<Allocation> out);
3810    /**
3811     * Sets dst = min(src + dst, 1.0)
3812     * @param[in] in input Allocation (src)
3813     * @param[in,out] out output Allocation (dst)
3814     */
3815    void forEachAdd(sp<Allocation> in, sp<Allocation> out);
3816    /**
3817     * Sets dst = max(dst - src, 0.0)
3818     * @param[in] in input Allocation (src)
3819     * @param[in,out] out output Allocation (dst)
3820     */
3821    void forEachSubtract(sp<Allocation> in, sp<Allocation> out);
3822};
3823
3824/**
3825 * Intrinsic Gaussian blur filter. Applies a Gaussian blur of the specified
3826 * radius to all elements of an Allocation.
3827 */
3828class ScriptIntrinsicBlur : public ScriptIntrinsic {
3829 private:
3830    ScriptIntrinsicBlur(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
3831 public:
3832    /**
3833     * Creates a blur intrinsic. Supported Element types are U8 and U8_4.
3834     * @param[in] rs RenderScript context
3835     * @param[in] e Element
3836     * @return new ScriptIntrinsicBlur
3837     */
3838    static sp<ScriptIntrinsicBlur> create(sp<RS> rs, sp<const Element> e);
3839    /**
3840     * Sets the input of the blur.
3841     * @param[in] in input Allocation
3842     */
3843    void setInput(sp<Allocation> in);
3844    /**
3845     * Runs the intrinsic, writing the blurred result to the given Allocation.
3846     * @param[out] out output Allocation
3847     */
3848    void forEach(sp<Allocation> out);
3849    /**
3850     * Sets the radius of the blur. The supported range is 0 < radius <= 25.
3851     * @param[in] radius radius of the blur
3852     */
3853    void setRadius(float radius);
3854};
3855
3856/**
3857 * Intrinsic for applying a color matrix to allocations. This has the
3858 * same effect as loading each element and converting it to a
3859 * F32_N, multiplying the result by the 4x4 color matrix
3860 * as performed by rsMatrixMultiply() and writing it to the output
3861 * after conversion back to U8_N or F32_N.
3862 */
3863class ScriptIntrinsicColorMatrix : public ScriptIntrinsic {
3864 private:
3865    ScriptIntrinsicColorMatrix(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
3866 public:
3867    /**
3868     * Creates a new intrinsic. Unlike the constructor, no Element is supplied by the caller.
3869     * @param[in] rs RenderScript context
3870     * @return new ScriptIntrinsicColorMatrix
3871     */
3872    static sp<ScriptIntrinsicColorMatrix> create(sp<RS> rs);
3873    /**
3874     * Applies the color matrix. Supported types are U8 and F32 with
3875     * vector lengths between 1 and 4.
3876     * @param[in] in input Allocation
3877     * @param[out] out output Allocation
3878     */
3879    void forEach(sp<Allocation> in, sp<Allocation> out);
3880    /**
3881     * Set the value to be added after the color matrix has been
3882     * applied. The default value is {0, 0, 0, 0}.
3883     * @param[in] add pointer to a float[4] of values
3884     */
3885    void setAdd(float* add);
3886
3887    /**
3888     * Set the color matrix which will be applied to each cell of the
3889     * image. The alpha channel will be copied.
3890     *
3891     * @param[in] m pointer to a float[9] of values (3x3 matrix)
3892     */
3893    void setColorMatrix3(float* m);
3894    /**
3895     * Set the color matrix which will be applied to each cell of the
3896     * image.
3897     *
3898     * @param[in] m pointer to a float[16] of values (4x4 matrix)
3899     */
3900    void setColorMatrix4(float* m);
3901    /**
3902     * Set a color matrix to convert from RGB to luminance. The alpha
3903     * channel will be a copy.
3904     */
3905    void setGreyscale();
3906    /**
3907     * Set the matrix to convert from RGB to YUV with a direct copy of
3908     * the 4th channel.
3909     */
3910    void setRGBtoYUV();
3911    /**
3912     * Set the matrix to convert from YUV to RGB with a direct copy of
3913     * the 4th channel.
3914     */
3915    void setYUVtoRGB();
3916};
3917
3918/**
3919 * Intrinsic for applying a 3x3 convolve to an allocation.
3920 */
3921class ScriptIntrinsicConvolve3x3 : public ScriptIntrinsic {
3922 private:
3923    ScriptIntrinsicConvolve3x3(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
3924 public:
3925    /**
3926     * Creates a 3x3 convolve intrinsic. Supported types U8 and F32 with vector
3927     * lengths between 1 and 4. The default convolution kernel is the identity.
3928     * @param[in] rs RenderScript context
3929     * @param[in] e Element
3930     * @return new ScriptIntrinsicConvolve3x3
3931     */
3932    static sp<ScriptIntrinsicConvolve3x3> create(sp<RS> rs, sp<const Element> e);
3933    /**
3934     * Sets input for intrinsic.
3935     * @param[in] in input Allocation
3936     */
3937    void setInput(sp<Allocation> in);
3938    /**
3939     * Launches the intrinsic.
3940     * @param[out] out output Allocation
3941     */
3942    void forEach(sp<Allocation> out);
3943    /**
3944     * Sets convolution kernel.
3945     * @param[in] v pointer to a float[9] of values
3946     */
3947    void setCoefficients(float* v);
3948};
3949
3950/**
3951 * Intrinsic for applying a 5x5 convolve to an allocation.
3952 */
3953class ScriptIntrinsicConvolve5x5 : public ScriptIntrinsic {
3954 private:
3955    ScriptIntrinsicConvolve5x5(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
3956 public:
3957    /**
3958     * Creates a 5x5 convolve intrinsic. Supported types U8 and F32 with vector
3959     * lengths between 1 and 4. The default convolution kernel is the identity.
3960     * @param[in] rs RenderScript context
3961     * @param[in] e Element
3962     * @return new ScriptIntrinsicConvolve5x5
3963     */
3964    static sp<ScriptIntrinsicConvolve5x5> create(sp<RS> rs, sp<const Element> e);
3965    /**
3966     * Sets input for intrinsic.
3967     * @param[in] in input Allocation
3968     */
3969    void setInput(sp<Allocation> in);
3970    /**
3971     * Launches the intrinsic.
3972     * @param[out] out output Allocation
3973     */
3974    void forEach(sp<Allocation> out);
3975    /**
3976     * Sets convolution kernel.
3977     * @param[in] v pointer to a float[25] of values
3978     */
3979    void setCoefficients(float* v);
3980};
3981
3982/**
3983 * Intrinsic for computing a histogram.
3984 */
3985class ScriptIntrinsicHistogram : public ScriptIntrinsic {
3986 private:
3987    ScriptIntrinsicHistogram(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
3988    sp<Allocation> mOut; // presumably the Allocation given to setOutput() -- confirm in the implementation
3989 public:
3990    /**
3991     * Create an intrinsic for calculating the histogram of a uchar
3992     * or uchar4 image.
3993     *
3994     * Supported elements types are U8_4, U8_3, U8_2, and U8.
3995     *
3996     * @param[in] rs The RenderScript context
3997     * @param[in] e Element type for inputs
3998     *
3999     * @return ScriptIntrinsicHistogram
4000     */
4001    static sp<ScriptIntrinsicHistogram> create(sp<RS> rs, sp<const Element> e);
4002    /**
4003     * Set the output of the histogram.  32 bit integer types are
4004     * supported.
4005     *
4006     * @param[in] aout The output allocation
4007     */
4008    void setOutput(sp<Allocation> aout);
4009    /**
4010     * Set the coefficients used for the dot product calculation. The
4011     * default is {0.299f, 0.587f, 0.114f, 0.f}.
4012     *
4013     * Coefficients must be >= 0 and sum to 1.0 or less.
4014     *
4015     * @param[in] r Red coefficient
4016     * @param[in] g Green coefficient
4017     * @param[in] b Blue coefficient
4018     * @param[in] a Alpha coefficient
4019     */
4020    void setDotCoefficients(float r, float g, float b, float a);
4021    /**
4022     * Process an input buffer and place the histogram into the output
4023     * allocation. The output allocation may be a narrower vector size
4024     * than the input. In this case the vector size of the output is
4025     * used to determine how many of the input channels are used in
4026     * the computation. This is useful if you have an RGBA input
4027     * buffer but only want the histogram for RGB.
4028     *
4029     * 1D and 2D input allocations are supported.
4030     *
4031     * @param[in] ain The input image
4032     */
4033    void forEach(sp<Allocation> ain);
4034    /**
4035     * Process an input buffer and place the histogram into the output
4036     * allocation. The dot product of the input channel and the
4037     * coefficients from 'setDotCoefficients' are used to calculate
4038     * the output values.
4039     *
4040     * 1D and 2D input allocations are supported.
4041     *
4042     * @param[in] ain The input image
4043     */
4044    void forEach_dot(sp<Allocation> ain);
4045};
4046
4047/**
4048 * Intrinsic for applying a per-channel lookup table. Each channel of
4049 * the input has an independent lookup table. The tables are 256
4050 * entries in size and can cover the full value range of U8_4.
4051 **/
4052class ScriptIntrinsicLUT : public ScriptIntrinsic {
4053 private:
4054    sp<Allocation> LUT; // NOTE(review): presumably the Allocation backing the lookup tables -- confirm
4055    bool mDirty; // NOTE(review): likely flags mCache as modified since it was last applied -- confirm
4056    unsigned char mCache[1024]; // presumably 4 channels x 256 entries -- confirm
4057    void setTable(unsigned int offset, unsigned char base, unsigned int length, unsigned char* lutValues); // NOTE(review): looks like the shared helper behind setRed/setGreen/setBlue/setAlpha -- confirm
4058    ScriptIntrinsicLUT(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
4059
4060 public:
4061    /**
4062     * Supported elements types are U8_4.
4063     *
4064     * The default tables are identity.
4065     *
4066     * @param[in] rs The RenderScript context
4067     * @param[in] e Element type for inputs and outputs
4068     *
4069     * @return ScriptIntrinsicLUT
4070     */
4071    static sp<ScriptIntrinsicLUT> create(sp<RS> rs, sp<const Element> e);
4072    /**
4073     * Invoke the kernel and apply the lookup to each cell of ain and
4074     * copy to aout.
4075     *
4076     * @param[in] ain Input allocation
4077     * @param[out] aout Output allocation
4078     */
4079    void forEach(sp<Allocation> ain, sp<Allocation> aout);
4080    /**
4081     * Sets entries in LUT for the red channel.
4082     * @param[in] base base of region to update
4083     * @param[in] length length of region to update
4084     * @param[in] lutValues LUT values to use
4085     */
4086    void setRed(unsigned char base, unsigned int length, unsigned char* lutValues);
4087    /**
4088     * Sets entries in LUT for the green channel.
4089     * @param[in] base base of region to update
4090     * @param[in] length length of region to update
4091     * @param[in] lutValues LUT values to use
4092     */
4093    void setGreen(unsigned char base, unsigned int length, unsigned char* lutValues);
4094    /**
4095     * Sets entries in LUT for the blue channel.
4096     * @param[in] base base of region to update
4097     * @param[in] length length of region to update
4098     * @param[in] lutValues LUT values to use
4099     */
4100    void setBlue(unsigned char base, unsigned int length, unsigned char* lutValues);
4101    /**
4102     * Sets entries in LUT for the alpha channel.
4103     * @param[in] base base of region to update
4104     * @param[in] length length of region to update
4105     * @param[in] lutValues LUT values to use
4106     */
4107    void setAlpha(unsigned char base, unsigned int length, unsigned char* lutValues);
4108    virtual ~ScriptIntrinsicLUT();
4109};
4110
4111/**
4112 * Intrinsic for performing a resize of a 2D allocation.
4113 */
4114class ScriptIntrinsicResize : public ScriptIntrinsic {
4115 private:
4116    sp<Allocation> mInput; // presumably the Allocation given to setInput() -- confirm in the implementation
4117    ScriptIntrinsicResize(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
4118 public:
4119    /**
4120     * Creates a resize intrinsic; no Element argument is taken.
4121     * NOTE(review): supported Element types were listed here as U8_4 -- confirm.
4122     * @param[in] rs RenderScript context
4123     * @return new ScriptIntrinsicResize
4124     */
4125    static sp<ScriptIntrinsicResize> create(sp<RS> rs);
4126
4127    /**
4128     * Copies the input allocation, resized, to the specified output.
4129     * The Allocation is rescaled if necessary using bi-cubic
4130     * interpolation. The input is not a parameter of this call; it is
4131     * supplied separately through setInput().
4132     * @param[out] aout output Allocation
4133     */
4134    void forEach_bicubic(sp<Allocation> aout);
4135
4136    /**
4137     * Set the input of the resize.
4138     * @param[in] ain input Allocation
4139     */
4140    void setInput(sp<Allocation> ain);
4141};
4142
4143/**
4144 * Intrinsic for converting an Android YUV buffer to RGB.
4145 *
4146 * The input allocation should be supplied in a supported YUV format
4147 * as a YUV element Allocation. The output is RGBA; the alpha channel
4148 * will be set to 255.
4149 */
4150class ScriptIntrinsicYuvToRGB : public ScriptIntrinsic {
4151 private:
4152    ScriptIntrinsicYuvToRGB(sp<RS> rs, sp<const Element> e); // private: obtain instances through create()
4153 public:
4154    /**
4155     * Create an intrinsic for converting YUV to RGB.
4156     *
4157     * Supported elements types are U8_4.
4158     *
4159     * @param[in] rs The RenderScript context
4160     * @param[in] e Element type for output
4161     *
4162     * @return ScriptIntrinsicYuvToRGB
4163     */
4164    static sp<ScriptIntrinsicYuvToRGB> create(sp<RS> rs, sp<const Element> e);
4165    /**
4166     * Set the input YUV allocation.
4167     *
4168     * @param[in] in The input allocation.
4169     */
4170    void setInput(sp<Allocation> in);
4171
4172    /**
4173     * Convert the image to RGB.
4174     *
4175     * @param[out] out Output allocation. Must match creation element
4176     *                 type.
4177     */
4178    void forEach(sp<Allocation> out);
4179
4180};
4181
4182/**
4183 * Sampler object that defines how Allocations can be read as textures
4184 * within a kernel. Samplers are used in conjunction with the rsSample
4185 * runtime function to return values from normalized coordinates.
4186 *
4187 * Any Allocation used with a Sampler must have been created with
4188 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE; using a Sampler on an
4189 * Allocation that was not created with
4190 * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE is undefined.
4191 **/
4192 class Sampler : public BaseObj {
4193 private:
4194    Sampler(sp<RS> rs, void* id); // private: obtain instances through create() or the preset accessors
4195    Sampler(sp<RS> rs, void* id, RsSamplerValue min, RsSamplerValue mag,
4196            RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4197    RsSamplerValue mMin;   // minification setting
4198    RsSamplerValue mMag;   // magnification setting
4199    RsSamplerValue mWrapS; // S wrapping mode
4200    RsSamplerValue mWrapT; // T wrapping mode
4201    float mAniso;          // anisotropy setting
4202
4203 public:
4204    /**
4205     * Creates and returns a non-standard Sampler with caller-chosen settings.
4206     * @param[in] rs RenderScript context
4207     * @param[in] min minification
4208     * @param[in] mag magnification
4209     * @param[in] wrapS S wrapping mode
4210     * @param[in] wrapT T wrapping mode
4211     * @param[in] anisotropy anisotropy setting
4212     */
4213    static sp<Sampler> create(sp<RS> rs, RsSamplerValue min, RsSamplerValue mag, RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4214
4215    /**
4216     * @return minification setting for the sampler
4217     */
4218    RsSamplerValue getMinification();
4219    /**
4220     * @return magnification setting for the sampler
4221     */
4222    RsSamplerValue getMagnification();
4223    /**
4224     * @return S wrapping mode for the sampler
4225     */
4226    RsSamplerValue getWrapS();
4227    /**
4228     * @return T wrapping mode for the sampler
4229     */
4230    RsSamplerValue getWrapT();
4231    /**
4232     * @return anisotropy setting for the sampler
4233     */
4234    float getAnisotropy();
4235
4236    /**
4237     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4238     * clamp.
4239     *
4240     * @param rs Context to which the sampler will belong.
4241     *
4242     * @return Sampler
4243     */
4244    static sp<const Sampler> CLAMP_NEAREST(sp<RS> rs);
4245    /**
4246     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4247     * clamp.
4248     *
4249     * @param rs Context to which the sampler will belong.
4250     *
4251     * @return Sampler
4252     */
4253    static sp<const Sampler> CLAMP_LINEAR(sp<RS> rs);
4254    /**
4255     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4256     * wrap modes set to clamp.
4257     *
4258     * @param rs Context to which the sampler will belong.
4259     *
4260     * @return Sampler
4261     */
4262    static sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR(sp<RS> rs);
4263    /**
4264     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4265     * wrap.
4266     *
4267     * @param rs Context to which the sampler will belong.
4268     *
4269     * @return Sampler
4270     */
4271    static sp<const Sampler> WRAP_NEAREST(sp<RS> rs);
4272    /**
4273     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4274     * wrap.
4275     *
4276     * @param rs Context to which the sampler will belong.
4277     *
4278     * @return Sampler
4279     */
4280    static sp<const Sampler> WRAP_LINEAR(sp<RS> rs);
4281    /**
4282     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4283     * wrap modes set to wrap.
4284     *
4285     * @param rs Context to which the sampler will belong.
4286     *
4287     * @return Sampler
4288     */
4289    static sp<const Sampler> WRAP_LINEAR_MIP_LINEAR(sp<RS> rs);
4290    /**
4291     * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4292     * mirrored repeat.
4293     *
4294     * @param rs Context to which the sampler will belong.
4295     *
4296     * @return Sampler
4297     */
4298    static sp<const Sampler> MIRRORED_REPEAT_NEAREST(sp<RS> rs);
4299    /**
4300     * Retrieve a sampler with min and mag set to linear and wrap modes set to
4301     * mirrored repeat.
4302     *
4303     * @param rs Context to which the sampler will belong.
4304     *
4305     * @return Sampler
4306     */
4307    static sp<const Sampler> MIRRORED_REPEAT_LINEAR(sp<RS> rs);
4308    /**
4309     * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4310     * wrap modes set to mirrored repeat.
4311     *
4312     * @param rs Context to which the sampler will belong.
4313     *
4314     * @return Sampler
4315     */
4316    static sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR(sp<RS> rs);
4317
4318};
4319
4320}
4321
4322}
4323
4324#endif
4325