/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
17537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
18537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams#include "rsdCore.h"
19537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams#include "rsdIntrinsics.h"
20537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams#include "rsdAllocation.h"
21537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
22537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams#include "rsdIntrinsicInlines.h"
23537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
24537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Samsusing namespace android;
25537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Samsusing namespace android::renderscript;
26537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
/**
 * Per-script state of the color-matrix intrinsic.
 *
 * The 16 coefficients are stored twice (float and fixed-point) together with
 * two flags that select which vector kernel the root function calls.
 * The struct name is kept as-is because the rest of the file refers to it.
 */
struct ConvolveParams {
    // Matrix coefficients as floats; read by the scalar per-pixel path.
    // Input component k contributes to output component j through fp[4 * k + j].
    float fp[16];

    // Fixed-point copy of fp, computed as (short)(fp[i] * 255.f + 0.5f);
    // this is the table handed to the rsdIntrinsicColorMatrix*_K kernels.
    short ip[16];

    // True when ip[3], ip[7], ip[11], ip[12], ip[13], ip[14] are all 0 and
    // ip[15] is 255, i.e. only the upper-left 3x3 part of the matrix matters.
    bool use3x3;

    // True when, in addition to use3x3, ip[0]==ip[1]==ip[2],
    // ip[4]==ip[5]==ip[6] and ip[8]==ip[9]==ip[10].
    bool useDot;
};
33537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
34537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Samsstatic void ColorMatrix_SetVar(const Context *dc, const Script *script, void * intrinsicData,
35537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams                               uint32_t slot, void *data, size_t dataLength) {
36537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
37537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
38537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    rsAssert(slot == 0);
39537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    memcpy (cp->fp, data, dataLength);
40537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    for(int ct=0; ct < 16; ct++) {
41537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
42537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    }
4340945e01597adaed9e728a14a17bf4a35452abd5Jason Sams
4440945e01597adaed9e728a14a17bf4a35452abd5Jason Sams    if ((cp->ip[3] == 0) && (cp->ip[7] == 0) && (cp->ip[11] == 0) &&
4540945e01597adaed9e728a14a17bf4a35452abd5Jason Sams        (cp->ip[12] == 0) && (cp->ip[13] == 0) && (cp->ip[14] == 0) &&
4640945e01597adaed9e728a14a17bf4a35452abd5Jason Sams        (cp->ip[15] == 255)) {
4740945e01597adaed9e728a14a17bf4a35452abd5Jason Sams        cp->use3x3 = true;
4840945e01597adaed9e728a14a17bf4a35452abd5Jason Sams
4940945e01597adaed9e728a14a17bf4a35452abd5Jason Sams        if ((cp->ip[0] == cp->ip[1]) && (cp->ip[0] == cp->ip[2]) &&
5040945e01597adaed9e728a14a17bf4a35452abd5Jason Sams            (cp->ip[4] == cp->ip[5]) && (cp->ip[4] == cp->ip[6]) &&
5140945e01597adaed9e728a14a17bf4a35452abd5Jason Sams            (cp->ip[8] == cp->ip[9]) && (cp->ip[8] == cp->ip[10])) {
5240945e01597adaed9e728a14a17bf4a35452abd5Jason Sams            cp->useDot = true;
5340945e01597adaed9e728a14a17bf4a35452abd5Jason Sams        }
5440945e01597adaed9e728a14a17bf4a35452abd5Jason Sams    }
55537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams}
56537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
// Vector kernels with C linkage, defined outside this file (presumably in
// NEON assembly -- they are only called when ARCH_ARM_HAVE_NEON is defined).
//   dst / src : output and input pixel pointers
//   coef      : the 16-entry fixed-point coefficient table (ConvolveParams::ip)
//   count     : number of 4-pixel groups to process; the caller advances its
//               pointers by count * 4 pixels afterwards
extern "C" {
void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, const short *coef, uint32_t count);
void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, const short *coef, uint32_t count);
void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, const short *coef, uint32_t count);
}
60537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
61537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Samsstatic void One(const RsForEachStubParamStruct *p, uchar4 *out,
62537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams                const uchar4 *py, const float* coeff) {
63537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    float4 i = convert_float4(py[0]);
64537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
65537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    float4 sum;
66537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.x = i.x * coeff[0] +
67537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.y * coeff[4] +
68537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.z * coeff[8] +
69537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.w * coeff[12];
70537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.y = i.x * coeff[1] +
71537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.y * coeff[5] +
72537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.z * coeff[9] +
73537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.w * coeff[13];
74537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.z = i.x * coeff[2] +
75537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.y * coeff[6] +
76537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.z * coeff[10] +
77537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.w * coeff[14];
78537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.w = i.x * coeff[3] +
79537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.y * coeff[7] +
80537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.z * coeff[11] +
81537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            i.w * coeff[15];
82537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
83537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.x = sum.x < 0 ? 0 : (sum.x > 255 ? 255 : sum.x);
84537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.y = sum.y < 0 ? 0 : (sum.y > 255 ? 255 : sum.y);
85537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.z = sum.z < 0 ? 0 : (sum.z > 255 ? 255 : sum.z);
86537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    sum.w = sum.w < 0 ? 0 : (sum.w > 255 ? 255 : sum.w);
87537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
88537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    *out = convert_uchar4(sum);
89537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams}
90537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
91537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Samsstatic void ColorMatrix_uchar4(const RsForEachStubParamStruct *p,
92537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams                                    uint32_t xstart, uint32_t xend,
93537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams                                    uint32_t instep, uint32_t outstep) {
94537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    ConvolveParams *cp = (ConvolveParams *)p->usr;
95537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    uchar4 *out = (uchar4 *)p->out;
96537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    uchar4 *in = (uchar4 *)p->in;
97537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    uint32_t x1 = xstart;
98537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    uint32_t x2 = xend;
99537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
100537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    in += xstart;
101537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    out += xstart;
102537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
103537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    if(x2 > x1) {
104537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams#if defined(ARCH_ARM_HAVE_NEON)
105537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        int32_t len = (x2 - x1) >> 2;
106537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        if(len > 0) {
10740945e01597adaed9e728a14a17bf4a35452abd5Jason Sams            if (cp->use3x3) {
10840945e01597adaed9e728a14a17bf4a35452abd5Jason Sams                if (cp->useDot) {
10940945e01597adaed9e728a14a17bf4a35452abd5Jason Sams                    rsdIntrinsicColorMatrixDot_K(out, in, cp->ip, len);
11040945e01597adaed9e728a14a17bf4a35452abd5Jason Sams                } else {
11140945e01597adaed9e728a14a17bf4a35452abd5Jason Sams                    rsdIntrinsicColorMatrix3x3_K(out, in, cp->ip, len);
11240945e01597adaed9e728a14a17bf4a35452abd5Jason Sams                }
11340945e01597adaed9e728a14a17bf4a35452abd5Jason Sams            } else {
11440945e01597adaed9e728a14a17bf4a35452abd5Jason Sams                rsdIntrinsicColorMatrix4x4_K(out, in, cp->ip, len);
11540945e01597adaed9e728a14a17bf4a35452abd5Jason Sams            }
116537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            x1 += len << 2;
117537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            out += len << 2;
118537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            in += len << 2;
119537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        }
120537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams#endif
121537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
122537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        while(x1 != x2) {
123537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            One(p, out++, in++, cp->fp);
124537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams            x1++;
125537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        }
126537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    }
127537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams}
128537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
129537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Samsvoid * rsdIntrinsic_InitColorMatrix(const android::renderscript::Context *dc,
130537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams                                    android::renderscript::Script *script,
131537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams                                    RsdIntriniscFuncs_t *funcs) {
132537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
133537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    script->mHal.info.exportedVariableCount = 1;
134537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    funcs->setVar = ColorMatrix_SetVar;
135537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    funcs->root = ColorMatrix_uchar4;
136537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
137537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
138537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    cp->fp[0] = 1.f;
139537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    cp->fp[5] = 1.f;
140537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    cp->fp[10] = 1.f;
141537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    cp->fp[15] = 1.f;
142537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    for(int ct=0; ct < 16; ct++) {
143537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams        cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
144537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    }
145537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams    return cp;
146537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams}
147537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
148537c4411b57ba30b688f437a663120e7f9c4f4e0Jason Sams
149