1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsCpuIntrinsic.h" 19#include "rsCpuIntrinsicInlines.h" 20 21namespace android { 22namespace renderscript { 23 24 25class RsdCpuScriptIntrinsic3DLUT : public RsdCpuScriptIntrinsic { 26public: 27 void populateScript(Script *) override; 28 void invokeFreeChildren() override; 29 30 void setGlobalObj(uint32_t slot, ObjectBase *data) override; 31 32 ~RsdCpuScriptIntrinsic3DLUT() override; 33 RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 34 35protected: 36 ObjectBaseRef<Allocation> mLUT; 37 38 static void kernel(const RsExpandKernelDriverInfo *info, 39 uint32_t xstart, uint32_t xend, 40 uint32_t outstep); 41}; 42 43void RsdCpuScriptIntrinsic3DLUT::setGlobalObj(uint32_t slot, ObjectBase *data) { 44 rsAssert(slot == 0); 45 mLUT.set(static_cast<Allocation *>(data)); 46} 47 48extern "C" void rsdIntrinsic3DLUT_K(void *dst, void const *in, size_t count, 49 void const *lut, 50 int32_t pitchy, int32_t pitchz, 51 int dimx, int dimy, int dimz); 52 53 54void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info, 55 uint32_t xstart, uint32_t xend, 56 uint32_t outstep) { 57 RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr; 58 59 uchar4 *out = (uchar4 *)info->outPtr[0]; 60 uchar4 *in = (uchar4 *)info->inPtr[0]; 61 uint32_t x1 = xstart; 62 uint32_t x2 = xend; 63 64 const uchar *bp = (const uchar *)cp->mLUT->mHal.drvState.lod[0].mallocPtr; 65 66 int4 dims = { 67 static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimX - 1), 68 static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimY - 1), 69 static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimZ - 1), 70 -1 71 }; 72 const float4 m = (float4)(1.f / 255.f) * convert_float4(dims); 73 const int4 coordMul = convert_int4(m * (float4)0x8000); 74 const size_t stride_y = cp->mLUT->mHal.drvState.lod[0].stride; 75 const size_t stride_z = stride_y * cp->mLUT->mHal.drvState.lod[0].dimY; 76 77 //ALOGE("strides %zu %zu", stride_y, stride_z); 78 79#if defined(ARCH_ARM_USE_INTRINSICS) 80 if (gArchUseSIMD) { 81 int32_t len = x2 - x1; 82 if(len > 0) { 83 rsdIntrinsic3DLUT_K(out, in, len, 84 bp, stride_y, stride_z, 85 dims.x, dims.y, dims.z); 86 x1 += len; 87 out += len; 88 in += len; 89 } 90 } 91#endif 92 93 while (x1 < x2) { 94 int4 baseCoord = convert_int4(*in) * coordMul; 95 int4 coord1 = baseCoord >> (int4)15; 96 //int4 coord2 = min(coord1 + 1, gDims - 1); 97 98 int4 weight2 = baseCoord & 0x7fff; 99 int4 weight1 = (int4)0x8000 - weight2; 100 101 //ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w); 102 const uchar *bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z); 103 const uchar4 *pt_00 = (const uchar4 *)&bp2[0]; 104 const uchar4 *pt_10 = (const uchar4 *)&bp2[stride_y]; 105 const uchar4 *pt_01 = (const uchar4 *)&bp2[stride_z]; 106 const uchar4 *pt_11 = (const uchar4 *)&bp2[stride_y + stride_z]; 107 108 uint4 v000 = convert_uint4(pt_00[0]); 109 uint4 v100 = convert_uint4(pt_00[1]); 110 uint4 v010 = convert_uint4(pt_10[0]); 111 uint4 v110 = convert_uint4(pt_10[1]); 112 uint4 v001 = convert_uint4(pt_01[0]); 113 uint4 v101 = convert_uint4(pt_01[1]); 114 uint4 v011 = convert_uint4(pt_11[0]); 115 uint4 v111 = convert_uint4(pt_11[1]); 116 117 uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7; 118 uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7; 119 uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7; 120 uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7; 121 122 uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15; 123 uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15; 124 125 uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15; 126 uint4 v2 = (v + 0x7f) >> (int4)8; 127 128 uchar4 ret = convert_uchar4(v2); 129 ret.w = in->w; 130 131 #if 0 132 if (!x1) { 133 ALOGE("in %08x %08x %08x %08x", in->r, in->g, in->b, in->a); 134 ALOGE("baseCoord %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, baseCoord.w); 135 ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w); 136 ALOGE("weight1 %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w); 137 ALOGE("weight2 %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w); 138 139 ALOGE("v000 %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w); 140 ALOGE("v100 %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w); 141 ALOGE("yz00 %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w); 142 ALOGE("z0 %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w); 143 144 ALOGE("v %08x %08x %08x %08x", v.x, v.y, v.z, v.w); 145 ALOGE("v2 %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w); 146 } 147 #endif 148 *out = ret; 149 150 151 in++; 152 out++; 153 x1++; 154 } 155} 156 157RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT( 158 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) : 159 RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) { 160 161 mRootPtr = &kernel; 162} 163 164RsdCpuScriptIntrinsic3DLUT::~RsdCpuScriptIntrinsic3DLUT() { 165} 166 167void RsdCpuScriptIntrinsic3DLUT::populateScript(Script *s) { 168 s->mHal.info.exportedVariableCount = 1; 169} 170 171void RsdCpuScriptIntrinsic3DLUT::invokeFreeChildren() { 172 mLUT.clear(); 173} 174 175RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx, 176 const Script *s, const Element *e) { 177 178 return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e); 179} 180 181} // namespace renderscript 182} // namespace android 183