// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
//            Djordje Pesut  (djordje.pesut@imgtec.com)
147c8da7ce66017295a65ec028084b90800be377f8James Zern
157c8da7ce66017295a65ec028084b90800be377f8James Zern#include "./dsp.h"
167c8da7ce66017295a65ec028084b90800be377f8James Zern
177c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_MIPS_DSP_R2)
187c8da7ce66017295a65ec028084b90800be377f8James Zern
197c8da7ce66017295a65ec028084b90800be377f8James Zernstatic int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
207c8da7ce66017295a65ec028084b90800be377f8James Zern                         int width, int height,
217c8da7ce66017295a65ec028084b90800be377f8James Zern                         uint8_t* dst, int dst_stride) {
227c8da7ce66017295a65ec028084b90800be377f8James Zern  uint32_t alpha_mask = 0xffffffff;
237c8da7ce66017295a65ec028084b90800be377f8James Zern  int i, j, temp0;
247c8da7ce66017295a65ec028084b90800be377f8James Zern
257c8da7ce66017295a65ec028084b90800be377f8James Zern  for (j = 0; j < height; ++j) {
267c8da7ce66017295a65ec028084b90800be377f8James Zern    uint8_t* pdst = dst;
277c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint8_t* palpha = alpha;
287c8da7ce66017295a65ec028084b90800be377f8James Zern    for (i = 0; i < (width >> 2); ++i) {
297c8da7ce66017295a65ec028084b90800be377f8James Zern      int temp1, temp2, temp3;
307c8da7ce66017295a65ec028084b90800be377f8James Zern
317c8da7ce66017295a65ec028084b90800be377f8James Zern      __asm__ volatile (
327c8da7ce66017295a65ec028084b90800be377f8James Zern        "ulw    %[temp0],      0(%[palpha])                \n\t"
337c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu  %[palpha],     %[palpha],     4            \n\t"
347c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu  %[pdst],       %[pdst],       16           \n\t"
357c8da7ce66017295a65ec028084b90800be377f8James Zern        "srl    %[temp1],      %[temp0],      8            \n\t"
367c8da7ce66017295a65ec028084b90800be377f8James Zern        "srl    %[temp2],      %[temp0],      16           \n\t"
377c8da7ce66017295a65ec028084b90800be377f8James Zern        "srl    %[temp3],      %[temp0],      24           \n\t"
387c8da7ce66017295a65ec028084b90800be377f8James Zern        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
397c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb     %[temp0],      -16(%[pdst])                \n\t"
407c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb     %[temp1],      -12(%[pdst])                \n\t"
417c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb     %[temp2],      -8(%[pdst])                 \n\t"
427c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb     %[temp3],      -4(%[pdst])                 \n\t"
437c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
447c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
457c8da7ce66017295a65ec028084b90800be377f8James Zern          [alpha_mask]"+r"(alpha_mask)
467c8da7ce66017295a65ec028084b90800be377f8James Zern        :
477c8da7ce66017295a65ec028084b90800be377f8James Zern        : "memory"
487c8da7ce66017295a65ec028084b90800be377f8James Zern      );
497c8da7ce66017295a65ec028084b90800be377f8James Zern    }
507c8da7ce66017295a65ec028084b90800be377f8James Zern
517c8da7ce66017295a65ec028084b90800be377f8James Zern    for (i = 0; i < (width & 3); ++i) {
527c8da7ce66017295a65ec028084b90800be377f8James Zern      __asm__ volatile (
537c8da7ce66017295a65ec028084b90800be377f8James Zern        "lbu    %[temp0],      0(%[palpha])                \n\t"
547c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu  %[palpha],     %[palpha],     1            \n\t"
557c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb     %[temp0],      0(%[pdst])                  \n\t"
567c8da7ce66017295a65ec028084b90800be377f8James Zern        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
577c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu  %[pdst],       %[pdst],       4            \n\t"
587c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
597c8da7ce66017295a65ec028084b90800be377f8James Zern          [alpha_mask]"+r"(alpha_mask)
607c8da7ce66017295a65ec028084b90800be377f8James Zern        :
617c8da7ce66017295a65ec028084b90800be377f8James Zern        : "memory"
627c8da7ce66017295a65ec028084b90800be377f8James Zern      );
637c8da7ce66017295a65ec028084b90800be377f8James Zern    }
647c8da7ce66017295a65ec028084b90800be377f8James Zern    alpha += alpha_stride;
657c8da7ce66017295a65ec028084b90800be377f8James Zern    dst += dst_stride;
667c8da7ce66017295a65ec028084b90800be377f8James Zern  }
677c8da7ce66017295a65ec028084b90800be377f8James Zern
687c8da7ce66017295a65ec028084b90800be377f8James Zern  __asm__ volatile (
697c8da7ce66017295a65ec028084b90800be377f8James Zern    "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
707c8da7ce66017295a65ec028084b90800be377f8James Zern    "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
717c8da7ce66017295a65ec028084b90800be377f8James Zern    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
727c8da7ce66017295a65ec028084b90800be377f8James Zern    "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
737c8da7ce66017295a65ec028084b90800be377f8James Zern    "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
747c8da7ce66017295a65ec028084b90800be377f8James Zern    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
757c8da7ce66017295a65ec028084b90800be377f8James Zern    : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
767c8da7ce66017295a65ec028084b90800be377f8James Zern    :
777c8da7ce66017295a65ec028084b90800be377f8James Zern  );
787c8da7ce66017295a65ec028084b90800be377f8James Zern
797c8da7ce66017295a65ec028084b90800be377f8James Zern  return (alpha_mask != 0xff);
807c8da7ce66017295a65ec028084b90800be377f8James Zern}
817c8da7ce66017295a65ec028084b90800be377f8James Zern
827c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
837c8da7ce66017295a65ec028084b90800be377f8James Zern  int x;
847c8da7ce66017295a65ec028084b90800be377f8James Zern  const uint32_t c_00ffffff = 0x00ffffffu;
857c8da7ce66017295a65ec028084b90800be377f8James Zern  const uint32_t c_ff000000 = 0xff000000u;
867c8da7ce66017295a65ec028084b90800be377f8James Zern  const uint32_t c_8000000  = 0x00800000u;
877c8da7ce66017295a65ec028084b90800be377f8James Zern  const uint32_t c_8000080  = 0x00800080u;
887c8da7ce66017295a65ec028084b90800be377f8James Zern  for (x = 0; x < width; ++x) {
897c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t argb = ptr[x];
907c8da7ce66017295a65ec028084b90800be377f8James Zern    if (argb < 0xff000000u) {      // alpha < 255
917c8da7ce66017295a65ec028084b90800be377f8James Zern      if (argb <= 0x00ffffffu) {   // alpha == 0
927c8da7ce66017295a65ec028084b90800be377f8James Zern        ptr[x] = 0;
937c8da7ce66017295a65ec028084b90800be377f8James Zern      } else {
947c8da7ce66017295a65ec028084b90800be377f8James Zern        int temp0, temp1, temp2, temp3, alpha;
957c8da7ce66017295a65ec028084b90800be377f8James Zern        __asm__ volatile (
967c8da7ce66017295a65ec028084b90800be377f8James Zern          "srl          %[alpha],   %[argb],       24                \n\t"
977c8da7ce66017295a65ec028084b90800be377f8James Zern          "replv.qb     %[temp0],   %[alpha]                         \n\t"
987c8da7ce66017295a65ec028084b90800be377f8James Zern          "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
997c8da7ce66017295a65ec028084b90800be377f8James Zern          "beqz         %[inverse], 0f                               \n\t"
1007c8da7ce66017295a65ec028084b90800be377f8James Zern          "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
1017c8da7ce66017295a65ec028084b90800be377f8James Zern          "mflo         %[temp0]                                     \n\t"
1027c8da7ce66017295a65ec028084b90800be377f8James Zern        "0:                                                          \n\t"
1037c8da7ce66017295a65ec028084b90800be377f8James Zern          "andi         %[temp1],   %[argb],       0xff              \n\t"
1047c8da7ce66017295a65ec028084b90800be377f8James Zern          "ext          %[temp2],   %[argb],       8,             8  \n\t"
1057c8da7ce66017295a65ec028084b90800be377f8James Zern          "ext          %[temp3],   %[argb],       16,            8  \n\t"
1067c8da7ce66017295a65ec028084b90800be377f8James Zern          "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
1077c8da7ce66017295a65ec028084b90800be377f8James Zern          "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
1087c8da7ce66017295a65ec028084b90800be377f8James Zern          "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
1097c8da7ce66017295a65ec028084b90800be377f8James Zern          "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
1107c8da7ce66017295a65ec028084b90800be377f8James Zern          "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
1117c8da7ce66017295a65ec028084b90800be377f8James Zern          "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
1127c8da7ce66017295a65ec028084b90800be377f8James Zern          "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
1137c8da7ce66017295a65ec028084b90800be377f8James Zern          "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
1147c8da7ce66017295a65ec028084b90800be377f8James Zern          : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
1157c8da7ce66017295a65ec028084b90800be377f8James Zern            [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
1167c8da7ce66017295a65ec028084b90800be377f8James Zern          : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
1177c8da7ce66017295a65ec028084b90800be377f8James Zern            [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
1187c8da7ce66017295a65ec028084b90800be377f8James Zern            [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
1197c8da7ce66017295a65ec028084b90800be377f8James Zern          : "memory", "hi", "lo"
1207c8da7ce66017295a65ec028084b90800be377f8James Zern        );
1217c8da7ce66017295a65ec028084b90800be377f8James Zern        ptr[x] = temp1;
1227c8da7ce66017295a65ec028084b90800be377f8James Zern      }
1237c8da7ce66017295a65ec028084b90800be377f8James Zern    }
1247c8da7ce66017295a65ec028084b90800be377f8James Zern  }
1257c8da7ce66017295a65ec028084b90800be377f8James Zern}
1267c8da7ce66017295a65ec028084b90800be377f8James Zern
//------------------------------------------------------------------------------
// Entry point
1297c8da7ce66017295a65ec028084b90800be377f8James Zern
1307c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPInitAlphaProcessingMIPSdspR2(void);
1317c8da7ce66017295a65ec028084b90800be377f8James Zern
1327c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
1337c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPDispatchAlpha = DispatchAlpha;
1347c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPMultARGBRow = MultARGBRow;
1357c8da7ce66017295a65ec028084b90800be377f8James Zern}
1367c8da7ce66017295a65ec028084b90800be377f8James Zern
1377c8da7ce66017295a65ec028084b90800be377f8James Zern#else  // !WEBP_USE_MIPS_DSP_R2
1387c8da7ce66017295a65ec028084b90800be377f8James Zern
1397c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
1407c8da7ce66017295a65ec028084b90800be377f8James Zern
1417c8da7ce66017295a65ec028084b90800be377f8James Zern#endif  // WEBP_USE_MIPS_DSP_R2
142