15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/*
25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *
45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  Use of this source code is governed by a BSD-style license
55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  that can be found in the LICENSE file in the root of the source
65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  tree. An additional intellectual property rights grant can be found
75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  in the file PATENTS.  All contributing project authors may
85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  be found in the AUTHORS file in the root of the source tree.
95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */
105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h>
125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h>
135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h"
155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h"
165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_common.h"
175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx/vpx_integer.h"
185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h"
195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_convolve.h"
205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2
/*
 * Vertical 8-tap convolution implemented with MIPS DSPr2 inline assembly.
 *
 * For each of the h output rows the block is walked four pixels at a time
 * (x = 0, 4, 8, ... while x < w). Every step loads four bytes from each of
 * eight consecutive source rows, starting three rows above the output row,
 * and stores four bytes to dst.
 *
 *   src        - source pixels; rows (src - 3 * src_stride) through
 *                (src + 4 * src_stride) are read for the first output row
 *   src_stride - byte distance between consecutive source rows
 *   dst        - destination pixels
 *   dst_stride - byte distance between consecutive destination rows
 *   filter_y   - filter taps, read as four 32-bit words (eight int16_t
 *                values, two per word)
 *   w          - width in pixels; always handled in groups of four, so a
 *                width that is not a multiple of four still produces a
 *                full four-byte load and store on the last step
 *   h          - number of output rows
 *
 * NOTE(review): "extp" extracts at the position held in the DSPControl
 * register, and nothing in this function sets it -- presumably the caller
 * programs it beforehand; confirm.
 * NOTE(review): the asm statement stores through dst_ptr and overwrites
 * $ac0-$ac3 but has no clobber list -- confirm this is intentional.
 */
235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_vert_4_dspr2(const uint8_t *src,
245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  int32_t src_stride,
255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  uint8_t *dst,
265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  int32_t dst_stride,
275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  const int16_t *filter_y,
285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  int32_t w,
295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  int32_t h) {
305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t x, y;
315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const uint8_t *src_ptr;
325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *dst_ptr;
  /* Byte table indexed by the extracted filter sum to obtain the output
   * pixel; presumably a clamp-to-8-bit table -- verify at its definition. */
335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *cm = vp9_ff_cropTbl;
  /* Initial value loaded into the low half of every accumulator;
   * presumably the rounding term for the final shift -- confirm. */
345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t vector4a = 64;
355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t load1, load2, load3, load4;
365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t p1, p2;
375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t n1, n2;
385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t scratch1, scratch2;
395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t store1, store2;
405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t vector1b, vector2b, vector3b, vector4b;
415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t Temp1, Temp2;
425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
  /* Fetch the eight 16-bit taps as four 32-bit words, two taps per word.
   * NOTE(review): int16_t data is read through an int32_t pointer, which
   * assumes filter_y is suitably aligned for 32-bit loads -- confirm. */
435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector1b = ((const int32_t *)filter_y)[0];
445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector2b = ((const int32_t *)filter_y)[1];
455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector3b = ((const int32_t *)filter_y)[2];
465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector4b = ((const int32_t *)filter_y)[3];
475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
  /* The filter window begins three rows above the row being produced. */
485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  src -= 3 * src_stride;
495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  for (y = h; y--;) {
515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* prefetch data to cache memory */
525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst + dst_stride);
535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
    /* One asm invocation per group of four horizontally adjacent pixels. */
545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    for (x = 0; x < w; x += 4) {
555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      src_ptr = src + x;
565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      dst_ptr = dst + x;
575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      __asm__ __volatile__ (
          /* Load four bytes from each of the first four rows, stepping
           * src_ptr by src_stride between loads. */
595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load1],     0(%[src_ptr])                   \n\t"
605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load2],     0(%[src_ptr])                   \n\t"
625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load3],     0(%[src_ptr])                   \n\t"
645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load4],     0(%[src_ptr])                   \n\t"
665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Seed all four accumulators: lo = vector4a, hi = 0. */
675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac0                            \n\t"
685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac1                            \n\t"
695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac2                            \n\t"
705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac3                            \n\t"
715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac0                            \n\t"
725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                            \n\t"
735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                            \n\t"
745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                            \n\t"
755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Widen the two right-hand bytes of each loaded word to 16 bits,
           * then regroup them so p1/p2 carry one pixel column across the
           * four rows and n1/n2 carry the neighbouring column. */
765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch1],  %[load1]                        \n\t"
775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[load2]                        \n\t"
785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch2],  %[load3]                        \n\t"
815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[load4]                        \n\t"
825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Multiply-accumulate with the first two tap words:
           * $ac0 collects pixel 1, $ac1 collects pixel 2. */
855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p1],          %[vector1b]     \n\t"
865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p2],          %[vector2b]     \n\t"
875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n1],          %[vector1b]     \n\t"
885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n2],          %[vector2b]     \n\t"
895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Repeat the widening and regrouping for the two left-hand bytes
           * of the same four words. */
905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch1],  %[load1]                        \n\t"
915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[load2]                        \n\t"
925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch2],  %[load3]                        \n\t"
955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[load4]                        \n\t"
965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* First two tap words for the left-hand pair: $ac2 and $ac3. */
995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[vector1b]     \n\t"
1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[vector2b]     \n\t"
1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n1],          %[vector1b]     \n\t"
1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n2],          %[vector2b]     \n\t"
1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Load four bytes from each of the next four rows, reusing the
           * load1..load4 registers. */
1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load1],     0(%[src_ptr])                   \n\t"
1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load2],     0(%[src_ptr])                   \n\t"
1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load3],     0(%[src_ptr])                   \n\t"
1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load4],     0(%[src_ptr])                   \n\t"
1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Right-hand byte pair of rows five to eight. */
1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch1],  %[load1]                        \n\t"
1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[load2]                        \n\t"
1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch2],  %[load3]                        \n\t"
1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[load4]                        \n\t"
1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Finish $ac0 and $ac1 with the last two tap words and extract
           * each sum into Temp1 / Temp2. */
1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p1],          %[vector3b]     \n\t"
1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p2],          %[vector4b]     \n\t"
1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac0,           31              \n\t"
1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n1],          %[vector3b]     \n\t"
1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n2],          %[vector4b]     \n\t"
1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac1,           31              \n\t"
1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Left-hand byte pair of rows five to eight. */
1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch1],  %[load1]                        \n\t"
1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[load2]                        \n\t"
1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch2],  %[load3]                        \n\t"
1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[load4]                        \n\t"
1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Table look-ups for the first two outputs (byte at cm + TempN)
           * are interleaved with finishing $ac2 and $ac3; Temp1 and Temp2
           * are then reused for the second pair of sums. */
1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store1],    %[Temp1](%[cm])                 \n\t"
1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[vector3b]     \n\t"
1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[vector4b]     \n\t"
1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac2,           31              \n\t"
1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store2],    %[Temp2](%[cm])                 \n\t"
1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n1],          %[vector3b]     \n\t"
1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n2],          %[vector4b]     \n\t"
1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac3,           31              \n\t"
1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Write output bytes 0 and 1. */
1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store1],    0(%[dst_ptr])                   \n\t"
1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store2],    1(%[dst_ptr])                   \n\t"
1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Look up and write output bytes 2 and 3. */
1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store1],    %[Temp1](%[cm])                 \n\t"
1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store2],    %[Temp2](%[cm])                 \n\t"
1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store1],    2(%[dst_ptr])                   \n\t"
1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store2],    3(%[dst_ptr])                   \n\t"
1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
          /* Outputs: every temporary is early-clobber ("=&r") so it cannot
           * share a register with an input; src_ptr is read and written. */
1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [load1] "=&r" (load1), [load2] "=&r" (load2),
1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [load3] "=&r" (load3), [load4] "=&r" (load4),
1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [p1] "=&r" (p1), [p2] "=&r" (p2),
1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [n1] "=&r" (n1), [n2] "=&r" (n2),
1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2),
1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [store1] "=&r" (store1), [store2] "=&r" (store2),
1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [src_ptr] "+r" (src_ptr)
          /* Inputs: the four tap words, the accumulator seed, the row
           * stride, the look-up table and the destination pointer. */
1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [vector3b] "r" (vector3b), [vector4b] "r" (vector4b),
1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [vector4a] "r" (vector4a), [src_stride] "r" (src_stride),
1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [cm] "r" (cm), [dst_ptr] "r" (dst_ptr)
1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      );
1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* Next row... */
1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src += src_stride;
1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst += dst_stride;
1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  }
1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_vert_64_dspr2(const uint8_t *src,
1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                   int32_t src_stride,
1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                   uint8_t *dst,
1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                   int32_t dst_stride,
1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                   const int16_t *filter_y,
1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                   int32_t h) {
1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t x, y;
1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const uint8_t *src_ptr;
1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *dst_ptr;
1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *cm = vp9_ff_cropTbl;
1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t vector4a = 64;
1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t load1, load2, load3, load4;
1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t p1, p2;
1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t n1, n2;
1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t scratch1, scratch2;
1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t store1, store2;
1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t vector1b, vector2b, vector3b, vector4b;
1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t Temp1, Temp2;
1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector1b = ((const int32_t *)filter_y)[0];
1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector2b = ((const int32_t *)filter_y)[1];
1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector3b = ((const int32_t *)filter_y)[2];
2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector4b = ((const int32_t *)filter_y)[3];
2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  src -= 3 * src_stride;
2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  for (y = h; y--;) {
2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* prefetch data to cache memory */
2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst + dst_stride);
2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst + dst_stride + 32);
2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    for (x = 0; x < 64; x += 4) {
2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      src_ptr = src + x;
2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      dst_ptr = dst + x;
2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      __asm__ __volatile__ (
2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load1],     0(%[src_ptr])                   \n\t"
2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load2],     0(%[src_ptr])                   \n\t"
2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load3],     0(%[src_ptr])                   \n\t"
2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load4],     0(%[src_ptr])                   \n\t"
2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac0                            \n\t"
2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac1                            \n\t"
2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac2                            \n\t"
2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector4a],  $ac3                            \n\t"
2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac0                            \n\t"
2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                            \n\t"
2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                            \n\t"
2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                            \n\t"
2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch1],  %[load1]                        \n\t"
2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[load2]                        \n\t"
2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch2],  %[load3]                        \n\t"
2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[load4]                        \n\t"
2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p1],          %[vector1b]     \n\t"
2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p2],          %[vector2b]     \n\t"
2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n1],          %[vector1b]     \n\t"
2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n2],          %[vector2b]     \n\t"
2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch1],  %[load1]                        \n\t"
2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[load2]                        \n\t"
2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch2],  %[load3]                        \n\t"
2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[load4]                        \n\t"
2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[vector1b]     \n\t"
2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[vector2b]     \n\t"
2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n1],          %[vector1b]     \n\t"
2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n2],          %[vector2b]     \n\t"
2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load1],     0(%[src_ptr])                   \n\t"
2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load2],     0(%[src_ptr])                   \n\t"
2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load3],     0(%[src_ptr])                   \n\t"
2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "add              %[src_ptr],   %[src_ptr],     %[src_stride]   \n\t"
2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[load4],     0(%[src_ptr])                   \n\t"
2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch1],  %[load1]                        \n\t"
2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[load2]                        \n\t"
2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[scratch2],  %[load3]                        \n\t"
2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[load4]                        \n\t"
2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p1],          %[vector3b]     \n\t"
2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac0,         %[p2],          %[vector4b]     \n\t"
2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac0,           31              \n\t"
2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n1],          %[vector3b]     \n\t"
2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[n2],          %[vector4b]     \n\t"
2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac1,           31              \n\t"
2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch1],  %[load1]                        \n\t"
2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[load2]                        \n\t"
2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n1],        %[p1],          %[scratch1]     \n\t" /* pixel 2 */
2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p1],        %[scratch1],    16              \n\t" /* pixel 1 */
2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[scratch2],  %[load3]                        \n\t"
2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[load4]                        \n\t"
2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "precrq.ph.w      %[n2],        %[p2],          %[scratch2]     \n\t" /* pixel 2 */
2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "append           %[p2],        %[scratch2],    16              \n\t" /* pixel 1 */
2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store1],    %[Temp1](%[cm])                 \n\t"
2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[vector3b]     \n\t"
2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[vector4b]     \n\t"
2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac2,           31              \n\t"
2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store2],    %[Temp2](%[cm])                 \n\t"
2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n1],          %[vector3b]     \n\t"
3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[n2],          %[vector4b]     \n\t"
3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac3,           31              \n\t"
3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store1],    0(%[dst_ptr])                   \n\t"
3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store2],    1(%[dst_ptr])                   \n\t"
3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store1],    %[Temp1](%[cm])                 \n\t"
3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[store2],    %[Temp2](%[cm])                 \n\t"
3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store1],    2(%[dst_ptr])                   \n\t"
3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[store2],    3(%[dst_ptr])                   \n\t"
3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [load1] "=&r" (load1), [load2] "=&r" (load2),
3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [load3] "=&r" (load3), [load4] "=&r" (load4),
3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [p1] "=&r" (p1), [p2] "=&r" (p2),
3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [n1] "=&r" (n1), [n2] "=&r" (n2),
3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2),
3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [store1] "=&r" (store1), [store2] "=&r" (store2),
3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [src_ptr] "+r" (src_ptr)
3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [vector3b] "r" (vector3b), [vector4b] "r" (vector4b),
3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [vector4a] "r" (vector4a), [src_stride] "r" (src_stride),
3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [cm] "r" (cm), [dst_ptr] "r" (dst_ptr)
3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      );
3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* Next row... */
3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src += src_stride;
3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst += dst_stride;
3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  }
3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
/*
 * Dispatcher for the vertical convolve8 on DSPr2 builds.
 *
 * The implementation is chosen from the first four taps of filter_y:
 *   - taps 2 and 3 equal to {0, 128}: forwards to vp9_convolve_copy();
 *   - taps 0 and 1 both zero: forwards to vp9_convolve2_vert_dspr2();
 *   - otherwise, when y_step_q4 == 16 and w is 4, 8, 16, 32 or 64, the
 *     DSPr2 kernels of this file are used;
 *   - every remaining case falls back to vp9_convolve8_vert_c().
 *
 * filter_x and x_step_q4 are not examined here; they are only passed on to
 * the routines that take the full argument list.
 */
void vp9_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  /*
   * The taps are compared one by one rather than through an int32_t view of
   * the int16_t array: the cast would break the strict-aliasing rule, needs
   * 4-byte alignment, and makes the 0x800000 constant byte-order dependent.
   */
  if (filter_y[2] == 0 && filter_y[3] == 128) {
    vp9_convolve_copy(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
                      w, h);
    return;
  }

  if (filter_y[0] == 0 && filter_y[1] == 0) {
    vp9_convolve2_vert_dspr2(src, src_stride,
                             dst, dst_stride,
                             filter_x, x_step_q4,
                             filter_y, y_step_q4,
                             w, h);
    return;
  }

  if (16 != y_step_q4) {
    /* The DSPr2 kernels are only used for a step of 16. */
    vp9_convolve8_vert_c(src, src_stride,
                         dst, dst_stride,
                         filter_x, x_step_q4,
                         filter_y, y_step_q4,
                         w, h);
    return;
  }

  {
    uint32_t pos = 38;

    /* bit position for extract from acc */
    __asm__ __volatile__ (
      "wrdsp      %[pos],     1           \n\t"
      :
      : [pos] "r" (pos)
    );

    vp9_prefetch_store(dst);

    switch (w) {
      case 4 :
      case 8 :
      case 16 :
      case 32 :
        convolve_vert_4_dspr2(src, src_stride,
                              dst, dst_stride,
                              filter_y, w, h);
        break;
      case 64 :
        vp9_prefetch_store(dst + 32);
        convolve_vert_64_dspr2(src, src_stride,
                               dst, dst_stride,
                               filter_y, h);
        break;
      default:
        /* Widths without a dedicated kernel use the C implementation. */
        vp9_convolve8_vert_c(src, src_stride,
                             dst, dst_stride,
                             filter_x, x_step_q4,
                             filter_y, y_step_q4,
                             w, h);
        break;
    }
  }
}
3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif
397