1f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang/*
2f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *
4f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *  Use of this source code is governed by a BSD-style license
5f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *  that can be found in the LICENSE file in the root of the source
6f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *  tree. An additional intellectual property rights grant can be found
7f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *  in the file PATENTS. All contributing project authors may
8f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang *  be found in the AUTHORS file in the root of the source tree.
9f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang */
10f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
11f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/row.h"
12f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/rotate_row.h"
13f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
14f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/basic_types.h"
15f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
16f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus
17f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangnamespace libyuv {
18f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangextern "C" {
19f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif
20f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
21f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#if !defined(LIBYUV_DISABLE_MIPS) && \
22f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
23f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang    (_MIPS_SIM == _MIPS_SIM_ABI32)
24f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
25f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangvoid TransposeWx8_DSPR2(const uint8* src, int src_stride,
26f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang                        uint8* dst, int dst_stride, int width) {
27f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang   __asm__ __volatile__ (
28f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set push                                         \n"
29f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set noreorder                                    \n"
30f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t2, %[src_stride], 0x1          \n" // src_stride x 2
31f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t4, %[src_stride], 0x2          \n" // src_stride x 4
32f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t9, %[src_stride], 0x3          \n" // src_stride x 8
33f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu             $t3, $t2, %[src_stride]          \n"
34f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu             $t5, $t4, %[src_stride]          \n"
35f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu             $t6, $t2, $t4                    \n"
36f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi             $t0, %[dst], 0x3                 \n"
37f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi             $t1, %[dst_stride], 0x3          \n"
38f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t0, $t0, $t1                    \n"
39f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez             $t0, 11f                         \n"
40f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " subu            $t7, $t9, %[src_stride]          \n"
41f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride word aligned
42f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang    "1:                                                  \n"
43f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbu              $t0, 0(%[src])                   \n"
44f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t1, %[src_stride](%[src])       \n"
45f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t8, $t2(%[src])                 \n"
46f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t9, $t3(%[src])                 \n"
47f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t1, $t1, 16                     \n"
48f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t9, $t9, 16                     \n"
49f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t0, $t0, $t1                    \n"
50f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t8, $t8, $t9                    \n"
51f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph      $s0, $t8, $t0                    \n"
52f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t0, $t4(%[src])                 \n"
53f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t1, $t5(%[src])                 \n"
54f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t8, $t6(%[src])                 \n"
55f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t9, $t7(%[src])                 \n"
56f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t1, $t1, 16                     \n"
57f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t9, $t9, 16                     \n"
58f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t0, $t0, $t1                    \n"
59f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t8, $t8, $t9                    \n"
60f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph      $s1, $t8, $t0                    \n"
61f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw               $s0, 0(%[dst])                   \n"
62f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            %[width], -1                     \n"
63f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            %[src], 1                        \n"
64f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw               $s1, 4(%[dst])                   \n"
65f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez             %[width], 1b                     \n"
66f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " addu            %[dst], %[dst], %[dst_stride]    \n"
67f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "b                2f                               \n"
68f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride unaligned
69f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang   "11:                                                  \n"
70f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbu              $t0, 0(%[src])                   \n"
71f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t1, %[src_stride](%[src])       \n"
72f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t8, $t2(%[src])                 \n"
73f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t9, $t3(%[src])                 \n"
74f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t1, $t1, 16                     \n"
75f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t9, $t9, 16                     \n"
76f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t0, $t0, $t1                    \n"
77f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t8, $t8, $t9                    \n"
78f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph      $s0, $t8, $t0                    \n"
79f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t0, $t4(%[src])                 \n"
80f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t1, $t5(%[src])                 \n"
81f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t8, $t6(%[src])                 \n"
82f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lbux             $t9, $t7(%[src])                 \n"
83f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t1, $t1, 16                     \n"
84f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t9, $t9, 16                     \n"
85f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t0, $t0, $t1                    \n"
86f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t8, $t8, $t9                    \n"
87f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph      $s1, $t8, $t0                    \n"
88f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $s0, 0(%[dst])                   \n"
89f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $s0, 3(%[dst])                   \n"
90f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            %[width], -1                     \n"
91f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            %[src], 1                        \n"
92f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $s1, 4(%[dst])                   \n"
93f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $s1, 7(%[dst])                   \n"
94f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez             %[width], 11b                    \n"
95f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       "addu             %[dst], %[dst], %[dst_stride]   \n"
96f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang    "2:                                                  \n"
97f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set pop                                          \n"
98f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      :[src] "+r" (src),
99f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       [dst] "+r" (dst),
100f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       [width] "+r" (width)
101f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      :[src_stride] "r" (src_stride),
102f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       [dst_stride] "r" (dst_stride)
103f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      : "t0", "t1",  "t2", "t3", "t4", "t5",
104f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        "t6", "t7", "t8", "t9",
105f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        "s0", "s1"
106f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  );
107f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang}
108f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
109f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangvoid TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
110f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang                             uint8* dst, int dst_stride, int width) {
111f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  __asm__ __volatile__ (
112f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set noat                                         \n"
113f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set push                                         \n"
114f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set noreorder                                    \n"
115f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "beqz             %[width], 2f                     \n"
116f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " sll             $t2, %[src_stride], 0x1          \n"  // src_stride x 2
117f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t4, %[src_stride], 0x2          \n"  // src_stride x 4
118f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll              $t9, %[src_stride], 0x3          \n"  // src_stride x 8
119f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu             $t3, $t2, %[src_stride]          \n"
120f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu             $t5, $t4, %[src_stride]          \n"
121f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu             $t6, $t2, $t4                    \n"
122f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
123f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "srl              $AT, %[width], 0x2               \n"
124f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi             $t0, %[dst], 0x3                 \n"
125f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi             $t1, %[dst_stride], 0x3          \n"
126f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or               $t0, $t0, $t1                    \n"
127f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez             $t0, 11f                         \n"
128f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " subu            $t7, $t9, %[src_stride]          \n"
129f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride word aligned
130f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "1:                                                \n"
131f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lw               $t0, 0(%[src])                   \n"
132f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t1, %[src_stride](%[src])       \n"
133f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t8, $t2(%[src])                 \n"
134f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t9, $t3(%[src])                 \n"
135f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
136f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 30 | 20 | 10 | 00 |
137f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 31 | 21 | 11 | 01 |
138f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 32 | 22 | 12 | 02 |
139f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 33 | 23 | 13 | 03 |
140f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
141f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s0, $t1, $t0                     \n"
142f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s1, $t9, $t8                     \n"
143f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s2, $t1, $t0                     \n"
144f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s3, $t9, $t8                     \n"
145f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
146f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s0 = | 21 | 01 | 20 | 00 |
147f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s1 = | 23 | 03 | 22 | 02 |
148f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s2 = | 31 | 11 | 30 | 10 |
149f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s3 = | 33 | 13 | 32 | 12 |
150f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
151f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s4, $s1, $s0                     \n"
152f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s5, $s1, $s0                     \n"
153f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s6, $s3, $s2                     \n"
154f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s7, $s3, $s2                     \n"
155f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
156f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s4 = | 03 | 02 | 01 | 00 |
157f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s5 = | 23 | 22 | 21 | 20 |
158f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s6 = | 13 | 12 | 11 | 10 |
159f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s7 = | 33 | 32 | 31 | 30 |
160f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
161f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t0, $t4(%[src])                 \n"
162f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t1, $t5(%[src])                 \n"
163f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t8, $t6(%[src])                 \n"
164f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t9, $t7(%[src])                 \n"
165f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
166f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 34 | 24 | 14 | 04 |
167f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 35 | 25 | 15 | 05 |
168f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 36 | 26 | 16 | 06 |
169f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 37 | 27 | 17 | 07 |
170f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
171f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s0, $t1, $t0                     \n"
172f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s1, $t9, $t8                     \n"
173f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s2, $t1, $t0                     \n"
174f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s3, $t9, $t8                     \n"
175f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
176f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s0 = | 25 | 05 | 24 | 04 |
177f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s1 = | 27 | 07 | 26 | 06 |
178f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s2 = | 35 | 15 | 34 | 14 |
179f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s3 = | 37 | 17 | 36 | 16 |
180f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
181f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $t0, $s1, $s0                     \n"
182f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $t1, $s1, $s0                     \n"
183f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $t8, $s3, $s2                     \n"
184f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $t9, $s3, $s2                     \n"
185f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
186f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t0 = | 07 | 06 | 05 | 04 |
187f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t1 = | 27 | 26 | 25 | 24 |
188f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t8 = | 17 | 16 | 15 | 14 |
189f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t9 = | 37 | 36 | 35 | 34 |
190f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
191f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s0, %[dst], %[dst_stride]        \n"
192f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s1, $s0, %[dst_stride]           \n"
193f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s2, $s1, %[dst_stride]           \n"
194f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
195f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s4, 0(%[dst])                    \n"
196f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $t0, 4(%[dst])                    \n"
197f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s6, 0($s0)                       \n"
198f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $t8, 4($s0)                       \n"
199f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s5, 0($s1)                       \n"
200f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $t1, 4($s1)                       \n"
201f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s7, 0($s2)                       \n"
202f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $t9, 4($s2)                       \n"
203f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
204f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            $AT, -1                          \n"
205f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            %[src], 4                        \n"
206f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
207f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez             $AT, 1b                          \n"
208f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " addu            %[dst], $s2, %[dst_stride]       \n"
209f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "b                2f                               \n"
210f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride unaligned
211f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "11:                                               \n"
212f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lw               $t0, 0(%[src])                   \n"
213f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t1, %[src_stride](%[src])       \n"
214f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t8, $t2(%[src])                 \n"
215f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t9, $t3(%[src])                 \n"
216f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
217f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 30 | 20 | 10 | 00 |
218f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 31 | 21 | 11 | 01 |
219f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 32 | 22 | 12 | 02 |
220f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 33 | 23 | 13 | 03 |
221f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
222f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s0, $t1, $t0                     \n"
223f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s1, $t9, $t8                     \n"
224f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s2, $t1, $t0                     \n"
225f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s3, $t9, $t8                     \n"
226f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
227f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s0 = | 21 | 01 | 20 | 00 |
228f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s1 = | 23 | 03 | 22 | 02 |
229f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s2 = | 31 | 11 | 30 | 10 |
230f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s3 = | 33 | 13 | 32 | 12 |
231f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
232f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s4, $s1, $s0                     \n"
233f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s5, $s1, $s0                     \n"
234f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s6, $s3, $s2                     \n"
235f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s7, $s3, $s2                     \n"
236f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
237f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s4 = | 03 | 02 | 01 | 00 |
238f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s5 = | 23 | 22 | 21 | 20 |
239f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s6 = | 13 | 12 | 11 | 10 |
240f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s7 = | 33 | 32 | 31 | 30 |
241f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
242f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t0, $t4(%[src])                 \n"
243f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t1, $t5(%[src])                 \n"
244f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t8, $t6(%[src])                 \n"
245f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx              $t9, $t7(%[src])                 \n"
246f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
247f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 34 | 24 | 14 | 04 |
248f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 35 | 25 | 15 | 05 |
249f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 36 | 26 | 16 | 06 |
250f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 37 | 27 | 17 | 07 |
251f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
252f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s0, $t1, $t0                     \n"
253f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s1, $t9, $t8                     \n"
254f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s2, $t1, $t0                     \n"
255f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s3, $t9, $t8                     \n"
256f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
257f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s0 = | 25 | 05 | 24 | 04 |
258f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s1 = | 27 | 07 | 26 | 06 |
259f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s2 = | 35 | 15 | 34 | 14 |
260f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // s3 = | 37 | 17 | 36 | 16 |
261f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
262f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $t0, $s1, $s0                     \n"
263f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $t1, $s1, $s0                     \n"
264f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $t8, $s3, $s2                     \n"
265f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $t9, $s3, $s2                     \n"
266f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
267f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t0 = | 07 | 06 | 05 | 04 |
268f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t1 = | 27 | 26 | 25 | 24 |
269f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t8 = | 17 | 16 | 15 | 14 |
270f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  // t9 = | 37 | 36 | 35 | 34 |
271f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
272f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s0, %[dst], %[dst_stride]        \n"
273f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s1, $s0, %[dst_stride]           \n"
274f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s2, $s1, %[dst_stride]           \n"
275f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
276f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $s4, 0(%[dst])                   \n"
277f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $s4, 3(%[dst])                   \n"
278f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $t0, 4(%[dst])                   \n"
279f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $t0, 7(%[dst])                   \n"
280f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $s6, 0($s0)                      \n"
281f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $s6, 3($s0)                      \n"
282f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $t8, 4($s0)                      \n"
283f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $t8, 7($s0)                      \n"
284f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $s5, 0($s1)                      \n"
285f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $s5, 3($s1)                      \n"
286f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $t1, 4($s1)                      \n"
287f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $t1, 7($s1)                      \n"
288f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $s7, 0($s2)                      \n"
289f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $s7, 3($s2)                      \n"
290f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr              $t9, 4($s2)                      \n"
291f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl              $t9, 7($s2)                      \n"
292f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
293f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            $AT, -1                          \n"
294f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu            %[src], 4                        \n"
295f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
296f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez             $AT, 11b                         \n"
297f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " addu            %[dst], $s2, %[dst_stride]       \n"
298f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "2:                                                \n"
299f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set pop                                          \n"
300f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set at                                           \n"
301f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      :[src] "+r" (src),
302f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       [dst] "+r" (dst),
303f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       [width] "+r" (width)
304f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      :[src_stride] "r" (src_stride),
305f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang       [dst_stride] "r" (dst_stride)
306f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
307f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
308f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  );
309f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang}
310f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
// TransposeUVWx8_DSPR2: MIPS DSP-ASE inline-assembly routine that reads an
// 8-row strip of interleaved two-channel bytes from src and writes each
// channel, transposed, to its own destination (dst_a and dst_b).
//
// Per loop iteration, as the instructions below show:
//   - one 32-bit word is loaded from each of the 8 rows src + k * src_stride
//     (k = 0..7); the lane diagrams label its bytes |Bk|Ak|bk|ak|, written
//     most-significant byte first;
//   - the eight "a" bytes are stored as 8 bytes at dst_a and the eight "A"
//     bytes at dst_a + dst_stride_a; likewise the "b" bytes go to dst_b and
//     the "B" bytes to dst_b + dst_stride_b;
//   - src advances by 4 bytes, dst_a by 2 * dst_stride_a and dst_b by
//     2 * dst_stride_b.
// The iteration count is width >> 1.
//
// The loop exists twice: label 1 uses aligned word stores (sw) and is taken
// when dst_a, dst_b, dst_stride_a and dst_stride_b all have their two low
// bits clear; label 11 uses swr/swl store pairs otherwise.
//
// NOTE(review): both paths read the source with lw/lwx, so src and src_stride
// presumably have to be word aligned - confirm against the caller.
// NOTE(review): only width == 0 is guarded. The loops are bottom-tested with
// a counter of width >> 1, so width == 1 would enter the loop with a counter
// of 0. Callers presumably pass an even, positive width - confirm.
// NOTE(review): the asm stores through dst_a/dst_b but the clobber list has
// no "memory" entry - consider whether one is needed.
// NOTE(review): the swr at +0 / swl at +3 pairing looks like the little-endian
// form of an unaligned word store - confirm the supported endianness.
311f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangvoid TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
312f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang                          uint8* dst_a, int dst_stride_a,
313f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang                          uint8* dst_b, int dst_stride_b,
314f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang                          int width) {
315f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  __asm__ __volatile__ (
316f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set push                                         \n"
317f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set noreorder                                    \n" // delay slots are filled by hand below
318f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "beqz            %[width], 2f                      \n" // nothing to do when width == 0
319f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " sll            $t2, %[src_stride], 0x1           \n" // src_stride x 2 (branch delay slot)
320f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t4, %[src_stride], 0x2           \n" // src_stride x 4
321f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t9, %[src_stride], 0x3           \n" // src_stride x 8
322f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $t3, $t2, %[src_stride]           \n" // src_stride x 3
323f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $t5, $t4, %[src_stride]           \n" // src_stride x 5
324f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $t6, $t2, $t4                     \n" // src_stride x 6
325f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "subu            $t7, $t9, %[src_stride]           \n" // src_stride x 7
326f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "srl             $t1, %[width], 1                  \n" // loop counter = width / 2
327f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
328f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
329f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi            $t0, %[dst_a], 0x3                \n"
330f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi            $t8, %[dst_b], 0x3                \n"
331f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or              $t0, $t0, $t8                     \n"
332f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi            $t8, %[dst_stride_a], 0x3         \n"
333f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "andi            $s5, %[dst_stride_b], 0x3         \n"
334f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or              $t8, $t8, $s5                     \n"
335f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "or              $t0, $t0, $t8                     \n" // non-zero if any of the four has a low bit set
336f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez            $t0, 11f                          \n" // take the unaligned-store loop
337f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " nop                                              \n"
338f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// dst and dst_stride word aligned (for both the a and b destinations)
339f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang    "1:                                                  \n"
340f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
341f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
342f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s5, %[dst_a], %[dst_stride_a]    \n" // $s5 = second output row of a
343f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
344f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
345f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s6, %[dst_b], %[dst_stride_b]    \n" // $s6 = second output row of b
346f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
347f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
348f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
349f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
350f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
351f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
352f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t0, $t0, 16                      \n" // move |b0|a0| into the upper halfword
353f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
354f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t9, $t9, 16                      \n" // move |b2|a2| into the upper halfword
355f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
356f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
357f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s3, 0($s5)                       \n" // A3..A0 -> second row of a
358f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s4, 0($s6)                       \n" // B3..B0 -> second row of b
359f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
360f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
361f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
362f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
363f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
364f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
365f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
366f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
367f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s3, 0(%[dst_a])                  \n" // a3..a0 -> first row of a
368f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s4, 0(%[dst_b])                  \n" // b3..b0 -> first row of b
369f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
370f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
371f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B7|A7|B6|A6|
372f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
373f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
374f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
375f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t0, $t0, 16                      \n" // move |b4|a4| into the upper halfword
376f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
377f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t9, $t9, 16                      \n" // move |b6|a6| into the upper halfword
378f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
379f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s3, 4($s5)                       \n" // A7..A4 -> second row of a, bytes 4..7
380f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s4, 4($s6)                       \n" // B7..B4 -> second row of b, bytes 4..7
381f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
382f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
383f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
384f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
385f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu           %[src], 4                         \n" // next 4 source bytes in every row
386f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu           $t1, -1                           \n" // one iteration done
387f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t0, %[dst_stride_a], 1           \n" // dst_stride_a x 2
388f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t8, %[dst_stride_b], 1           \n" // dst_stride_b x 2
389f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s3, 4(%[dst_a])                  \n" // a7..a4 -> first row of a, bytes 4..7
390f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sw              $s4, 4(%[dst_b])                  \n" // b7..b4 -> first row of b, bytes 4..7
391f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            %[dst_a], %[dst_a], $t0           \n" // advance dst_a by two rows
392f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez            $t1, 1b                           \n"
393f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " addu           %[dst_b], %[dst_b], $t8           \n" // advance dst_b by two rows (branch delay slot)
394f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "b               2f                                \n" // skip the unaligned loop
395f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " nop                                              \n"
396f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
397f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
398f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang   "11:                                                  \n"
399f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
400f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
401f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s5, %[dst_a], %[dst_stride_a]    \n" // $s5 = second output row of a
402f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
403f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
404f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            $s6, %[dst_b], %[dst_stride_b]    \n" // $s6 = second output row of b
405f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
406f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
407f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
408f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
409f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
410f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
411f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t0, $t0, 16                      \n" // move |b0|a0| into the upper halfword
412f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
413f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t9, $t9, 16                      \n" // move |b2|a2| into the upper halfword
414f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
415f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
416f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s3, 0($s5)                       \n" // A3..A0 -> second row of a (swr/swl pair)
417f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s3, 3($s5)                       \n"
418f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s4, 0($s6)                       \n" // B3..B0 -> second row of b (swr/swl pair)
419f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s4, 3($s6)                       \n"
420f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
421f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
422f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
423f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
424f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
425f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
426f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
427f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
428f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s3, 0(%[dst_a])                  \n" // a3..a0 -> first row of a
429f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s3, 3(%[dst_a])                  \n"
430f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s4, 0(%[dst_b])                  \n" // b3..b0 -> first row of b
431f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s4, 3(%[dst_b])                  \n"
432f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
433f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
434f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B7|A7|B6|A6|
435f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
436f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
437f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
438f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t0, $t0, 16                      \n" // move |b4|a4| into the upper halfword
439f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
440f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t9, $t9, 16                      \n" // move |b6|a6| into the upper halfword
441f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
442f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
443f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s3, 4($s5)                       \n" // A7..A4 -> second row of a, bytes 4..7
444f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s3, 7($s5)                       \n"
445f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s4, 4($s6)                       \n" // B7..B4 -> second row of b, bytes 4..7
446f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s4, 7($s6)                       \n"
447f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
448f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
449f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
450f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
451f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu           %[src], 4                         \n" // next 4 source bytes in every row
452f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addiu           $t1, -1                           \n" // one iteration done
453f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t0, %[dst_stride_a], 1           \n" // dst_stride_a x 2
454f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "sll             $t8, %[dst_stride_b], 1           \n" // dst_stride_b x 2
455f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s3, 4(%[dst_a])                  \n" // a7..a4 -> first row of a, bytes 4..7
456f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s3, 7(%[dst_a])                  \n"
457f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swr             $s4, 4(%[dst_b])                  \n" // b7..b4 -> first row of b, bytes 4..7
458f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "swl             $s4, 7(%[dst_b])                  \n"
459f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "addu            %[dst_a], %[dst_a], $t0           \n" // advance dst_a by two rows
460f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "bnez            $t1, 11b                          \n"
461f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      " addu           %[dst_b], %[dst_b], $t8           \n" // advance dst_b by two rows (branch delay slot)
462f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
463f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      "2:                                                \n"
464f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      ".set pop                                          \n"
465f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      : [src] "+r" (src),
466f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        [dst_a] "+r" (dst_a),
467f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        [dst_b] "+r" (dst_b),
468f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        [width] "+r" (width),  // NOTE(review): only read by the asm; "+r" is stricter than needed
469f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        [src_stride] "+r" (src_stride)  // NOTE(review): only read by the asm; "+r" is stricter than needed
470f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      : [dst_stride_a] "r" (dst_stride_a),
471f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        [dst_stride_b] "r" (dst_stride_b)
      // Scratch registers named explicitly in the asm text above.
472f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang      : "t0", "t1",  "t2", "t3",  "t4", "t5",
473f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        "t6", "t7", "t8", "t9",
474f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        "s0", "s1", "s2", "s3",
475f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang        "s4", "s5", "s6"
476f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang  );
477f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang}
478f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
479f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
480f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang
481f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus
482f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang}  // extern "C"
483f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang}  // namespace libyuv
484f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif
485