1f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang/* 2f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * 4f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * Use of this source code is governed by a BSD-style license 5f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * that can be found in the LICENSE file in the root of the source 6f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * tree. An additional intellectual property rights grant can be found 7f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * in the file PATENTS. All contributing project authors may 8f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * be found in the AUTHORS file in the root of the source tree. 9f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang */ 10f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 11f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/row.h" 12f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/rotate_row.h" 13f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 14f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/basic_types.h" 15f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 16f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus 17f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangnamespace libyuv { 18f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangextern "C" { 19f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif 20f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 21f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#if !defined(LIBYUV_DISABLE_MIPS) && \ 22f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ 23f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang (_MIPS_SIM == _MIPS_SIM_ABI32) 24f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 25f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangvoid TransposeWx8_DSPR2(const uint8* src, int src_stride, 26f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang uint8* dst, int dst_stride, int width) { 27f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm__ __volatile__ ( 28f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set push \n" 29f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set noreorder \n" 30f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 31f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 32f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 33f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t3, $t2, %[src_stride] \n" 34f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t5, $t4, %[src_stride] \n" 35f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t6, $t2, $t4 \n" 36f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t0, %[dst], 0x3 \n" 37f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t1, %[dst_stride], 0x3 \n" 38f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t1 \n" 39f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $t0, 11f \n" 40f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " subu $t7, $t9, %[src_stride] \n" 41f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride word aligned 42f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "1: \n" 43f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbu $t0, 0(%[src]) \n" 44f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t1, %[src_stride](%[src]) \n" 45f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t8, $t2(%[src]) \n" 46f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t9, $t3(%[src]) \n" 47f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t1, $t1, 16 \n" 48f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 49f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t1 \n" 50f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t8, $t8, $t9 \n" 51f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s0, $t8, $t0 \n" 52f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t0, $t4(%[src]) \n" 53f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t1, $t5(%[src]) \n" 54f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t8, $t6(%[src]) \n" 55f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t9, $t7(%[src]) \n" 56f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t1, $t1, 16 \n" 57f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 58f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t1 \n" 59f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t8, $t8, $t9 \n" 60f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s1, $t8, $t0 \n" 61f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s0, 0(%[dst]) \n" 62f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[width], -1 \n" 63f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[src], 1 \n" 64f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s1, 4(%[dst]) \n" 65f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez %[width], 1b \n" 66f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " addu %[dst], %[dst], %[dst_stride] \n" 67f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "b 2f \n" 68f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride unaligned 69f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "11: \n" 70f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbu $t0, 0(%[src]) \n" 71f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t1, %[src_stride](%[src]) \n" 72f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t8, $t2(%[src]) \n" 73f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t9, $t3(%[src]) \n" 74f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t1, $t1, 16 \n" 75f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 76f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t1 \n" 77f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t8, $t8, $t9 \n" 78f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s0, $t8, $t0 \n" 79f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t0, $t4(%[src]) \n" 80f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t1, $t5(%[src]) \n" 81f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t8, $t6(%[src]) \n" 82f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lbux $t9, $t7(%[src]) \n" 83f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t1, $t1, 16 \n" 84f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 85f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t1 \n" 86f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t8, $t8, $t9 \n" 87f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s1, $t8, $t0 \n" 88f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s0, 0(%[dst]) \n" 89f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s0, 3(%[dst]) \n" 90f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[width], -1 \n" 91f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[src], 1 \n" 92f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s1, 4(%[dst]) \n" 93f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s1, 7(%[dst]) \n" 94f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez %[width], 11b \n" 95f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu %[dst], %[dst], %[dst_stride] \n" 96f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "2: \n" 97f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set pop \n" 98f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang :[src] "+r" (src), 99f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst] "+r" (dst), 100f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [width] "+r" (width) 101f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang :[src_stride] "r" (src_stride), 102f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst_stride] "r" (dst_stride) 103f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang : "t0", "t1", "t2", "t3", "t4", "t5", 104f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "t6", "t7", "t8", "t9", 105f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "s0", "s1" 106f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ); 107f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 108f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 109f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangvoid TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, 110f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang uint8* dst, int dst_stride, int width) { 111f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm__ __volatile__ ( 112f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set noat \n" 113f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set push \n" 114f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set noreorder \n" 115f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "beqz %[width], 2f \n" 116f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 117f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 118f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 119f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t3, $t2, %[src_stride] \n" 120f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t5, $t4, %[src_stride] \n" 121f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t6, $t2, $t4 \n" 122f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 123f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "srl $AT, %[width], 0x2 \n" 124f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t0, %[dst], 0x3 \n" 125f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t1, %[dst_stride], 0x3 \n" 126f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t1 \n" 127f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $t0, 11f \n" 128f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " subu $t7, $t9, %[src_stride] \n" 129f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride word aligned 130f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "1: \n" 131f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lw $t0, 0(%[src]) \n" 132f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t1, %[src_stride](%[src]) \n" 133f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, $t2(%[src]) \n" 134f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t3(%[src]) \n" 135f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 136f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 30 | 20 | 10 | 00 | 137f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 31 | 21 | 11 | 01 | 138f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 32 | 22 | 12 | 02 | 139f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 33 | 23 | 13 | 03 | 140f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 141f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s0, $t1, $t0 \n" 142f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s1, $t9, $t8 \n" 143f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s2, $t1, $t0 \n" 144f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s3, $t9, $t8 \n" 145f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 146f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s0 = | 21 | 01 | 20 | 00 | 147f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s1 = | 23 | 03 | 22 | 02 | 148f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s2 = | 31 | 11 | 30 | 10 | 149f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s3 = | 33 | 13 | 32 | 12 | 150f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 151f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s4, $s1, $s0 \n" 152f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s5, $s1, $s0 \n" 153f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s6, $s3, $s2 \n" 154f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s7, $s3, $s2 \n" 155f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 156f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s4 = | 03 | 02 | 01 | 00 | 157f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s5 = | 23 | 22 | 21 | 20 | 158f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s6 = | 13 | 12 | 11 | 10 | 159f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s7 = | 33 | 32 | 31 | 30 | 160f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 161f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t0, $t4(%[src]) \n" 162f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t1, $t5(%[src]) \n" 163f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, $t6(%[src]) \n" 164f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t7(%[src]) \n" 165f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 166f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 34 | 24 | 14 | 04 | 167f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 35 | 25 | 15 | 05 | 168f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 36 | 26 | 16 | 06 | 169f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 37 | 27 | 17 | 07 | 170f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 171f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s0, $t1, $t0 \n" 172f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s1, $t9, $t8 \n" 173f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s2, $t1, $t0 \n" 174f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s3, $t9, $t8 \n" 175f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 176f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s0 = | 25 | 05 | 24 | 04 | 177f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s1 = | 27 | 07 | 26 | 06 | 178f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s2 = | 35 | 15 | 34 | 14 | 179f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s3 = | 37 | 17 | 36 | 16 | 180f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 181f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $t0, $s1, $s0 \n" 182f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $t1, $s1, $s0 \n" 183f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $t8, $s3, $s2 \n" 184f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $t9, $s3, $s2 \n" 185f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 186f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t0 = | 07 | 06 | 05 | 04 | 187f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t1 = | 27 | 26 | 25 | 24 | 188f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t8 = | 17 | 16 | 15 | 14 | 189f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t9 = | 37 | 36 | 35 | 34 | 190f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 191f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s0, %[dst], %[dst_stride] \n" 192f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s1, $s0, %[dst_stride] \n" 193f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s2, $s1, %[dst_stride] \n" 194f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 195f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s4, 0(%[dst]) \n" 196f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $t0, 4(%[dst]) \n" 197f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s6, 0($s0) \n" 198f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $t8, 4($s0) \n" 199f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s5, 0($s1) \n" 200f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $t1, 4($s1) \n" 201f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s7, 0($s2) \n" 202f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $t9, 4($s2) \n" 203f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 204f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu $AT, -1 \n" 205f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[src], 4 \n" 206f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 207f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $AT, 1b \n" 208f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " addu %[dst], $s2, %[dst_stride] \n" 209f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "b 2f \n" 210f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang//dst + dst_stride unaligned 211f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "11: \n" 212f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lw $t0, 0(%[src]) \n" 213f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t1, %[src_stride](%[src]) \n" 214f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, $t2(%[src]) \n" 215f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t3(%[src]) \n" 216f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 217f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 30 | 20 | 10 | 00 | 218f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 31 | 21 | 11 | 01 | 219f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 32 | 22 | 12 | 02 | 220f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 33 | 23 | 13 | 03 | 221f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 222f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s0, $t1, $t0 \n" 223f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s1, $t9, $t8 \n" 224f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s2, $t1, $t0 \n" 225f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s3, $t9, $t8 \n" 226f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 227f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s0 = | 21 | 01 | 20 | 00 | 228f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s1 = | 23 | 03 | 22 | 02 | 229f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s2 = | 31 | 11 | 30 | 10 | 230f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s3 = | 33 | 13 | 32 | 12 | 231f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 232f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s4, $s1, $s0 \n" 233f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s5, $s1, $s0 \n" 234f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s6, $s3, $s2 \n" 235f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s7, $s3, $s2 \n" 236f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 237f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s4 = | 03 | 02 | 01 | 00 | 238f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s5 = | 23 | 22 | 21 | 20 | 239f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s6 = | 13 | 12 | 11 | 10 | 240f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s7 = | 33 | 32 | 31 | 30 | 241f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 242f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t0, $t4(%[src]) \n" 243f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t1, $t5(%[src]) \n" 244f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, $t6(%[src]) \n" 245f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t7(%[src]) \n" 246f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 247f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t0 = | 34 | 24 | 14 | 04 | 248f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t1 = | 35 | 25 | 15 | 05 | 249f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t8 = | 36 | 26 | 16 | 06 | 250f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// t9 = | 37 | 27 | 17 | 07 | 251f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 252f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s0, $t1, $t0 \n" 253f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s1, $t9, $t8 \n" 254f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s2, $t1, $t0 \n" 255f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s3, $t9, $t8 \n" 256f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 257f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s0 = | 25 | 05 | 24 | 04 | 258f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s1 = | 27 | 07 | 26 | 06 | 259f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s2 = | 35 | 15 | 34 | 14 | 260f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // s3 = | 37 | 17 | 36 | 16 | 261f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 262f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $t0, $s1, $s0 \n" 263f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $t1, $s1, $s0 \n" 264f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $t8, $s3, $s2 \n" 265f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $t9, $s3, $s2 \n" 266f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 267f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t0 = | 07 | 06 | 05 | 04 | 268f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t1 = | 27 | 26 | 25 | 24 | 269f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t8 = | 17 | 16 | 15 | 14 | 270f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // t9 = | 37 | 36 | 35 | 34 | 271f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 272f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s0, %[dst], %[dst_stride] \n" 273f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s1, $s0, %[dst_stride] \n" 274f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s2, $s1, %[dst_stride] \n" 275f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 276f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s4, 0(%[dst]) \n" 277f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s4, 3(%[dst]) \n" 278f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $t0, 4(%[dst]) \n" 279f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $t0, 7(%[dst]) \n" 280f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s6, 0($s0) \n" 281f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s6, 3($s0) \n" 282f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $t8, 4($s0) \n" 283f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $t8, 7($s0) \n" 284f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s5, 0($s1) \n" 285f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s5, 3($s1) \n" 286f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $t1, 4($s1) \n" 287f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $t1, 7($s1) \n" 288f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s7, 0($s2) \n" 289f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s7, 3($s2) \n" 290f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $t9, 4($s2) \n" 291f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $t9, 7($s2) \n" 292f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 293f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu $AT, -1 \n" 294f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[src], 4 \n" 295f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 296f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $AT, 11b \n" 297f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " addu %[dst], $s2, %[dst_stride] \n" 298f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "2: \n" 299f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set pop \n" 300f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set at \n" 301f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang :[src] "+r" (src), 302f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst] "+r" (dst), 303f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [width] "+r" (width) 304f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang :[src_stride] "r" (src_stride), 305f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst_stride] "r" (dst_stride) 306f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", 307f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7" 308f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ); 309f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 310f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 311f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangvoid TransposeUVWx8_DSPR2(const uint8* src, int src_stride, 312f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang uint8* dst_a, int dst_stride_a, 313f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang uint8* dst_b, int dst_stride_b, 314f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang int width) { 315f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm__ __volatile__ ( 316f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set push \n" 317f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set noreorder \n" 318f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "beqz %[width], 2f \n" 319f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 320f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 321f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 322f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t3, $t2, %[src_stride] \n" 323f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t5, $t4, %[src_stride] \n" 324f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $t6, $t2, $t4 \n" 325f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "subu $t7, $t9, %[src_stride] \n" 326f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "srl $t1, %[width], 1 \n" 327f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 328f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b 329f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t0, %[dst_a], 0x3 \n" 330f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t8, %[dst_b], 0x3 \n" 331f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t8 \n" 332f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $t8, %[dst_stride_a], 0x3 \n" 333f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "andi $s5, %[dst_stride_b], 0x3 \n" 334f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t8, $t8, $s5 \n" 335f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "or $t0, $t0, $t8 \n" 336f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $t0, 11f \n" 337f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " nop \n" 338f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// dst + dst_stride word aligned (both, a & b dst addresses) 339f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "1: \n" 340f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| 341f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| 342f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s5, %[dst_a], %[dst_stride_a] \n" 343f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| 344f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| 345f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s6, %[dst_b], %[dst_stride_b] \n" 346f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 347f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| 348f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| 349f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| 350f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| 351f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 352f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t0, $t0, 16 \n" 353f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| 354f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 355f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| 356f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 357f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s3, 0($s5) \n" 358f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s4, 0($s6) \n" 359f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 360f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| 361f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| 362f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 363f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| 364f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| 365f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| 366f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| 367f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s3, 0(%[dst_a]) \n" 368f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s4, 0(%[dst_b]) \n" 369f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 370f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| 371f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| 372f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| 373f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| 374f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 375f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t0, $t0, 16 \n" 376f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| 377f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 378f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| 379f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s3, 4($s5) \n" 380f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s4, 4($s6) \n" 381f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 382f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| 383f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| 384f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 385f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[src], 4 \n" 386f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu $t1, -1 \n" 387f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t0, %[dst_stride_a], 1 \n" 388f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t8, %[dst_stride_b], 1 \n" 389f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s3, 4(%[dst_a]) \n" 390f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sw $s4, 4(%[dst_b]) \n" 391f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu %[dst_a], %[dst_a], $t0 \n" 392f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $t1, 1b \n" 393f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " addu %[dst_b], %[dst_b], $t8 \n" 394f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "b 2f \n" 395f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " nop \n" 396f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 397f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned 398f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "11: \n" 399f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| 400f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| 401f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s5, %[dst_a], %[dst_stride_a] \n" 402f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| 403f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| 404f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu $s6, %[dst_b], %[dst_stride_b] \n" 405f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 406f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| 407f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| 408f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| 409f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| 410f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 411f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t0, $t0, 16 \n" 412f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| 413f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 414f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| 415f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 416f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s3, 0($s5) \n" 417f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s3, 3($s5) \n" 418f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s4, 0($s6) \n" 419f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s4, 3($s6) \n" 420f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 421f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| 422f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| 423f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 424f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| 425f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| 426f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| 427f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| 428f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s3, 0(%[dst_a]) \n" 429f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s3, 3(%[dst_a]) \n" 430f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s4, 0(%[dst_b]) \n" 431f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s4, 3(%[dst_b]) \n" 432f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 433f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| 434f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| 435f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| 436f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| 437f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 438f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t0, $t0, 16 \n" 439f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| 440f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t9, $t9, 16 \n" 441f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| 442f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 443f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s3, 4($s5) \n" 444f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s3, 7($s5) \n" 445f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s4, 4($s6) \n" 446f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s4, 7($s6) \n" 447f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 448f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| 449f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| 450f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 451f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu %[src], 4 \n" 452f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addiu $t1, -1 \n" 453f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t0, %[dst_stride_a], 1 \n" 454f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "sll $t8, %[dst_stride_b], 1 \n" 455f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s3, 4(%[dst_a]) \n" 456f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s3, 7(%[dst_a]) \n" 457f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swr $s4, 4(%[dst_b]) \n" 458f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "swl $s4, 7(%[dst_b]) \n" 459f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "addu %[dst_a], %[dst_a], $t0 \n" 460f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "bnez $t1, 11b \n" 461f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang " addu %[dst_b], %[dst_b], $t8 \n" 462f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 463f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "2: \n" 464f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ".set pop \n" 465f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang : [src] "+r" (src), 466f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst_a] "+r" (dst_a), 467f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst_b] "+r" (dst_b), 468f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [width] "+r" (width), 469f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [src_stride] "+r" (src_stride) 470f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang : [dst_stride_a] "r" (dst_stride_a), 471f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang [dst_stride_b] "r" (dst_stride_b) 472f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang : "t0", "t1", "t2", "t3", "t4", "t5", 473f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "t6", "t7", "t8", "t9", 474f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "s0", "s1", "s2", "s3", 475f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang "s4", "s5", "s6" 476f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ); 477f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 478f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 479f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) 480f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 481f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus 482f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} // extern "C" 483f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} // namespace libyuv 484f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif 485