1ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian/* 2ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * 4ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian */ 10ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 11ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include <arm_neon.h> 12ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "./vp9_rtcd.h" 13ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "./vpx_config.h" 14ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 15ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "vpx/vpx_integer.h" 16ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 17ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid vp9_subtract_block_neon(int rows, int cols, 18ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int16_t *diff, ptrdiff_t diff_stride, 19ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8_t *src, ptrdiff_t src_stride, 20ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8_t *pred, ptrdiff_t pred_stride) { 21ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int r, c; 22ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 23ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian if (cols > 16) { 24ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < rows; ++r) { 25ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (c = 0; c < cols; c += 32) { 26ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]); 27ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]); 28ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]); 29ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]); 30ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff_lo_00 = vsubl_u8(vget_low_u8(v_src_00), 31ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vget_low_u8(v_pred_00)); 32ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff_hi_00 = vsubl_u8(vget_high_u8(v_src_00), 33ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vget_high_u8(v_pred_00)); 34ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff_lo_16 = vsubl_u8(vget_low_u8(v_src_16), 35ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vget_low_u8(v_pred_16)); 36ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff_hi_16 = vsubl_u8(vget_high_u8(v_src_16), 37ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vget_high_u8(v_pred_16)); 38ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(v_diff_lo_00)); 39ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00)); 40ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16)); 41ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16)); 42ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 43ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian diff += diff_stride; 44ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pred += pred_stride; 45ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_stride; 46ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 47ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } else if (cols > 8) { 48ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < rows; ++r) { 49ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x16_t v_src = vld1q_u8(&src[0]); 50ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x16_t v_pred = vld1q_u8(&pred[0]); 51ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff_lo = vsubl_u8(vget_low_u8(v_src), 52ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vget_low_u8(v_pred)); 53ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff_hi = vsubl_u8(vget_high_u8(v_src), 54ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vget_high_u8(v_pred)); 55ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo)); 56ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi)); 57ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian diff += diff_stride; 58ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pred += pred_stride; 59ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_stride; 60ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 61ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } else if (cols > 4) { 62ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < rows; ++r) { 63ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x8_t v_src = vld1_u8(&src[0]); 64ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8x8_t v_pred = vld1_u8(&pred[0]); 65ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint16x8_t v_diff = vsubl_u8(v_src, v_pred); 66ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff)); 67ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian diff += diff_stride; 68ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pred += pred_stride; 69ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_stride; 70ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 71ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } else { 72ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < rows; ++r) { 73ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (c = 0; c < cols; ++c) 74ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian diff[c] = src[c] - pred[c]; 75ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 76ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian diff += diff_stride; 77ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pred += pred_stride; 78ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_stride; 79ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 80ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 81ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 82