1ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian/* 2ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * 4ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian */ 10ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 11ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include <arm_neon.h> 12ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "./vpx_config.h" 13ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 14ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE void vp8_loop_filter_simple_horizontal_edge_neon( 157bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *s, int p, const unsigned char *blimit) { 167bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *sp; 177bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t qblimit, q0u8; 187bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8; 197bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q2s16, q3s16, q13s16; 207bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8x8_t d8s8, d9s8; 
217bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8; 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann qblimit = vdupq_n_u8(*blimit); 247bc9febe8749e98a3812a0dc4380ceae75c29450Johann 257bc9febe8749e98a3812a0dc4380ceae75c29450Johann sp = s - (p << 1); 267bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u8 = vld1q_u8(sp); 277bc9febe8749e98a3812a0dc4380ceae75c29450Johann sp += p; 287bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u8 = vld1q_u8(sp); 297bc9febe8749e98a3812a0dc4380ceae75c29450Johann sp += p; 307bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u8 = vld1q_u8(sp); 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann sp += p; 327bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u8 = vld1q_u8(sp); 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann 347bc9febe8749e98a3812a0dc4380ceae75c29450Johann q15u8 = vabdq_u8(q6u8, q7u8); 357bc9febe8749e98a3812a0dc4380ceae75c29450Johann q14u8 = vabdq_u8(q5u8, q8u8); 367bc9febe8749e98a3812a0dc4380ceae75c29450Johann 377bc9febe8749e98a3812a0dc4380ceae75c29450Johann q15u8 = vqaddq_u8(q15u8, q15u8); 387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q14u8 = vshrq_n_u8(q14u8, 1); 397bc9febe8749e98a3812a0dc4380ceae75c29450Johann q0u8 = vdupq_n_u8(0x80); 407bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13s16 = vdupq_n_s16(3); 417bc9febe8749e98a3812a0dc4380ceae75c29450Johann q15u8 = vqaddq_u8(q15u8, q14u8); 427bc9febe8749e98a3812a0dc4380ceae75c29450Johann 437bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u8 = veorq_u8(q5u8, q0u8); 447bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u8 = veorq_u8(q6u8, q0u8); 457bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u8 = veorq_u8(q7u8, q0u8); 467bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u8 = veorq_u8(q8u8, q0u8); 477bc9febe8749e98a3812a0dc4380ceae75c29450Johann 487bc9febe8749e98a3812a0dc4380ceae75c29450Johann q15u8 = vcgeq_u8(qblimit, q15u8); 497bc9febe8749e98a3812a0dc4380ceae75c29450Johann 
507bc9febe8749e98a3812a0dc4380ceae75c29450Johann q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), 517bc9febe8749e98a3812a0dc4380ceae75c29450Johann vget_low_s8(vreinterpretq_s8_u8(q6u8))); 527bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)), 537bc9febe8749e98a3812a0dc4380ceae75c29450Johann vget_high_s8(vreinterpretq_s8_u8(q6u8))); 547bc9febe8749e98a3812a0dc4380ceae75c29450Johann 557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), vreinterpretq_s8_u8(q8u8)); 567bc9febe8749e98a3812a0dc4380ceae75c29450Johann 577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q2s16 = vmulq_s16(q2s16, q13s16); 587bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vmulq_s16(q3s16, q13s16); 597bc9febe8749e98a3812a0dc4380ceae75c29450Johann 607bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u8 = vdupq_n_u8(3); 617bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u8 = vdupq_n_u8(4); 627bc9febe8749e98a3812a0dc4380ceae75c29450Johann 637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); 647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8)); 657bc9febe8749e98a3812a0dc4380ceae75c29450Johann 667bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8s8 = vqmovn_s16(q2s16); 677bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9s8 = vqmovn_s16(q3s16); 687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s8 = vcombine_s8(d8s8, d9s8); 697bc9febe8749e98a3812a0dc4380ceae75c29450Johann 707bc9febe8749e98a3812a0dc4380ceae75c29450Johann q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8)); 717bc9febe8749e98a3812a0dc4380ceae75c29450Johann 727bc9febe8749e98a3812a0dc4380ceae75c29450Johann q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8)); 737bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8)); 747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q2s8 = vshrq_n_s8(q2s8, 3); 
757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s8 = vshrq_n_s8(q3s8, 3); 767bc9febe8749e98a3812a0dc4380ceae75c29450Johann 777bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8); 787bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8); 797bc9febe8749e98a3812a0dc4380ceae75c29450Johann 807bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); 817bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); 827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 837bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1q_u8(s, q7u8); 847bc9febe8749e98a3812a0dc4380ceae75c29450Johann s -= p; 857bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1q_u8(s, q6u8); 867bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 87ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 88ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 897bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_loop_filter_bhs_neon(unsigned char *y_ptr, int y_stride, 907bc9febe8749e98a3812a0dc4380ceae75c29450Johann const unsigned char *blimit) { 917bc9febe8749e98a3812a0dc4380ceae75c29450Johann y_ptr += y_stride * 4; 927bc9febe8749e98a3812a0dc4380ceae75c29450Johann vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); 937bc9febe8749e98a3812a0dc4380ceae75c29450Johann y_ptr += y_stride * 4; 947bc9febe8749e98a3812a0dc4380ceae75c29450Johann vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); 957bc9febe8749e98a3812a0dc4380ceae75c29450Johann y_ptr += y_stride * 4; 967bc9febe8749e98a3812a0dc4380ceae75c29450Johann vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); 977bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 98ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 99ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 
/*
 * Runs the simple edge filter once, on the edge at y_ptr itself (16 bytes
 * wide).
 *
 * y_ptr    - address of the row handed to the edge filter.
 * y_stride - distance in bytes between consecutive rows.
 * blimit   - pointer to the edge limit passed through to the edge filter.
 */
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit) {
  vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
}