153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org/* 253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * 453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * Use of this source code is governed by a BSD-style license 553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * that can be found in the LICENSE file in the root of the source 653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * tree. An additional intellectual property rights grant can be found 753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * in the file PATENTS. All contributing project authors may 853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org * be found in the AUTHORS file in the root of the source tree. 953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org */ 1053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 1153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org#include "./vp9_rtcd.h" 1253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org#include "vp9/common/vp9_common.h" 1353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 14ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgvoid vp9_idct16x16_256_add_neon_pass1(const int16_t *input, 15ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t *output, 16ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int output_stride); 17ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgvoid vp9_idct16x16_256_add_neon_pass2(const int16_t *src, 18ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t *output, 19ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t *pass1Output, 20ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t skip_adding, 21ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org uint8_t *dest, 22ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int dest_stride); 23ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgvoid vp9_idct16x16_10_add_neon_pass1(const int16_t *input, 24ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t *output, 25ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int output_stride); 26ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgvoid vp9_idct16x16_10_add_neon_pass2(const int16_t *src, 27ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t *output, 28ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t *pass1Output, 29ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int16_t skip_adding, 30ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org uint8_t *dest, 31ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int dest_stride); 32ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 33ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ 34ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgextern void vp9_push_neon(int64_t *store); 35ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgextern void vp9_pop_neon(int64_t *store); 36ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org 37ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgvoid vp9_idct16x16_256_add_neon(const int16_t *input, 38ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org uint8_t *dest, int dest_stride) { 39ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int64_t store_reg[8]; 4053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org int16_t pass1_output[16*16] = {0}; 4153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org int16_t row_idct_output[16*16] = {0}; 4253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 4353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // save d8-d15 register values. 44ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_push_neon(store_reg); 4553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 4653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the upper 8 rows */ 4753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 4853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 49ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8); 5053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 5153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 5253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7 5353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // which will be saved into row_idct_output. 54ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass2(input+1, 5553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output, 5653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 5753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 0, 5853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest, 5953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 6053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 6153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the lower 8 rows */ 6253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 6353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 64ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); 6553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 6653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 6753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7 6853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // which will be saved into row_idct_output. 69ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass2(input+8*16+1, 7053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output+8, 7153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 7253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 0, 7353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest, 7453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 7553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 7653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the left 8 columns */ 7753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 7853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 79ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); 8053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 8153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 8253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7. 8353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Then add the result to the destination data. 84ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, 8553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output, 8653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 8753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 1, 8853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest, 8953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 9053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 9153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the right 8 columns */ 9253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 9353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 94ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); 9553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 9653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 9753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7. 9853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Then add the result to the destination data. 99ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, 10053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output+8, 10153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 10253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 1, 10353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest+8, 10453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 10553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 10653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // restore d8-d15 register values. 107ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_pop_neon(store_reg); 10853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 10953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org return; 11053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org} 11153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 112ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgvoid vp9_idct16x16_10_add_neon(const int16_t *input, 113ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org uint8_t *dest, int dest_stride) { 114ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org int64_t store_reg[8]; 11553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org int16_t pass1_output[16*16] = {0}; 11653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org int16_t row_idct_output[16*16] = {0}; 11753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 11853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // save d8-d15 register values. 119ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_push_neon(store_reg); 12053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 12153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the upper 8 rows */ 12253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 12353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 124ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8); 12553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 12653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 12753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7 12853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // which will be saved into row_idct_output. 129ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_10_add_neon_pass2(input+1, 13053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output, 13153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 13253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 0, 13353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest, 13453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 13553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 13653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Skip Parallel idct on the lower 8 rows as they are all 0s */ 13753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 13853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the left 8 columns */ 13953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 14053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 141ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); 14253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 14353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 14453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7. 14553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Then add the result to the destination data. 146ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, 14753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output, 14853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 14953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 1, 15053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest, 15153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 15253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 15353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org /* Parallel idct on the right 8 columns */ 15453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 15553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // stage 6 result in pass1_output. 156ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); 15753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 15853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 15953a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // with result in pass1(pass1_output) to calculate final result in stage 7. 16053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // Then add the result to the destination data. 161ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, 16253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org row_idct_output+8, 16353a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org pass1_output, 16453a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 1, 16553a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest+8, 16653a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org dest_stride); 16753a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 16853a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org // restore d8-d15 register values. 169ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org vp9_pop_neon(store_reg); 17053a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org 17153a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org return; 17253a13f1fa964820f7a8f9d3932a6f3c0433f8bf5fgalligan@chromium.org} 173