@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ Reference code in filters.c. Output is bit-exact.

#include "webrtc/system_wrappers/interface/asm_defines.h"

GLOBAL_FUNCTION WebRtcIsacfix_AutocorrNeon
.align  2

@ int WebRtcIsacfix_AutocorrNeon(
@     int32_t* __restrict r,
@     const int16_t* __restrict x,
@     int16_t N,
@     int16_t order,
@     int16_t* __restrict scale);
@
@ Computes r[0] = sum(x[n] * x[n]) and, for i = 1..order,
@ r[i] = sum(x[n] * x[n + i]) over n = 0..N-i-1. All sums are accumulated in
@ 64 bits; every r[i] is then shifted by the same amount (-scale), which is
@ derived from the magnitude of r[0] so that r[0] fits in 32 bits.
@ Stores 'scale' through the fifth argument and returns (order + 1).
@
@ Register usage:
@   r0        r (output pointer, post-incremented by each store)
@   r1        x
@   r2        N
@   r3        order
@   [sp, #40] scale pointer, once r3 - r12 (10 words) have been pushed
@   r4 - r12  scratch (saved and restored by push/pop)
@   d29       (-scale), used as the per-lane shift amount for every r[i]
@
@ Assumptions visible in the code:
@   - LOOP_R0 consumes four samples per iteration and has no scalar tail, so
@     N is expected to be a positive multiple of 4; otherwise it reads past
@     x[N - 1].
@   - N and order are compared as full 32-bit signed registers, i.e. the
@     caller is expected to have sign-extended the int16_t arguments.

DEFINE_FUNCTION WebRtcIsacfix_AutocorrNeon
  push {r3 - r12}

  @ Constant initializations
  mov r4, #33
  vmov.i32 d0, #0
  vmov.i32 q8, #0
  vmov.i32 d29, #0                @ Initialize (-scale).
  vmov.u8 d30, #255               @ Initialize d30 as -1.
  vmov.i32 d0[0], r4              @ d0: 33 (0x21) in low word, 0 in high word.
  vmov.i32 d25, #32

  mov r5, r1                      @ x
  mov r6, r2                      @ N

@ Generate the first coefficient r0.
LOOP_R0:
  vld1.16 {d18}, [r5]!            @ x[]
  subs r6, r6, #4
  vmull.s16 q9, d18, d18
  vpadal.s32 q8, q9               @ Accumulate squares in two 64-bit lanes.
  bgt LOOP_R0

  vadd.i64 d16, d16, d17          @ d16 = 64-bit sum of squares.

  @ Calculate scaling (the value of shifting).
  vmov d17, d16

  @ Check overflow and determine the value for 'scale'.
  @ vclz cannot deal with a 64-bit, so we have to do vclz on both the upper and
  @ lower 32-bit words. Note that we don't care about the value of the upper
  @ word in d17.

  @ Check the case of 1 bit overflow. If it occurs store the results for
  @ scale and r[0] in d17 and d29.

  vshr.u64 d3, d16, #1
  vclt.s32 d1, d16, #0            @ < 0 ?
  vbit d17, d3, d1                @ For r[0]
  vbit d29, d30, d1               @ -scale = -1

  @ For the case of more than 1 bit overflow. If it occurs overwrite the
  @ results for scale and r[0] in d17 and d29.
  vclz.s32 d5, d16                @ Leading zeros of the two 32 bit words.
  vshr.s64 d26, d5, #32           @ Keep only the upper 32 bits.
  vsub.i64 d31, d26, d0           @ zeros - 33
  vshl.i64 d27, d26, #32
  vorr d27, d26                   @ Duplicate the high word with its low one.
  vshl.u64 d2, d16, d31           @ Shift by (-scale).
  vclt.s32 d1, d27, d25           @ < 32 ?
  vbit d17, d2, d1                @ For r[0]
  vbit d29, d31, d1               @ -scale

  vst1.32 d17[0], [r0]!           @ r[0]

  @ With order < 1 there are no further coefficients. Skip the lag loop, which
  @ would otherwise run once and store r[1] even though only (order + 1)
  @ coefficients are reported to the caller.
  cmp r3, #1
  blt AUTOCORR_DONE

  mov r5, #1                      @ outer loop counter

@ Generate rest of the coefficients
LOOP_R:
  vmov.i32 q8, #0                 @ Initialize the accumulation result.
  vmov.i32 q9, #0                 @ Initialize the accumulation result.
  mov r7, r1                      @ &x[0]
  add r6, r7, r5, lsl #1          @ x[i]
  sub r12, r2, r5                 @ N - i
  lsr r8, r12, #3                 @ inner loop counter
  sub r12, r8, lsl #3             @ Leftover samples to be processed

  @ The 8-sample loop below tests its counter only at the bottom, so it must
  @ not be entered when fewer than 8 products remain: it would load and
  @ accumulate 8 sample pairs that are outside the (N - i) valid products.
  cmp r8, #0
  beq AUTOCORR_FOUR_CHECK

LOOP_8X_SAMPLES:                  @ Multiple of 8 samples
  vld1.16 {d20, d21}, [r7]!       @ x[0, ...]
  vld1.16 {d22, d23}, [r6]!       @ x[i, ...]
  vmull.s16 q12, d20, d22
  vmull.s16 q13, d21, d23
  subs r8, #1
  vpadal.s32 q8, q12
  vpadal.s32 q9, q13
  bgt LOOP_8X_SAMPLES

AUTOCORR_FOUR_CHECK:
  cmp r12, #4
  blt REST_SAMPLES

Four_SAMPLES:
  vld1.16 d20, [r7]!
  vld1.16 d22, [r6]!
  vmull.s16 q12, d20, d22
  vpadal.s32 q8, q12
  sub r12, #4

REST_SAMPLES:
  mov r8, #0                      @ Initialize lower word of the accumulation.
  mov r4, #0                      @ Initialize upper word of the accumulation.
  cmp r12, #0
  ble SUMUP

LOOP_REST_SAMPLES:
  @ ldrh zero-extends, which is fine: smulbb multiplies only the bottom
  @ halfwords and treats them as signed.
  ldrh r9, [r7], #2               @ x[0, ...]
  ldrh r10, [r6], #2              @ x[i, ...]
  smulbb r11, r9, r10
  adds r8, r8, r11                @ lower word of the accumulation.
  adc r4, r4, r11, asr #31        @ upper word of the accumulation.
  subs r12, #1
  bgt LOOP_REST_SAMPLES

@ Add the multiplication results together and do a shift.
SUMUP:
  vadd.i64 d16, d17
  vadd.i64 d18, d19
  vadd.i64 d18, d16
  vmov d17, r8, r4                @ 64-bit scalar tail sum (r8 low, r4 high).
  vadd.i64 d18, d17
  vshl.s64 d18, d29               @ Shift left by (-scale).
  vst1.32 d18[0], [r0]!           @ r[i]

  add r5, #1
  cmp r5, r3
  ble LOOP_R

AUTOCORR_DONE:
  vneg.s32 d29, d29               @ Get value for 'scale'.
  ldr r2, [sp, #40]               @ &scale (5th argument, above 10 saved words)
  add r0, r3, #1                  @ return (order + 1)
  vst1.s16 d29[0], [r2]           @ Store 'scale'

  pop {r3 - r12}
  bx  lr