;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vp9_iht4x4_16_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

    ; IDCT4x4_1D
    ;
    ; One-dimensional inverse DCT applied to the four columns of a 4x4
    ; matrix of 16-bit values at once.
    ;
    ; Inputs:   d16-d19  input[0] .. input[3], four lanes each
    ;           d0       cospi_8_64  in every lane
    ;           d1       cospi_16_64 in every lane
    ;           d2       cospi_24_64 in every lane
    ; Outputs:  d16-d19  output[0] .. output[3]
    ; Scratch:  q10-q15  (d23 and d24 are the halves of q11/q12 used here)
    MACRO
    IDCT4x4_1D
    ; stage 1: even part needs the sum and difference of input[0]/input[2]
    vadd.s16    d23, d16, d18   ; input[0] + input[2]
    vsub.s16    d24, d16, d18   ; input[0] - input[2]

    ; 16x16 -> 32 bit products; the odd part is finished with
    ; multiply-accumulate / multiply-subtract on the same accumulators
    vmull.s16   q15, d17, d2    ; input[1] * cospi_24_64
    vmull.s16   q10, d17, d0    ; input[1] * cospi_8_64
    vmull.s16   q13, d23, d1    ; (input[0] + input[2]) * cospi_16_64
    vmull.s16   q14, d24, d1    ; (input[0] - input[2]) * cospi_16_64
    vmlsl.s16   q15, d19, d0    ; q15 -= input[3] * cospi_8_64
    vmlal.s16   q10, d19, d2    ; q10 += input[3] * cospi_24_64

    ; dct_const_round_shift: rounding, saturating narrow by 14 bits.
    ; The destinations are chosen so that q13 = {step[0], step[1]} and
    ; q14 = {step[3], step[2]} afterwards.
    vqrshrn.s32 d26, q13, #14   ; step[0]
    vqrshrn.s32 d27, q14, #14   ; step[1]
    vqrshrn.s32 d29, q15, #14   ; step[2]
    vqrshrn.s32 d28, q10, #14   ; step[3]

    ; stage 2
    ; output[0] = step[0] + step[3];
    ; output[1] = step[1] + step[2];
    ; output[3] = step[0] - step[3];
    ; output[2] = step[1] - step[2];
    vadd.s16    q8,  q13, q14   ; d16 = output[0], d17 = output[1]
    vsub.s16    q9,  q13, q14   ; d18 = output[3], d19 = output[2]
    vswp        d18, d19        ; put output[2]/output[3] in natural order
    MEND

    ; Parallel 1D IADST on all the columns of a 4x4 matrix of 16-bit
    ; values which are loaded in d16-d19. d3 must contain sinpi_1_9.
    ; d4 must contain sinpi_2_9. d5 must contain sinpi_4_9. d6 must contain
    ; sinpi_3_9. The output will be stored back into d16-d19 registers.
    ; This macro will touch q10-q15 registers and use them as buffer
    ; during calculation.
; IADST4x4_1D: 1-D inverse ADST over the four columns held in d16-d19.
; Reads only NEON registers: d16-d19 (data) and d3-d6 (sine constants).
    MACRO
    IADST4x4_1D
    ; 16x16 -> 32 bit partial products
    vmull.s16   q10, d3, d16    ; s0 = sinpi_1_9 * x0
    vmull.s16   q11, d4, d16    ; s1 = sinpi_2_9 * x0
    vmull.s16   q12, d6, d17    ; s2 = sinpi_3_9 * x1
    vmull.s16   q13, d5, d18    ; s3 = sinpi_4_9 * x2
    vmull.s16   q14, d3, d18    ; s4 = sinpi_1_9 * x2
    vmovl.s16   q15, d16        ; expand x0 from 16 bit to 32 bit
    vaddw.s16   q15, q15, d19   ; x0 + x3
    ; q8/q9 alias d16-d19; each input half is overwritten only after
    ; its last read above / in the same instruction.
    vmull.s16   q8, d4, d19     ; s5 = sinpi_2_9 * x3
    vsubw.s16   q15, q15, d18   ; s7 = x0 + x3 - x2
    vmull.s16   q9, d5, d19     ; s6 = sinpi_4_9 * x3

    vadd.s32    q10, q10, q13   ; x0 = s0 + s3 + s5
    vadd.s32    q10, q10, q8
    vsub.s32    q11, q11, q14   ; x1 = s1 - s4 - s6
    ; Widen sinpi_3_9 from d6 to 32-bit lanes. Taking it from d6 instead
    ; of a core register keeps the macro independent of whatever r0 holds
    ; at the expansion site.
    vmovl.s16   q8, d6          ; sinpi_3_9 in every 32-bit lane
    vsub.s32    q11, q11, q9
    vmul.s32    q15, q15, q8    ; x2 = sinpi_3_9 * s7
                                ; x3 = s2 (already in q12)

    vadd.s32    q13, q10, q12   ; s0 = x0 + x3
    vadd.s32    q10, q10, q11   ; x0 + x1
    vadd.s32    q14, q11, q12   ; s1 = x1 + x3
    vsub.s32    q10, q10, q12   ; s3 = x0 + x1 - x3

    ; dct_const_round_shift: rounding, saturating narrow by 14 bits
    vqrshrn.s32 d16, q13, #14   ; output[0] = s0
    vqrshrn.s32 d17, q14, #14   ; output[1] = s1
    vqrshrn.s32 d18, q15, #14   ; output[2] = x2
    vqrshrn.s32 d19, q10, #14   ; output[3] = s3
    MEND

    ; Generate cosine constants in d0 - d2 for the IDCT.
    ; Overwrites core registers r0, r3 and r12.
    MACRO
    GENERATE_COSINE_CONSTANTS
    ; Each 16-bit constant is built as high byte + low byte.
    ; cospi_8_64 = 15137 = 0x3b21
    mov         r0, #0x3b00
    add         r0, #0x21
    ; cospi_16_64 = 11585 = 0x2d41
    mov         r3, #0x2d00
    add         r3, #0x41
    ; cospi_24_64 = 6270 = 0x187e
    mov         r12, #0x1800
    add         r12, #0x7e

    ; generate constant vectors
    vdup.16     d0, r0          ; duplicate cospi_8_64
    vdup.16     d1, r3          ; duplicate cospi_16_64
    vdup.16     d2, r12         ; duplicate cospi_24_64
    MEND

    ; Generate sine constants for the IADST: d3 = sinpi_1_9,
    ; d4 = sinpi_2_9, d5 = sinpi_4_9, q3 (d6 and d7) = sinpi_3_9.
    ; Overwrites core registers r0, r3 and r12; r0 holds sinpi_3_9 on exit.
    MACRO
    GENERATE_SINE_CONSTANTS
    ; sinpi_1_9 = 5283 = 0x14A3
    mov         r0, #0x1400
    add         r0, #0xa3
    ; sinpi_2_9 = 9929 = 0x26C9
    mov         r3, #0x2600
    add         r3, #0xc9
    ; sinpi_4_9 = 15212 = 0x3B6C
    mov         r12, #0x3b00
    add         r12, #0x6c

    ; generate constant vectors
    vdup.16     d3, r0          ; duplicate sinpi_1_9

    ; r0 is free again once d3 is filled, so reuse it for the 4th constant
    ; sinpi_3_9 = 13377 = 0x3441
    mov         r0, #0x3400
    add         r0, #0x41

    vdup.16     d4, r3          ; duplicate sinpi_2_9
    vdup.16     d5, r12         ; duplicate sinpi_4_9
    vdup.16     q3, r0          ; duplicate sinpi_3_9
    MEND

    ; Transpose a 4x4 matrix of 16-bit values in place. The data is held
    ; in d16-d19.
; TRANSPOSE4X4: operates on d16-d19 (q8/q9) only, no other register is used.
    MACRO
    TRANSPOSE4X4
    vtrn.16     d16, d17        ; swap 16-bit elements between d16 and d17
    vtrn.16     d18, d19        ; same for d18 and d19
    vtrn.32     q8, q9          ; then swap 32-bit pairs between q8 and q9
    MEND

    AREA     Block, CODE, READONLY ; name this block of code
;void vp9_iht4x4_16_add_neon(int16_t *input, uint8_t *dest,
;                               int dest_stride, int tx_type)
;
; r0  int16_t *input
; r1  uint8_t *dest
; r2  int dest_stride
; r3  int tx_type
;
; Hybrid inverse transform of one 4x4 block; the result is added to the
; four 4-pixel rows at dest and written back with unsigned saturation.
; This function will only handle tx_type of 1,2,3: tx_type 2 and 3 are
; tested explicitly, every other value takes the iadst_idct path.
|vp9_iht4x4_16_add_neon| PROC

    ; load the 16 coefficients into d16-d19
    vld1.s16    {q8,q9}, [r0]!

    ; transpose the input data
    TRANSPOSE4X4

    ; decide the type of transform (must happen before the constant
    ; macros run, because they overwrite r3)
    cmp         r3, #2
    beq         idct_iadst
    cmp         r3, #3
    beq         iadst_iadst

iadst_idct
    ; generate constants
    GENERATE_COSINE_CONSTANTS
    GENERATE_SINE_CONSTANTS

    ; first transform rows
    IDCT4x4_1D

    ; transpose the matrix
    TRANSPOSE4X4

    ; then transform columns
    IADST4x4_1D

    b end_vp9_iht4x4_16_add_neon

idct_iadst
    ; generate constants
    GENERATE_COSINE_CONSTANTS
    GENERATE_SINE_CONSTANTS

    ; first transform rows
    IADST4x4_1D

    ; transpose the matrix
    TRANSPOSE4X4

    ; then transform columns
    IDCT4x4_1D

    b end_vp9_iht4x4_16_add_neon

iadst_iadst
    ; generate constants (only the sine set is needed on this path)
    GENERATE_SINE_CONSTANTS

    ; first transform rows
    IADST4x4_1D

    ; transpose the matrix
    TRANSPOSE4X4

    ; then transform columns
    IADST4x4_1D

end_vp9_iht4x4_16_add_neon
    ; ROUND_POWER_OF_TWO(temp_out[j], 4)
    vrshr.s16   q8, q8, #4
    vrshr.s16   q9, q9, #4

    ; fetch four rows of four destination pixels, one 32-bit lane per row;
    ; r1 advances by dest_stride after each of the first three loads and
    ; is left pointing at the last row
    vld1.32     {d26[0]}, [r1], r2
    vld1.32     {d26[1]}, [r1], r2
    vld1.32     {d27[0]}, [r1], r2
    vld1.32     {d27[1]}, [r1]

    ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
    vaddw.u8    q8, q8, d26
    vaddw.u8    q9, q9, d27

    ; clip_pixel: saturating narrow from signed 16 bit to unsigned 8 bit
    vqmovun.s16 d26, q8
    vqmovun.s16 d27, q9

    ; do the stores in reverse order with negative post-increment, by changing
    ; the sign of the stride
    rsb         r2, r2, #0
    vst1.32     {d27[1]}, [r1], r2
    vst1.32     {d27[0]}, [r1], r2
    vst1.32     {d26[1]}, [r1], r2
    vst1.32     {d26[0]}, [r1]  ; no post-increment
    bx          lr
    ENDP  ; |vp9_iht4x4_16_add_neon|

    END