/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"

14f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org// MIPS optimization of the function WebRtcIsacfix_MatrixProduct1.
15f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org// Bit-exact with the function WebRtcIsacfix_MatrixProduct1C from
16f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org// entropy_coding.c file.
17f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.orgvoid WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],
18f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int32_t matrix1[],
19f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      int32_t matrix_product[],
20f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix1_index_factor1,
21f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix0_index_factor1,
22f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix1_index_init_case,
23f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix1_index_step,
24f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix0_index_step,
25f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int inner_loop_count,
26f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int mid_loop_count,
27f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int shift) {
28f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  if (matrix1_index_init_case != 0) {
29f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int j = SUBFRAMES, k = 0, n = 0;
30f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t r0, r1, r2, sum32;
31f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t* product_start = matrix_product;
32f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t* product_ptr;
33f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t product_step = 4 * mid_loop_count;
34f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix0_step = 2 * matrix0_index_step;
35f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix1_step = 4 * matrix1_index_step;
36f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
37f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
38f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const int16_t* matrix0_start = matrix0;
39f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const int32_t* matrix1_start = matrix1;
40f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int16_t* matrix0_ptr;
41f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t* matrix1_ptr;
42f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org
43f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    __asm __volatile (
44f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      ".set     push                                                       \n\t"
45f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      ".set     noreorder                                                  \n\t"
46f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org     "1:                                                                   \n\t"
47f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[product_ptr],     %[product_start],     $0               \n\t"
48f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[k],               %[product_step],      $0               \n\t"
49f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[j],               %[j],                 -1               \n\t"
50f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_start],   %[matrix1],           $0               \n\t"
51f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org     "2:                                                                   \n\t"
52f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t"
53f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t"
54f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[n],               %[inner_loop_count],  $0               \n\t"
55f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "mul      %[sum32],           $0,                   $0               \n\t"
56f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org     "3:                                                                   \n\t"
57f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "lw       %[r0],              0(%[matrix1_ptr])                      \n\t"
58f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "lh       %[r1],              0(%[matrix0_ptr])                      \n\t"
59f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t"
60f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sllv     %[r0],              %[r0],                %[shift]         \n\t"
61f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "andi     %[r2],              %[r0],                0xffff           \n\t"
62f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sra      %[r2],              %[r2],                1                \n\t"
63f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "mul      %[r2],              %[r2],                %[r1]            \n\t"
64f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sra      %[r0],              %[r0],                16               \n\t"
65f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "mul      %[r0],              %[r0],                %[r1]            \n\t"
66f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t"
67f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[n],               %[n],                 -1               \n\t"
68f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
69f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "shra_r.w %[r2],              %[r2],                15               \n\t"
70f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#else
71f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[r2],              %[r2],                0x4000           \n\t"
72f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sra      %[r2],              %[r2],                15               \n\t"
73f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#endif
74f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[sum32],           %[sum32],             %[r2]            \n\t"
75f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "bgtz     %[n],               3b                                     \n\t"
76f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      " addu    %[sum32],           %[sum32],             %[r0]            \n\t"
77f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[k],               %[k],                 -4               \n\t"
78f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t"
79f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sw       %[sum32],           0(%[product_ptr])                      \n\t"
80f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "bgtz     %[k],               2b                                     \n\t"
81f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      " addiu   %[product_ptr],     %[product_ptr],       4                \n\t"
82f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t"
83f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "bgtz     %[j],               1b                                     \n\t"
84f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      " addu    %[product_start],   %[product_start],     %[product_step]  \n\t"
85f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      ".set     pop                                                        \n\t"
86f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
87f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start),
88f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
89f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
90f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
91f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      : [product_step] "r" (product_step), [matrix1] "r" (matrix1),
92f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [inner_loop_count] "r" (inner_loop_count),
93f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
94f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
95f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix0_step2] "r" (matrix0_step2)
96f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      : "hi", "lo", "memory"
97f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    );
98f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  } else {
99f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int j = SUBFRAMES, k = 0, n = 0;
100f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t r0, r1, r2, sum32;
101f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t* product_start = matrix_product;
102f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t* product_ptr;
103f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t product_step = 4 * mid_loop_count;
104f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix0_step = 2 * matrix0_index_step;
105f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix1_step = 4 * matrix1_index_step;
106f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
107f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
108f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const int16_t* matrix0_start = matrix0;
109f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    const int32_t* matrix1_start = matrix1;
110f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int16_t* matrix0_ptr;
111f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    int32_t* matrix1_ptr;
112f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org
113f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    __asm __volatile (
114f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      ".set     push                                                       \n\t"
115f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      ".set     noreorder                                                  \n\t"
116f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org     "1:                                                                   \n\t"
117f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[product_ptr],     %[product_start],     $0               \n\t"
118f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[k],               %[product_step],      $0               \n\t"
119f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[j],               %[j],                 -1               \n\t"
120f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_start],   %[matrix0],           $0               \n\t"
121f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org     "2:                                                                   \n\t"
122f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t"
123f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t"
124f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[n],               %[inner_loop_count],  $0               \n\t"
125f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "mul      %[sum32],           $0,                   $0               \n\t"
126f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org     "3:                                                                   \n\t"
127f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "lw       %[r0],              0(%[matrix1_ptr])                      \n\t"
128f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "lh       %[r1],              0(%[matrix0_ptr])                      \n\t"
129f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t"
130f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sllv     %[r0],              %[r0],                %[shift]         \n\t"
131f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "andi     %[r2],              %[r0],                0xffff           \n\t"
132f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sra      %[r2],              %[r2],                1                \n\t"
133f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "mul      %[r2],              %[r2],                %[r1]            \n\t"
134f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sra      %[r0],              %[r0],                16               \n\t"
135f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "mul      %[r0],              %[r0],                %[r1]            \n\t"
136f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t"
137f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[n],               %[n],                 -1               \n\t"
138f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
139f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "shra_r.w %[r2],              %[r2],                15               \n\t"
140f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#else
141f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[r2],              %[r2],                0x4000           \n\t"
142f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sra      %[r2],              %[r2],                15               \n\t"
143f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#endif
144f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[sum32],           %[sum32],             %[r2]            \n\t"
145f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "bgtz     %[n],               3b                                     \n\t"
146f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      " addu    %[sum32],           %[sum32],             %[r0]            \n\t"
147f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addiu    %[k],               %[k],                 -4               \n\t"
148f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t"
149f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "sw       %[sum32],           0(%[product_ptr])                      \n\t"
150f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "bgtz     %[k],               2b                                     \n\t"
151f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      " addiu   %[product_ptr],     %[product_ptr],       4                \n\t"
152f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t"
153f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      "bgtz     %[j],               1b                                     \n\t"
154f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      " addu    %[product_start],   %[product_start],     %[product_step]  \n\t"
155f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      ".set     pop                                                        \n\t"
156f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
157f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start),
158f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
159f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
160f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
161f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      : [product_step] "r" (product_step), [matrix0] "r" (matrix0),
162f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [inner_loop_count] "r" (inner_loop_count),
163f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
164f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
165f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org        [matrix0_step2] "r" (matrix0_step2)
166f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      : "hi", "lo", "memory"
167f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    );
168f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  }
169f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org}
170f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org
171f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org// MIPS optimization of the function WebRtcIsacfix_MatrixProduct2.
172f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org// Bit-exact with the function WebRtcIsacfix_MatrixProduct2C from
173f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org// entropy_coding.c file.
174f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.orgvoid WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],
175f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int32_t matrix1[],
176f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      int32_t matrix_product[],
177f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix0_index_factor,
178f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org                                      const int matrix0_index_step) {
179f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  int j = 0, n = 0;
180f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  int loop_count = SUBFRAMES;
181f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  const int16_t* matrix0_ptr;
182f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  const int32_t* matrix1_ptr;
183f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  const int16_t* matrix0_start = matrix0;
184f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  const int matrix0_step = 2 * matrix0_index_step;
185f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  const int matrix0_step2 = 2 * matrix0_index_factor;
186f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  int32_t r0, r1, r2, r3, r4, sum32, sum32_2;
187f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org
188f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  __asm __volatile (
189f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    ".set       push                                                   \n\t"
190f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    ".set       noreorder                                              \n\t"
191f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[j],              %[loop_count],     $0               \n\t"
192f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[matrix0_start],  %[matrix0],        $0               \n\t"
193f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org   "1:                                                                 \n\t"
194f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[matrix1_ptr],    %[matrix1],        $0               \n\t"
195f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[matrix0_ptr],    %[matrix0_start],  $0               \n\t"
196f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[n],              %[loop_count],     $0               \n\t"
197f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "mul        %[sum32],          $0,                $0               \n\t"
198f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "mul        %[sum32_2],        $0,                $0               \n\t"
199f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org   "2:                                                                 \n\t"
200f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "lw         %[r0],             0(%[matrix1_ptr])                   \n\t"
201f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "lw         %[r1],             4(%[matrix1_ptr])                   \n\t"
202f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "lh         %[r2],             0(%[matrix0_ptr])                   \n\t"
203f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "andi       %[r3],             %[r0],             0xffff           \n\t"
204f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[r3],             %[r3],             1                \n\t"
205f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "mul        %[r3],             %[r3],             %[r2]            \n\t"
206f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "andi       %[r4],             %[r1],             0xffff           \n\t"
207f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[r4],             %[r4],             1                \n\t"
208f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "mul        %[r4],             %[r4],             %[r2]            \n\t"
209f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[r0],             %[r0],             16               \n\t"
210f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "mul        %[r0],             %[r0],             %[r2]            \n\t"
211f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[r1],             %[r1],             16               \n\t"
212f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "mul        %[r1],             %[r1],             %[r2]            \n\t"
213f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
214f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "shra_r.w   %[r3],             %[r3],             15               \n\t"
215f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "shra_r.w   %[r4],             %[r4],             15               \n\t"
216f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#else
217f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addiu      %[r3],             %[r3],             0x4000           \n\t"
218f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[r3],             %[r3],             15               \n\t"
219f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addiu      %[r4],             %[r4],             0x4000           \n\t"
220f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[r4],             %[r4],             15               \n\t"
221f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org#endif
222f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addiu      %[matrix1_ptr],    %[matrix1_ptr],    8                \n\t"
223f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[matrix0_ptr],    %[matrix0_ptr],    %[matrix0_step]  \n\t"
224f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addiu      %[n],              %[n],              -1               \n\t"
225f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[sum32],          %[sum32],          %[r3]            \n\t"
226f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[sum32_2],        %[sum32_2],        %[r4]            \n\t"
227f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[sum32],          %[sum32],          %[r0]            \n\t"
228f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "bgtz       %[n],              2b                                  \n\t"
229f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    " addu      %[sum32_2],        %[sum32_2],        %[r1]            \n\t"
230f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[sum32],          %[sum32],          3                \n\t"
231f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sra        %[sum32_2],        %[sum32_2],        3                \n\t"
232f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addiu      %[j],              %[j],              -1               \n\t"
233f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "addu       %[matrix0_start],  %[matrix0_start],  %[matrix0_step2] \n\t"
234f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sw         %[sum32],          0(%[matrix_product])                \n\t"
235f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "sw         %[sum32_2],        4(%[matrix_product])                \n\t"
236f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    "bgtz       %[j],              1b                                  \n\t"
237f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    " addiu     %[matrix_product], %[matrix_product], 8                \n\t"
238f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    ".set       pop                                                    \n\t"
239f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start),
240f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
241f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2),
242f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
243f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product)
244f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0),
245f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step),
246f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org      [matrix0_step2] "r" (matrix0_step2)
247f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org    : "hi", "lo", "memory"
248f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org  );
249f86b2625889b065ccf67e580a8456f0b5e42b261andrew@webrtc.org}
250