/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h"

#include <stdint.h>

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
147b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org// MIPS optimization of the function WebRtcIsacfix_MatrixProduct1.
157b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org// Bit-exact with the function WebRtcIsacfix_MatrixProduct1C from
167b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org// entropy_coding.c file.
177b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.orgvoid WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],
187b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int32_t matrix1[],
197b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      int32_t matrix_product[],
207b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix1_index_factor1,
217b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix0_index_factor1,
227b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix1_index_init_case,
237b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix1_index_step,
247b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix0_index_step,
257b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int inner_loop_count,
267b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int mid_loop_count,
277b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int shift) {
287b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  if (matrix1_index_init_case != 0) {
297b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int j = SUBFRAMES, k = 0, n = 0;
307b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t r0, r1, r2, sum32;
317b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t* product_start = matrix_product;
327b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t* product_ptr;
337b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t product_step = 4 * mid_loop_count;
347b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix0_step = 2 * matrix0_index_step;
357b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix1_step = 4 * matrix1_index_step;
367b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
377b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
387b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const int16_t* matrix0_start = matrix0;
397b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const int32_t* matrix1_start = matrix1;
407b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int16_t* matrix0_ptr;
417b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t* matrix1_ptr;
427b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org
437b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    __asm __volatile (
447b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      ".set     push                                                       \n\t"
457b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      ".set     noreorder                                                  \n\t"
467b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org     "1:                                                                   \n\t"
477b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[product_ptr],     %[product_start],     $0               \n\t"
487b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[k],               %[product_step],      $0               \n\t"
497b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[j],               %[j],                 -1               \n\t"
507b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_start],   %[matrix1],           $0               \n\t"
517b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org     "2:                                                                   \n\t"
527b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t"
537b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t"
547b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[n],               %[inner_loop_count],  $0               \n\t"
557b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "mul      %[sum32],           $0,                   $0               \n\t"
567b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org     "3:                                                                   \n\t"
577b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "lw       %[r0],              0(%[matrix1_ptr])                      \n\t"
587b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "lh       %[r1],              0(%[matrix0_ptr])                      \n\t"
597b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t"
607b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sllv     %[r0],              %[r0],                %[shift]         \n\t"
617b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "andi     %[r2],              %[r0],                0xffff           \n\t"
627b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sra      %[r2],              %[r2],                1                \n\t"
637b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "mul      %[r2],              %[r2],                %[r1]            \n\t"
647b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sra      %[r0],              %[r0],                16               \n\t"
657b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "mul      %[r0],              %[r0],                %[r1]            \n\t"
667b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t"
677b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[n],               %[n],                 -1               \n\t"
687b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
697b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "shra_r.w %[r2],              %[r2],                15               \n\t"
707b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#else
717b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[r2],              %[r2],                0x4000           \n\t"
727b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sra      %[r2],              %[r2],                15               \n\t"
737b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#endif
747b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[sum32],           %[sum32],             %[r2]            \n\t"
757b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "bgtz     %[n],               3b                                     \n\t"
767b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      " addu    %[sum32],           %[sum32],             %[r0]            \n\t"
777b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[k],               %[k],                 -4               \n\t"
787b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t"
797b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sw       %[sum32],           0(%[product_ptr])                      \n\t"
807b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "bgtz     %[k],               2b                                     \n\t"
817b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      " addiu   %[product_ptr],     %[product_ptr],       4                \n\t"
827b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t"
837b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "bgtz     %[j],               1b                                     \n\t"
847b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      " addu    %[product_start],   %[product_start],     %[product_step]  \n\t"
857b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      ".set     pop                                                        \n\t"
867b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
877b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start),
887b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
897b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
907b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
917b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      : [product_step] "r" (product_step), [matrix1] "r" (matrix1),
927b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [inner_loop_count] "r" (inner_loop_count),
937b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
947b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
957b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix0_step2] "r" (matrix0_step2)
967b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      : "hi", "lo", "memory"
977b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    );
987b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  } else {
997b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int j = SUBFRAMES, k = 0, n = 0;
1007b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t r0, r1, r2, sum32;
1017b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t* product_start = matrix_product;
1027b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t* product_ptr;
1037b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t product_step = 4 * mid_loop_count;
1047b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix0_step = 2 * matrix0_index_step;
1057b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix1_step = 4 * matrix1_index_step;
1067b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
1077b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
1087b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const int16_t* matrix0_start = matrix0;
1097b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    const int32_t* matrix1_start = matrix1;
1107b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int16_t* matrix0_ptr;
1117b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    int32_t* matrix1_ptr;
1127b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org
1137b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    __asm __volatile (
1147b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      ".set     push                                                       \n\t"
1157b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      ".set     noreorder                                                  \n\t"
1167b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org     "1:                                                                   \n\t"
1177b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[product_ptr],     %[product_start],     $0               \n\t"
1187b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[k],               %[product_step],      $0               \n\t"
1197b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[j],               %[j],                 -1               \n\t"
1207b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_start],   %[matrix0],           $0               \n\t"
1217b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org     "2:                                                                   \n\t"
1227b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t"
1237b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t"
1247b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[n],               %[inner_loop_count],  $0               \n\t"
1257b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "mul      %[sum32],           $0,                   $0               \n\t"
1267b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org     "3:                                                                   \n\t"
1277b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "lw       %[r0],              0(%[matrix1_ptr])                      \n\t"
1287b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "lh       %[r1],              0(%[matrix0_ptr])                      \n\t"
1297b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t"
1307b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sllv     %[r0],              %[r0],                %[shift]         \n\t"
1317b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "andi     %[r2],              %[r0],                0xffff           \n\t"
1327b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sra      %[r2],              %[r2],                1                \n\t"
1337b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "mul      %[r2],              %[r2],                %[r1]            \n\t"
1347b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sra      %[r0],              %[r0],                16               \n\t"
1357b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "mul      %[r0],              %[r0],                %[r1]            \n\t"
1367b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t"
1377b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[n],               %[n],                 -1               \n\t"
1387b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
1397b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "shra_r.w %[r2],              %[r2],                15               \n\t"
1407b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#else
1417b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[r2],              %[r2],                0x4000           \n\t"
1427b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sra      %[r2],              %[r2],                15               \n\t"
1437b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#endif
1447b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[sum32],           %[sum32],             %[r2]            \n\t"
1457b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "bgtz     %[n],               3b                                     \n\t"
1467b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      " addu    %[sum32],           %[sum32],             %[r0]            \n\t"
1477b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addiu    %[k],               %[k],                 -4               \n\t"
1487b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t"
1497b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "sw       %[sum32],           0(%[product_ptr])                      \n\t"
1507b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "bgtz     %[k],               2b                                     \n\t"
1517b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      " addiu   %[product_ptr],     %[product_ptr],       4                \n\t"
1527b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t"
1537b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      "bgtz     %[j],               1b                                     \n\t"
1547b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      " addu    %[product_start],   %[product_start],     %[product_step]  \n\t"
1557b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      ".set     pop                                                        \n\t"
1567b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
1577b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start),
1587b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
1597b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
1607b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
1617b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      : [product_step] "r" (product_step), [matrix0] "r" (matrix0),
1627b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [inner_loop_count] "r" (inner_loop_count),
1637b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
1647b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
1657b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org        [matrix0_step2] "r" (matrix0_step2)
1667b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      : "hi", "lo", "memory"
1677b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    );
1687b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  }
1697b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org}
1707b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org
1717b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org// MIPS optimization of the function WebRtcIsacfix_MatrixProduct2.
1727b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org// Bit-exact with the function WebRtcIsacfix_MatrixProduct2C from
1737b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org// entropy_coding.c file.
1747b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.orgvoid WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],
1757b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int32_t matrix1[],
1767b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      int32_t matrix_product[],
1777b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix0_index_factor,
1787b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org                                      const int matrix0_index_step) {
1797b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  int j = 0, n = 0;
1807b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  int loop_count = SUBFRAMES;
1817b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  const int16_t* matrix0_ptr;
1827b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  const int32_t* matrix1_ptr;
1837b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  const int16_t* matrix0_start = matrix0;
1847b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  const int matrix0_step = 2 * matrix0_index_step;
1857b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  const int matrix0_step2 = 2 * matrix0_index_factor;
1867b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  int32_t r0, r1, r2, r3, r4, sum32, sum32_2;
1877b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org
1887b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  __asm __volatile (
1897b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    ".set       push                                                   \n\t"
1907b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    ".set       noreorder                                              \n\t"
1917b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[j],              %[loop_count],     $0               \n\t"
1927b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[matrix0_start],  %[matrix0],        $0               \n\t"
1937b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org   "1:                                                                 \n\t"
1947b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[matrix1_ptr],    %[matrix1],        $0               \n\t"
1957b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[matrix0_ptr],    %[matrix0_start],  $0               \n\t"
1967b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[n],              %[loop_count],     $0               \n\t"
1977b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "mul        %[sum32],          $0,                $0               \n\t"
1987b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "mul        %[sum32_2],        $0,                $0               \n\t"
1997b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org   "2:                                                                 \n\t"
2007b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "lw         %[r0],             0(%[matrix1_ptr])                   \n\t"
2017b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "lw         %[r1],             4(%[matrix1_ptr])                   \n\t"
2027b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "lh         %[r2],             0(%[matrix0_ptr])                   \n\t"
2037b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "andi       %[r3],             %[r0],             0xffff           \n\t"
2047b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[r3],             %[r3],             1                \n\t"
2057b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "mul        %[r3],             %[r3],             %[r2]            \n\t"
2067b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "andi       %[r4],             %[r1],             0xffff           \n\t"
2077b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[r4],             %[r4],             1                \n\t"
2087b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "mul        %[r4],             %[r4],             %[r2]            \n\t"
2097b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[r0],             %[r0],             16               \n\t"
2107b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "mul        %[r0],             %[r0],             %[r2]            \n\t"
2117b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[r1],             %[r1],             16               \n\t"
2127b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "mul        %[r1],             %[r1],             %[r2]            \n\t"
2137b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
2147b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "shra_r.w   %[r3],             %[r3],             15               \n\t"
2157b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "shra_r.w   %[r4],             %[r4],             15               \n\t"
2167b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#else
2177b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addiu      %[r3],             %[r3],             0x4000           \n\t"
2187b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[r3],             %[r3],             15               \n\t"
2197b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addiu      %[r4],             %[r4],             0x4000           \n\t"
2207b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[r4],             %[r4],             15               \n\t"
2217b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org#endif
2227b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addiu      %[matrix1_ptr],    %[matrix1_ptr],    8                \n\t"
2237b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[matrix0_ptr],    %[matrix0_ptr],    %[matrix0_step]  \n\t"
2247b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addiu      %[n],              %[n],              -1               \n\t"
2257b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[sum32],          %[sum32],          %[r3]            \n\t"
2267b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[sum32_2],        %[sum32_2],        %[r4]            \n\t"
2277b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[sum32],          %[sum32],          %[r0]            \n\t"
2287b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "bgtz       %[n],              2b                                  \n\t"
2297b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    " addu      %[sum32_2],        %[sum32_2],        %[r1]            \n\t"
2307b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[sum32],          %[sum32],          3                \n\t"
2317b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sra        %[sum32_2],        %[sum32_2],        3                \n\t"
2327b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addiu      %[j],              %[j],              -1               \n\t"
2337b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "addu       %[matrix0_start],  %[matrix0_start],  %[matrix0_step2] \n\t"
2347b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sw         %[sum32],          0(%[matrix_product])                \n\t"
2357b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "sw         %[sum32_2],        4(%[matrix_product])                \n\t"
2367b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    "bgtz       %[j],              1b                                  \n\t"
2377b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    " addiu     %[matrix_product], %[matrix_product], 8                \n\t"
2387b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    ".set       pop                                                    \n\t"
2397b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start),
2407b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
2417b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2),
2427b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
2437b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product)
2447b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0),
2457b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step),
2467b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org      [matrix0_step2] "r" (matrix0_step2)
2477b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org    : "hi", "lo", "memory"
2487b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org  );
2497b12812b0605c1538cab2f84e6baa3277e395cf3andrew@webrtc.org}
250