ref_vqdmulh_lane.c revision c94d4c1e27bb7e2b2869cc230872669df1459533
1073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon/* 2073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 3c94d4c1e27bb7e2b2869cc230872669df1459533Christophe LyonCopyright (c) 2009, 2010, 2011, 2012 STMicroelectronics 4073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonWritten by Christophe Lyon 5073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 6073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonPermission is hereby granted, free of charge, to any person obtaining a copy 7073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyonof this software and associated documentation files (the "Software"), to deal 8073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyonin the Software without restriction, including without limitation the rights 9073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyonto use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyoncopies of the Software, and to permit persons to whom the Software is 11073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyonfurnished to do so, subject to the following conditions: 12073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 13073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonThe above copyright notice and this permission notice shall be included in 14073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyonall copies or substantial portions of the Software. 15073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 16073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonTHE SOFTWARE. 23073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 24073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon*/ 25073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 26073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#ifdef __arm__ 27073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#include <arm_neon.h> 28073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#else 290dab5f72da4b2434882b51b44ac377af9e3160feChristophe Lyon#include "stm-arm-neon.h" 30073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#endif 31073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 32073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#include "stm-arm-neon-ref.h" 33073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 34073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define INSN vqdmulh 35073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_MSG "VQDMULH_LANE" 36073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) 37073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define FNNAME(NAME) FNNAME1(NAME) 38073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 39073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonFNNAME (INSN) 40073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon{ 41073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ 42073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ 43eb8034b39a424f157b7117141e23cdad329d5cc0Christophe Lyon Set_Neon_Overflow(0); \ 44073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon VECT_VAR(vector_res, T1, W, N) = \ 45073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ 46073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon VECT_VAR(vector2, T1, W, N2), \ 47073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon L); \ 48073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ 49073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon VECT_VAR(vector_res, T1, W, N)); \ 50073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W)) 51073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 52073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* Two auxliary macros are necessary to expand INSN */ 53073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ 54073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) 55073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 56073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L) \ 57073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) 58073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 59073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* With ARM RVCT, we need to declare variables before any executable 60073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon statement */ 61073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector, int, 16, 4); 62073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector, int, 32, 2); 63073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector, int, 16, 8); 64073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector, int, 32, 4); 65073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 66073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector_res, int, 16, 4); 67073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector_res, int, 32, 2); 68073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector_res, int, 16, 8); 69073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector_res, int, 32, 4); 70073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 71073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with 72073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon the same number of elements, so we need only one variable of each 73073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon type. */ 74073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector2, int, 16, 4); 75073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon DECL_VARIABLE(vector2, int, 32, 2); 76073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 77073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon clean_results (); 78073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 79073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VLOAD(vector, buffer, , int, s, 16, 4); 80073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VLOAD(vector, buffer, , int, s, 32, 2); 81073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 82073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VLOAD(vector, buffer, q, int, s, 16, 8); 83073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VLOAD(vector, buffer, q, int, s, 32, 4); 84073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 85073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* Initialize vector2 */ 86073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector2, , int, s, 16, 4, 0x55); 87073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); 88073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 89073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* Choose lane arbitrarily */ 90073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); 91073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2); 92073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); 93073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3); 94073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0); 95073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 96073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon /* FIXME: only a subset of the result buffers are used, but we 97073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon output all of them */ 98073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon dump_results_hex (TEST_MSG); 99073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 100073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 101073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector, , int, s, 16, 4, 0x8000); 102073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); 103073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); 104073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); 105073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); 106073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); 107073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon 108073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon fprintf(ref_file, "\n%s overflow output:\n", 109073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_MSG " (check mul overflow)"); 110073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3); 111073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); 112073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2); 113073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1); 114073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon dump_results_hex2 (TEST_MSG, " (check mul overflow)"); 115073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon} 116