/* ref_vqdmulh_lane.c -- revision c94d4c1e27bb7e2b2869cc230872669df1459533 */
/*

Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics
Written by Christophe Lyon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

26073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#ifdef __arm__
27073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#include <arm_neon.h>
28073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#else
290dab5f72da4b2434882b51b44ac377af9e3160feChristophe Lyon#include "stm-arm-neon.h"
30073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#endif
31073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
32073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#include "stm-arm-neon-ref.h"
33073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
/* Stem of the intrinsic family under test.  It is pasted together with an
   optional "q", "_lane_" and a type suffix to form the intrinsic names.  */
#define INSN vqdmulh
/* Label written in front of everything this test emits.  */
#define TEST_MSG "VQDMULH_LANE"
/* Declarator of the test entry point.  FNNAME forwards to FNNAME1 so that
   its argument is macro-expanded *before* it reaches the ## operators:
   FNNAME (INSN) becomes "void exec_vqdmulh_lane (void)", whereas using
   FNNAME1 (INSN) directly would paste the literal token INSN.  */
#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void)
#define FNNAME(NAME) FNNAME1(NAME)

39073831adf9442c019e8d34b18b0c04b1d780a19Christophe LyonFNNAME (INSN)
40073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon{
41073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result.  */
42073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L)	\
43eb8034b39a424f157b7117141e23cdad329d5cc0Christophe Lyon  Set_Neon_Overflow(0);						\
44073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  VECT_VAR(vector_res, T1, W, N) =				\
45073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon    INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N),		\
46073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon			   VECT_VAR(vector2, T1, W, N2),	\
47073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon			   L);					\
48073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),			\
49073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon		    VECT_VAR(vector_res, T1, W, N));		\
50073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W))
51073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
52073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* Two auxliary macros are necessary to expand INSN */
53073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L)	\
54073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L)
55073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
56073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L)	\
57073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L)
58073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
59073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* With ARM RVCT, we need to declare variables before any executable
60073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon     statement */
61073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector, int, 16, 4);
62073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector, int, 32, 2);
63073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector, int, 16, 8);
64073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector, int, 32, 4);
65073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
66073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector_res, int, 16, 4);
67073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector_res, int, 32, 2);
68073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector_res, int, 16, 8);
69073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector_res, int, 32, 4);
70073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
71073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with
72073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon     the same number of elements, so we need only one variable of each
73073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon     type.  */
74073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector2, int, 16, 4);
75073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  DECL_VARIABLE(vector2, int, 32, 2);
76073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
77073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  clean_results ();
78073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
79073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VLOAD(vector, buffer, , int, s, 16, 4);
80073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VLOAD(vector, buffer, , int, s, 32, 2);
81073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
82073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VLOAD(vector, buffer, q, int, s, 16, 8);
83073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VLOAD(vector, buffer, q, int, s, 32, 4);
84073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
85073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* Initialize vector2 */
86073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector2, , int, s, 16, 4, 0x55);
87073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector2, , int, s, 32, 2, 0xBB);
88073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
89073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* Choose lane arbitrarily */
90073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG);
91073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2);
92073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1);
93073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3);
94073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0);
95073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
96073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  /* FIXME: only a subset of the result buffers are used, but we
97073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon     output all of them */
98073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  dump_results_hex (TEST_MSG);
99073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
100073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
101073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector, , int, s, 16, 4, 0x8000);
102073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector, , int, s, 32, 2, 0x80000000);
103073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector, q, int, s, 16, 8, 0x8000);
104073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000);
105073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector2, , int, s, 16, 4, 0x8000);
106073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000);
107073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon
108073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  fprintf(ref_file, "\n%s overflow output:\n",
109073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon	  TEST_MSG " (check mul overflow)");
110073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3);
111073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1);
112073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2);
113073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1);
114073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon  dump_results_hex2 (TEST_MSG, " (check mul overflow)");
115073831adf9442c019e8d34b18b0c04b1d780a19Christophe Lyon}
116