lp_bld_tgsi_action.c revision dde807b9dc038266fbe594c1a700283df007bf5e
1/**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 **************************************************************************/
29
30/**
31 * @file
32 * TGSI to LLVM IR translation.
33 *
34 * @author Jose Fonseca <jfonseca@vmware.com>
35 * @author Tom Stellard <thomas.stellard@amd.com>
36 *
37 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
38 * Brian Paul, and others.
39 */
40
41
42#include "lp_bld_tgsi_action.h"
43
44#include "lp_bld_tgsi.h"
45#include "lp_bld_arit.h"
46#include "lp_bld_const.h"
47#include "lp_bld_gather.h"
48#include "lp_bld_logic.h"
49
50#include "tgsi/tgsi_exec.h"
51
/* XXX: The CPU only defaults should be replaced by generic ones.  In most
 * cases, the CPU defaults are just wrappers around a function in
 * lp_bld_arit.c and these functions should be inlined here and the CPU
 * generic code should be removed and placed elsewhere.
 */
57
58/* Default actions */
59
60/* Generic fetch_arg functions */
61
62static void scalar_unary_fetch_args(
63   struct lp_build_tgsi_context * bld_base,
64   struct lp_build_emit_data * emit_data)
65{
66   /* src0.x */
67   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
68   emit_data->arg_count = 1;
69   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
70}
71
72static void scalar_binary_fetch_args(
73   struct lp_build_tgsi_context * bld_base,
74   struct lp_build_emit_data * emit_data)
75{
76   /* src0.x */
77   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
78                                            0, TGSI_CHAN_X);
79   /* src1.x */
80   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
81                                            1, TGSI_CHAN_X);
82   emit_data->arg_count = 2;
83   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
84}
85
86/* TGSI_OPCODE_ADD */
87static void
88add_emit(
89   const struct lp_build_tgsi_action * action,
90   struct lp_build_tgsi_context * bld_base,
91   struct lp_build_emit_data * emit_data)
92{
93   emit_data->output[emit_data->chan] = LLVMBuildFAdd(
94                                bld_base->base.gallivm->builder,
95                                emit_data->args[0], emit_data->args[1], "");
96}
97
98/* TGSI_OPCODE_ARR */
99static void
100arr_emit(
101   const struct lp_build_tgsi_action * action,
102   struct lp_build_tgsi_context * bld_base,
103   struct lp_build_emit_data * emit_data)
104{
105   emit_data->output[emit_data->chan] = lp_build_emit_llvm_unary(bld_base,
106                                         TGSI_OPCODE_ROUND, emit_data->args[0]);
107}
108
109/* TGSI_OPCODE_CLAMP */
110static void
111clamp_emit(
112   const struct lp_build_tgsi_action * action,
113   struct lp_build_tgsi_context * bld_base,
114   struct lp_build_emit_data * emit_data)
115{
116   LLVMValueRef tmp;
117   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
118                                   emit_data->args[0],
119                                   emit_data->args[1]);
120   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
121                                       TGSI_OPCODE_MIN, tmp, emit_data->args[2]);
122}
123
124/* DP* Helper */
125
126static void
127dp_fetch_args(
128   struct lp_build_tgsi_context * bld_base,
129   struct lp_build_emit_data * emit_data,
130   unsigned dp_components)
131{
132   unsigned chan, src;
133   for (src = 0; src < 2; src++) {
134      for (chan = 0; chan < dp_components; chan++) {
135         emit_data->args[(src * dp_components) + chan] =
136                     lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
137      }
138   }
139   emit_data->dst_type = bld_base->base.elem_type;
140}
141
142/* TGSI_OPCODE_DP2 */
/**
 * DP2 operands: two channels from each of src0 and src1
 * (args[0..1] = src0.xy, args[2..3] = src1.xy).
 */
static void
dp2_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 2;

   dp_fetch_args(bld_base, emit_data, num_components);
}
150
151static void
152dp2_emit(
153   const struct lp_build_tgsi_action * action,
154   struct lp_build_tgsi_context * bld_base,
155   struct lp_build_emit_data * emit_data)
156{
157   LLVMValueRef tmp0, tmp1;
158   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
159                                    emit_data->args[0] /* src0.x */,
160                                    emit_data->args[2] /* src1.x */);
161   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
162                                    emit_data->args[1] /* src0.y */,
163                                    emit_data->args[3] /* src1.y */);
164   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
165                                                    TGSI_OPCODE_ADD, tmp0, tmp1);
166}
167
168static struct lp_build_tgsi_action dp2_action = {
169   .fetch_args = dp2_fetch_args,
170   .emit = dp2_emit
171};
172
173/* TGSI_OPCODE_DP2A */
174static void
175dp2a_fetch_args(
176   struct lp_build_tgsi_context * bld_base,
177   struct lp_build_emit_data * emit_data)
178{
179   dp_fetch_args(bld_base, emit_data, 2);
180   emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
181                                            2, TGSI_CHAN_X);
182}
183
184static void
185dp2a_emit(
186   const struct lp_build_tgsi_action * action,
187   struct lp_build_tgsi_context * bld_base,
188   struct lp_build_emit_data * emit_data)
189{
190   LLVMValueRef tmp;
191   tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
192   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
193                                    emit_data->args[5], tmp);
194}
195
196static struct lp_build_tgsi_action dp2a_action = {
197   .fetch_args = dp2a_fetch_args,
198   .emit = dp2a_emit
199};
200
201/* TGSI_OPCODE_DP3 */
/**
 * DP3 operands: three channels from each of src0 and src1
 * (args[0..2] = src0.xyz, args[3..5] = src1.xyz).
 */
static void
dp3_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 3;

   dp_fetch_args(bld_base, emit_data, num_components);
}
209
210static void
211dp3_emit(
212   const struct lp_build_tgsi_action * action,
213   struct lp_build_tgsi_context * bld_base,
214   struct lp_build_emit_data * emit_data)
215{
216   LLVMValueRef tmp0, tmp1;
217   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
218                                    emit_data->args[0] /* src0.x */,
219                                    emit_data->args[3] /* src1.x */);
220   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
221                                    emit_data->args[1] /* src0.y */,
222                                    emit_data->args[4] /* src1.y */);
223   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0);
224   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
225                                    emit_data->args[2] /* src0.z */,
226                                    emit_data->args[5] /* src1.z */);
227   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
228                                                    TGSI_OPCODE_ADD, tmp0, tmp1);
229}
230
231static struct lp_build_tgsi_action dp3_action = {
232   .fetch_args = dp3_fetch_args,
233   .emit = dp3_emit
234};
235
/* TGSI_OPCODE_DP4 */
237
/**
 * DP4 operands: four channels from each of src0 and src1
 * (args[0..3] = src0.xyzw, args[4..7] = src1.xyzw).
 */
static void
dp4_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 4;

   dp_fetch_args(bld_base, emit_data, num_components);
}
245
246static void
247dp4_emit(
248   const struct lp_build_tgsi_action * action,
249   struct lp_build_tgsi_context * bld_base,
250   struct lp_build_emit_data * emit_data)
251{
252   LLVMValueRef tmp0, tmp1;
253   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
254                                    emit_data->args[0] /* src0.x */,
255                                    emit_data->args[4] /* src1.x */);
256   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
257                                    emit_data->args[1] /* src0.y */,
258                                    emit_data->args[5] /* src1.y */);
259   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
260   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
261                                    emit_data->args[2] /* src0.z */,
262                                    emit_data->args[6] /* src1.z */);
263   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
264   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
265                                    emit_data->args[3] /* src0.w */,
266                                    emit_data->args[7] /* src1.w */);
267   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
268                                                    TGSI_OPCODE_ADD, tmp0, tmp1);
269}
270
271static struct lp_build_tgsi_action dp4_action = {
272   .fetch_args = dp4_fetch_args,
273   .emit = dp4_emit
274};
275
276/* TGSI_OPCODE_DPH */
277static void
278dph_fetch_args(
279   struct lp_build_tgsi_context * bld_base,
280   struct lp_build_emit_data * emit_data)
281{
282   dp_fetch_args(bld_base, emit_data, 4);
283   /* src0.w */
284   emit_data->args[3] = bld_base->base.one;
285}
286
287const struct lp_build_tgsi_action dph_action = {
288   .fetch_args = dph_fetch_args,
289   .emit = dp4_emit
290};
291
292/* TGSI_OPCODE_DST */
293static void
294dst_fetch_args(
295   struct lp_build_tgsi_context * bld_base,
296   struct lp_build_emit_data * emit_data)
297{
298   /* src0.y */
299   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
300                                            0, TGSI_CHAN_Y);
301   /* src0.z */
302   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
303                                            0, TGSI_CHAN_Z);
304   /* src1.y */
305   emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
306                                            1, TGSI_CHAN_Y);
307   /* src1.w */
308   emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
309                                            1, TGSI_CHAN_W);
310}
311
312static void
313dst_emit(
314   const struct lp_build_tgsi_action * action,
315   struct lp_build_tgsi_context * bld_base,
316   struct lp_build_emit_data * emit_data)
317{
318   /* dst.x */
319   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
320
321   /* dst.y */
322   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
323                                          TGSI_OPCODE_MUL,
324                                          emit_data->args[0] /* src0.y */,
325                                          emit_data->args[2] /* src1.y */);
326   /* dst.z */
327   emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
328
329   /* dst.w */
330   emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
331}
332
333static struct lp_build_tgsi_action dst_action = {
334   .fetch_args = dst_fetch_args,
335   .emit = dst_emit
336};
337
338/* TGSI_OPCODE_END */
339static void
340end_emit(
341   const struct lp_build_tgsi_action * action,
342   struct lp_build_tgsi_context * bld_base,
343   struct lp_build_emit_data * emit_data)
344{
345   bld_base->pc = -1;
346}
347
348/* TGSI_OPCODE_EXP */
349
350static void
351exp_emit(
352   const struct lp_build_tgsi_action * action,
353   struct lp_build_tgsi_context * bld_base,
354   struct lp_build_emit_data * emit_data)
355{
356   LLVMValueRef floor_x;
357
358   /* floor( src0.x ) */
359   floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
360                                      emit_data->args[0]);
361
362   /* 2 ^ floor( src0.x ) */
363   emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
364                                       TGSI_OPCODE_EX2, floor_x);
365
366   /* src0.x - floor( src0.x ) */
367   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
368                   TGSI_OPCODE_SUB,  emit_data->args[0] /* src0.x */, floor_x);
369
370   /* 2 ^ src0.x */
371   emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base,
372                             TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */);
373
374   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
375}
376
377const struct lp_build_tgsi_action exp_action = {
378   .fetch_args = scalar_unary_fetch_args,
379   .emit = exp_emit
380};
381
382/* TGSI_OPCODE_FRC */
383
384static void
385frc_emit(
386   const struct lp_build_tgsi_action * action,
387   struct lp_build_tgsi_context * bld_base,
388   struct lp_build_emit_data * emit_data)
389{
390   LLVMValueRef tmp;
391   tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
392                                  emit_data->args[0]);
393   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
394                                       TGSI_OPCODE_SUB, emit_data->args[0], tmp);
395}
396
397/* TGSI_OPCODE_KIL */
398
399static void
400kil_fetch_args(
401   struct lp_build_tgsi_context * bld_base,
402   struct lp_build_emit_data * emit_data)
403{
404   /* src0.x */
405   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
406                                            0, TGSI_CHAN_X);
407   /* src0.y */
408   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
409                                            0, TGSI_CHAN_Y);
410   /* src0.z */
411   emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
412                                            0, TGSI_CHAN_Z);
413   /* src0.w */
414   emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
415                                            0, TGSI_CHAN_W);
416   emit_data->arg_count = 4;
417   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
418}
419
420/* TGSI_OPCODE_KILP */
421
422static void
423kilp_fetch_args(
424   struct lp_build_tgsi_context * bld_base,
425   struct lp_build_emit_data * emit_data)
426{
427   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
428}
429
430/* TGSI_OPCODE_LIT */
431
432static void
433lit_fetch_args(
434   struct lp_build_tgsi_context * bld_base,
435   struct lp_build_emit_data * emit_data)
436{
437   /* src0.x */
438   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
439   /* src0.y */
440   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
441   /* src0.w */
442   emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
443   emit_data->arg_count = 3;
444}
445
446static void
447lit_emit(
448   const struct lp_build_tgsi_action * action,
449   struct lp_build_tgsi_context * bld_base,
450   struct lp_build_emit_data * emit_data)
451{
452   LLVMValueRef tmp0, tmp1, tmp2;
453
454   /* dst.x */
455   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
456
457   /* dst. y */
458   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
459                                               TGSI_OPCODE_MAX,
460                                               emit_data->args[0] /* src0.x */,
461                                               bld_base->base.zero);
462
463   /* dst.z */
464   /* XMM[1] = SrcReg[0].yyyy */
465   tmp1 = emit_data->args[1];
466   /* XMM[1] = max(XMM[1], 0) */
467   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
468                                    tmp1, bld_base->base.zero);
469   /* XMM[2] = SrcReg[0].wwww */
470   tmp2 = emit_data->args[2];
471   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
472                                    tmp1, tmp2);
473   tmp0 = emit_data->args[0];
474   emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base,
475                                             TGSI_OPCODE_CMP,
476                                             tmp0, bld_base->base.zero, tmp1);
477   /* dst.w */
478   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
479}
480
481static struct lp_build_tgsi_action lit_action = {
482   .fetch_args = lit_fetch_args,
483   .emit = lit_emit
484};
485
486/* TGSI_OPCODE_LOG */
487
488static void
489log_emit(
490   const struct lp_build_tgsi_action * action,
491   struct lp_build_tgsi_context * bld_base,
492   struct lp_build_emit_data * emit_data)
493{
494
495   LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
496
497   /* abs( src0.x) */
498   abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
499                                    emit_data->args[0] /* src0.x */);
500
501   /* log( abs( src0.x ) ) */
502   log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
503                                        abs_x);
504
505   /* floor( log( abs( src0.x ) ) ) */
506   flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
507                                            log_abs_x);
508   /* dst.x */
509   emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
510
511   /* dst.y */
512   ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
513                                                flr_log_abs_x);
514
515   /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
516   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
517                                    TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x);
518
519   /* dst.x */
520   emit_data->output[TGSI_CHAN_Z] = log_abs_x;
521
522   /* dst.w */
523   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
524}
525
526static struct lp_build_tgsi_action log_action = {
527   .fetch_args = scalar_unary_fetch_args,
528   .emit = log_emit
529};
530
531/* TGSI_OPCODE_LRP */
532
533static void
534lrp_emit(
535   const struct lp_build_tgsi_action * action,
536   struct lp_build_tgsi_context * bld_base,
537   struct lp_build_emit_data * emit_data)
538{
539   LLVMValueRef tmp;
540   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
541                                   emit_data->args[1],
542                                   emit_data->args[2]);
543   emit_data->output[emit_data->chan] = lp_build_emit_llvm_ternary(bld_base,
544                    TGSI_OPCODE_MAD, emit_data->args[0], tmp, emit_data->args[2]);
545}
546
547/* TGSI_OPCODE_MAD */
548
549static void
550mad_emit(
551   const struct lp_build_tgsi_action * action,
552   struct lp_build_tgsi_context * bld_base,
553   struct lp_build_emit_data * emit_data)
554{
555   LLVMValueRef tmp;
556   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
557                                   emit_data->args[0],
558                                   emit_data->args[1]);
559   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
560                                       TGSI_OPCODE_ADD, tmp, emit_data->args[2]);
561}
562
563/* TGSI_OPCODE_MOV */
564
565static void
566mov_emit(
567   const struct lp_build_tgsi_action * action,
568   struct lp_build_tgsi_context * bld_base,
569   struct lp_build_emit_data * emit_data)
570{
571   emit_data->output[emit_data->chan] = emit_data->args[0];
572}
573
574/* TGSI_OPCODE_MUL */
575static void
576mul_emit(
577   const struct lp_build_tgsi_action * action,
578   struct lp_build_tgsi_context * bld_base,
579   struct lp_build_emit_data * emit_data)
580{
581   emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->base,
582                                   emit_data->args[0], emit_data->args[1]);
583}
584
585/* TGSI_OPCODE_POW */
586
587static void
588pow_emit(
589   const struct lp_build_tgsi_action * action,
590   struct lp_build_tgsi_context * bld_base,
591   struct lp_build_emit_data * emit_data)
592{
593   emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
594                                   emit_data->args[0], emit_data->args[1]);
595}
596
597static struct lp_build_tgsi_action pow_action = {
598   .fetch_args = scalar_binary_fetch_args,
599   .emit = pow_emit
600};
601
602/* TGSI_OPCODE_RSQ */
603
604static void
605rsq_emit(
606   const struct lp_build_tgsi_action * action,
607   struct lp_build_tgsi_context * bld_base,
608   struct lp_build_emit_data * emit_data)
609{
610   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
611                                               emit_data->args[0]);
612   if (bld_base->rsq_action.emit) {
613      bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
614   } else {
615      emit_data->output[emit_data->chan] = bld_base->base.undef;
616   }
617}
618
619const struct lp_build_tgsi_action rsq_action = {
620   .fetch_args = scalar_unary_fetch_args,
621   .emit = rsq_emit
622
623};
624
625/* TGSI_OPCODE_SCS */
626static void
627scs_emit(
628   const struct lp_build_tgsi_action * action,
629   struct lp_build_tgsi_context * bld_base,
630   struct lp_build_emit_data * emit_data)
631{
632   /* dst.x */
633   emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
634                                           TGSI_OPCODE_COS, emit_data->args[0]);
635   /* dst.y */
636   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
637                                           TGSI_OPCODE_SIN, emit_data->args[0]);
638   /* dst.z */
639   emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
640
641   /* dst.w */
642   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
643}
644
645const struct lp_build_tgsi_action scs_action = {
646   .fetch_args = scalar_unary_fetch_args,
647   .emit = scs_emit
648};
649
650/* TGSI_OPCODE_SFL */
651
652static void
653sfl_emit(
654   const struct lp_build_tgsi_action * action,
655   struct lp_build_tgsi_context * bld_base,
656   struct lp_build_emit_data * emit_data)
657{
658   emit_data->output[emit_data->chan] = bld_base->base.zero;
659}
660
661/* TGSI_OPCODE_STR */
662
663static void
664str_emit(
665   const struct lp_build_tgsi_action * action,
666   struct lp_build_tgsi_context * bld_base,
667   struct lp_build_emit_data * emit_data)
668{
669   emit_data->output[emit_data->chan] = bld_base->base.one;
670}
671
672/* TGSI_OPCODE_SUB */
673static void
674sub_emit(
675   const struct lp_build_tgsi_action * action,
676   struct lp_build_tgsi_context * bld_base,
677   struct lp_build_emit_data * emit_data)
678{
679	emit_data->output[emit_data->chan] = LLVMBuildFSub(
680				bld_base->base.gallivm->builder,
681				emit_data->args[0],
682				emit_data->args[1], "");
683}
684
685/* TGSI_OPCODE_XPD */
686
/**
 * XPD operands: three channels from each of src0 and src1
 * (args[0..2] = src0.xyz, args[3..5] = src1.xyz).
 */
static void
xpd_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   const unsigned num_components = 3;

   dp_fetch_args(bld_base, emit_data, num_components);
}
694
695/**
696 * (a * b) - (c * d)
697 */
698static LLVMValueRef
699xpd_helper(
700  struct lp_build_tgsi_context * bld_base,
701  LLVMValueRef a,
702  LLVMValueRef b,
703  LLVMValueRef c,
704  LLVMValueRef d)
705{
706   LLVMValueRef tmp0, tmp1;
707
708   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
709   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
710
711   return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1);
712}
713
714static void
715xpd_emit(
716   const struct lp_build_tgsi_action * action,
717   struct lp_build_tgsi_context * bld_base,
718   struct lp_build_emit_data * emit_data)
719{
720   emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
721              emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
722              emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
723
724   emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
725              emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
726              emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
727
728   emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
729              emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
730              emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
731
732   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
733}
734
735const struct lp_build_tgsi_action xpd_action = {
736   .fetch_args = xpd_fetch_args,
737   .emit = xpd_emit
738};
739
740void
741lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
742{
743   bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
744   bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
745   bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
746   bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
747   bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
748   bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
749   bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
750   bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
751   bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
752   bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
753   bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
754   bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
755   bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
756
757   bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
758   bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
759   bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
760   bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
761   bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
762   bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
763   bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
764   bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
765
766   bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
767   bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
768   bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
769   bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
770   bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
771   bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
772   bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
773   bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
774   bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
775   bld_base->op_actions[TGSI_OPCODE_SFL].emit = sfl_emit;
776   bld_base->op_actions[TGSI_OPCODE_STR].emit = str_emit;
777   bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
778}
779
780/* CPU Only default actions */
781
782/* These actions are CPU only, because they could potentially output SSE
783 * intrinsics.
784 */
785
786/* TGSI_OPCODE_ABS (CPU Only)*/
787
788static void
789abs_emit_cpu(
790   const struct lp_build_tgsi_action * action,
791   struct lp_build_tgsi_context * bld_base,
792   struct lp_build_emit_data * emit_data)
793{
794   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->base,
795                                                       emit_data->args[0]);
796}
797
798/* TGSI_OPCODE_ADD (CPU Only) */
799static void
800add_emit_cpu(
801   const struct lp_build_tgsi_action * action,
802   struct lp_build_tgsi_context * bld_base,
803   struct lp_build_emit_data * emit_data)
804{
805   emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base,
806                                   emit_data->args[0], emit_data->args[1]);
807}
808
809/* TGSI_OPCODE_CEIL (CPU Only) */
810static void
811ceil_emit_cpu(
812   const struct lp_build_tgsi_action * action,
813   struct lp_build_tgsi_context * bld_base,
814   struct lp_build_emit_data * emit_data)
815{
816   emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
817                                                         emit_data->args[0]);
818}
819
820/* TGSI_OPCODE_CMP (CPU Only) */
821static void
822cmp_emit_cpu(
823   const struct lp_build_tgsi_action * action,
824   struct lp_build_tgsi_context * bld_base,
825   struct lp_build_emit_data * emit_data)
826{
827   LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
828                                   emit_data->args[0], bld_base->base.zero);
829   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
830                                cond, emit_data->args[1], emit_data->args[2]);
831}
832
833/* TGSI_OPCODE_CND (CPU Only) */
834static void
835cnd_emit_cpu(
836   const struct lp_build_tgsi_action * action,
837   struct lp_build_tgsi_context * bld_base,
838   struct lp_build_emit_data * emit_data)
839{
840   LLVMValueRef half, tmp;
841   half = lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type, 0.5);
842   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER,
843                      emit_data->args[2], half);
844   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
845                                          tmp,
846                                          emit_data->args[0],
847                                          emit_data->args[1]);
848}
849
850/* TGSI_OPCODE_COS (CPU Only) */
851static void
852cos_emit_cpu(
853   const struct lp_build_tgsi_action * action,
854   struct lp_build_tgsi_context * bld_base,
855   struct lp_build_emit_data * emit_data)
856{
857   emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base,
858                                                       emit_data->args[0]);
859}
860
861/* TGSI_OPCODE_DIV (CPU Only) */
862static void
863div_emit_cpu(
864   const struct lp_build_tgsi_action * action,
865   struct lp_build_tgsi_context * bld_base,
866   struct lp_build_emit_data * emit_data)
867{
868   emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base,
869                                   emit_data->args[0], emit_data->args[1]);
870}
871
872/* TGSI_OPCODE_EX2 (CPU Only) */
873static void
874ex2_emit_cpu(
875   const struct lp_build_tgsi_action * action,
876   struct lp_build_tgsi_context * bld_base,
877   struct lp_build_emit_data * emit_data)
878{
879   emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base,
880                                                        emit_data->args[0]);
881}
882
883/* TGSI_OPCODE_EXP (CPU Only) */
884static void
885exp_emit_cpu(
886   const struct lp_build_tgsi_action * action,
887   struct lp_build_tgsi_context * bld_base,
888   struct lp_build_emit_data * emit_data)
889{
890   lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
891                        &emit_data->output[TGSI_CHAN_X],
892                        &emit_data->output[TGSI_CHAN_Y],
893                        &emit_data->output[TGSI_CHAN_Z]);
894   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
895}
896
897/* TGSI_OPCODE_FLR (CPU Only) */
898
899static void
900flr_emit_cpu(
901   const struct lp_build_tgsi_action * action,
902   struct lp_build_tgsi_context * bld_base,
903   struct lp_build_emit_data * emit_data)
904{
905   emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base,
906                                                         emit_data->args[0]);
907}
908
909/* TGSI_OPCODE_LG2 (CPU Only) */
910static void
911lg2_emit_cpu(
912   const struct lp_build_tgsi_action * action,
913   struct lp_build_tgsi_context * bld_base,
914   struct lp_build_emit_data * emit_data)
915{
916   emit_data->output[emit_data->chan] = lp_build_log2(&bld_base->base,
917                                                        emit_data->args[0]);
918}
919
920/* TGSI_OPCODE_LOG (CPU Only) */
921static void
922log_emit_cpu(
923   const struct lp_build_tgsi_action * action,
924   struct lp_build_tgsi_context * bld_base,
925   struct lp_build_emit_data * emit_data)
926{
927   LLVMValueRef p_floor_log2;
928   LLVMValueRef p_exp;
929   LLVMValueRef p_log2;
930   LLVMValueRef src0 = emit_data->args[0];
931
932   lp_build_log2_approx(&bld_base->base, src0,
933                        &p_exp, &p_floor_log2, &p_log2);
934
935   emit_data->output[TGSI_CHAN_X] = p_floor_log2;
936
937   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
938                                             TGSI_OPCODE_DIV,
939                                             src0, p_exp);
940   emit_data->output[TGSI_CHAN_Z] = p_log2;
941
942   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
943
944}
945
946/* TGSI_OPCODE_MAX (CPU Only) */
947
948static void
949max_emit_cpu(
950   const struct lp_build_tgsi_action * action,
951   struct lp_build_tgsi_context * bld_base,
952   struct lp_build_emit_data * emit_data)
953{
954   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base,
955                                   emit_data->args[0], emit_data->args[1]);
956}
957
958/* TGSI_OPCODE_MIN (CPU Only) */
959static void
960min_emit_cpu(
961   const struct lp_build_tgsi_action * action,
962   struct lp_build_tgsi_context * bld_base,
963   struct lp_build_emit_data * emit_data)
964{
965   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base,
966                                   emit_data->args[0], emit_data->args[1]);
967}
968
969/* TGSI_OPCODE_POW (CPU Only) */
970static void
971pow_emit_cpu(
972   const struct lp_build_tgsi_action * action,
973   struct lp_build_tgsi_context * bld_base,
974   struct lp_build_emit_data * emit_data)
975{
976   emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
977                                   emit_data->args[0], emit_data->args[1]);
978}
979
980
981/* TGSI_OPCODE_RCP (CPU Only) */
982
983static void
984rcp_emit_cpu(
985   const struct lp_build_tgsi_action * action,
986   struct lp_build_tgsi_context * bld_base,
987   struct lp_build_emit_data * emit_data)
988{
989   emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base,
990                                                       emit_data->args[0]);
991}
992
993/* Reciprical squareroot (CPU Only) */
994
995/* This is not the same as TGSI_OPCODE_RSQ, which requres the argument to be
996 * greater than or equal to 0 */
997static void
998recip_sqrt_emit_cpu(
999   const struct lp_build_tgsi_action * action,
1000   struct lp_build_tgsi_context * bld_base,
1001   struct lp_build_emit_data * emit_data)
1002{
1003   emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base,
1004                                                         emit_data->args[0]);
1005}
1006
1007/* TGSI_OPCODE_ROUND (CPU Only) */
1008static void
1009round_emit_cpu(
1010   const struct lp_build_tgsi_action * action,
1011   struct lp_build_tgsi_context * bld_base,
1012   struct lp_build_emit_data * emit_data)
1013{
1014   emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base,
1015                                                         emit_data->args[0]);
1016}
1017
1018/* TGSI_OPCODE_SET Helper (CPU Only) */
1019
1020static void
1021set_emit_cpu(
1022   const struct lp_build_tgsi_action * action,
1023   struct lp_build_tgsi_context * bld_base,
1024   struct lp_build_emit_data * emit_data,
1025   unsigned pipe_func)
1026{
1027   LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func,
1028                                    emit_data->args[0], emit_data->args[1]);
1029   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1030                                          cond,
1031                                          bld_base->base.one,
1032                                          bld_base->base.zero);
1033}
1034
1035/* TGSI_OPCODE_SEQ (CPU Only) */
1036
1037static void
1038seq_emit_cpu(
1039   const struct lp_build_tgsi_action * action,
1040   struct lp_build_tgsi_context * bld_base,
1041   struct lp_build_emit_data * emit_data)
1042{
1043   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1044}
1045
1046/* TGSI_OPCODE_SGE (CPU Only) */
1047static void
1048sge_emit_cpu(
1049   const struct lp_build_tgsi_action * action,
1050   struct lp_build_tgsi_context * bld_base,
1051   struct lp_build_emit_data * emit_data)
1052{
1053   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1054}
1055
1056/* TGSI_OPCODE_SGT (CPU Only)*/
1057
1058static void
1059sgt_emit_cpu(
1060   const struct lp_build_tgsi_action * action,
1061   struct lp_build_tgsi_context * bld_base,
1062   struct lp_build_emit_data * emit_data)
1063{
1064   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
1065}
1066
1067/* TGSI_OPCODE_SIN (CPU Only) */
1068static void
1069sin_emit_cpu(
1070   const struct lp_build_tgsi_action * action,
1071   struct lp_build_tgsi_context * bld_base,
1072   struct lp_build_emit_data * emit_data)
1073{
1074   emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base,
1075                                                       emit_data->args[0]);
1076}
1077
1078/* TGSI_OPCODE_SLE (CPU Only) */
1079static void
1080sle_emit_cpu(
1081   const struct lp_build_tgsi_action * action,
1082   struct lp_build_tgsi_context * bld_base,
1083   struct lp_build_emit_data * emit_data)
1084{
1085   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
1086}
1087
1088/* TGSI_OPCODE_SLT (CPU Only) */
1089
1090static void
1091slt_emit_cpu(
1092   const struct lp_build_tgsi_action * action,
1093   struct lp_build_tgsi_context * bld_base,
1094   struct lp_build_emit_data * emit_data)
1095{
1096   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1097}
1098
1099/* TGSI_OPCODE_SNE (CPU Only) */
1100
1101static void
1102sne_emit_cpu(
1103   const struct lp_build_tgsi_action * action,
1104   struct lp_build_tgsi_context * bld_base,
1105   struct lp_build_emit_data * emit_data)
1106{
1107   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1108}
1109
1110/* TGSI_OPCODE_SSG (CPU Only) */
1111
1112static void
1113ssg_emit_cpu(
1114   const struct lp_build_tgsi_action * action,
1115   struct lp_build_tgsi_context * bld_base,
1116   struct lp_build_emit_data * emit_data)
1117{
1118   emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base,
1119                                                       emit_data->args[0]);
1120}
1121
1122/* TGSI_OPCODE_SUB (CPU Only) */
1123
1124static void
1125sub_emit_cpu(
1126   const struct lp_build_tgsi_action * action,
1127   struct lp_build_tgsi_context * bld_base,
1128   struct lp_build_emit_data * emit_data)
1129{
1130   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base,
1131                                                        emit_data->args[0],
1132                                                        emit_data->args[1]);
1133}
1134
1135/* TGSI_OPCODE_TRUNC (CPU Only) */
1136
1137static void
1138trunc_emit_cpu(
1139   const struct lp_build_tgsi_action * action,
1140   struct lp_build_tgsi_context * bld_base,
1141   struct lp_build_emit_data * emit_data)
1142{
1143   emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
1144                                                         emit_data->args[0]);
1145}
1146
1147void
1148lp_set_default_actions_cpu(
1149   struct lp_build_tgsi_context * bld_base)
1150{
1151   lp_set_default_actions(bld_base);
1152   bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
1153   bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
1154   bld_base->op_actions[TGSI_OPCODE_ARL].emit = flr_emit_cpu;
1155   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
1156   bld_base->op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu;
1157   bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
1158   bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
1159   bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
1160   bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
1161   bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
1162   bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
1163   bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
1164   bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
1165   bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
1166   bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
1167   bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
1168   bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
1169   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
1170   bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
1171   bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
1172   bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
1173   bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
1174   bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
1175   bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
1176   bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
1177   bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
1178   bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
1179   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
1180
1181   bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
1182}
1183