Lines Matching refs:ctx

143 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
199 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
209 ret_type = LLVMStructTypeInContext(ctx, return_types,
212 ret_type = LLVMVoidTypeInContext(ctx);
220 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
259 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
267 offset = LLVMConstInt(ctx->i32, idx * 16, false);
269 ptr = ctx->shared_memory;
270 ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
272 ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
276 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
279 if (type == ctx->f32) {
280 return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
283 if (elem_type == ctx->f32) {
284 LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
285 return LLVMBuildBitCast(ctx->builder, v, nt, "");
291 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
294 if (type == ctx->i32) {
295 return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
298 if (elem_type == ctx->i32) {
299 LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
300 return LLVMBuildBitCast(ctx->builder, v, nt, "");
306 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
312 value = LLVMBuildLShr(ctx->builder, value,
313 LLVMConstInt(ctx->i32, rshift, false), "");
317 value = LLVMBuildAnd(ctx->builder, value,
318 LLVMConstInt(ctx->i32, mask, false), "");
323 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
327 ctx->i32zero,
330 return LLVMBuildGEP(ctx->builder, base_ptr,
334 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
339 pointer = build_gep0(ctx, base_ptr, index);
341 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
342 return LLVMBuildLoad(ctx->builder, pointer, "");
345 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
348 LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
349 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
361 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
364 set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
378 static void create_function(struct nir_to_llvm_context *ctx)
385 unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
390 if (!ctx->options->layout)
392 else if (!ctx->options->layout->push_constant_size &&
393 !ctx->options->layout->dynamic_offset_count)
398 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
400 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
407 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
410 switch (ctx->stage) {
412 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
414 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
415 arg_types[arg_idx++] = ctx->i32;
418 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
421 arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
422 arg_types[arg_idx++] = ctx->i32; // base vertex
423 arg_types[arg_idx++] = ctx->i32; // start instance
425 arg_types[arg_idx++] = ctx->i32; // vertex id
426 arg_types[arg_idx++] = ctx->i32; // rel auto id
427 arg_types[arg_idx++] = ctx->i32; // vs prim id
428 arg_types[arg_idx++] = ctx->i32; // instance id
431 arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
433 arg_types[arg_idx++] = ctx->i32; /* prim mask */
435 arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
436 arg_types[arg_idx++] = ctx->v2i32; /* persp center */
437 arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
438 arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
439 arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
440 arg_types[arg_idx++] = ctx->v2i32; /* linear center */
441 arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
442 arg_types[arg_idx++] = ctx->f32; /* line stipple tex */
443 arg_types[arg_idx++] = ctx->f32; /* pos x float */
444 arg_types[arg_idx++] = ctx->f32; /* pos y float */
445 arg_types[arg_idx++] = ctx->f32; /* pos z float */
446 arg_types[arg_idx++] = ctx->f32; /* pos w float */
447 arg_types[arg_idx++] = ctx->i32; /* front face */
448 arg_types[arg_idx++] = ctx->i32; /* ancillary */
449 arg_types[arg_idx++] = ctx->f32; /* sample coverage */
450 arg_types[arg_idx++] = ctx->i32; /* fixed pt */
456 ctx->main_function = create_llvm_function(
457 ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
458 arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
459 set_llvm_calling_convention(ctx->main_function, ctx->stage);
462 ctx->shader_info->num_input_sgprs = 0;
463 ctx->shader_info->num_input_vgprs = 0;
466 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
468 ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
470 ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
472 if (ctx->stage != MESA_SHADER_FRAGMENT)
474 ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
479 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
480 set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
482 ctx->descriptor_sets[i] =
483 LLVMGetParam(ctx->main_function, arg_idx++);
485 ctx->descriptor_sets[i] = NULL;
489 ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
490 set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
494 switch (ctx->stage) {
496 set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
498 ctx->num_work_groups =
499 LLVMGetParam(ctx->main_function, arg_idx++);
500 ctx->workgroup_ids =
501 LLVMGetParam(ctx->main_function, arg_idx++);
502 ctx->tg_size =
503 LLVMGetParam(ctx->main_function, arg_idx++);
504 ctx->local_invocation_ids =
505 LLVMGetParam(ctx->main_function, arg_idx++);
508 set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
510 ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
511 set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
513 ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
514 ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
515 ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
516 ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
517 ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
518 ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
521 set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
523 ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
524 ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
525 ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
526 ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
527 ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
529 ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
530 ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
531 ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
533 ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
534 ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
535 ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
536 ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
537 ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
538 ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
545 static void setup_types(struct nir_to_llvm_context *ctx)
549 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
550 ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
551 ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
552 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
553 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
554 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
555 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
556 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
557 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
558 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
559 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
560 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
561 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
562 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
563 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
565 ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
566 ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
567 ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
568 ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
570 args[0] = ctx->f32zero;
571 args[1] = ctx->f32zero;
572 args[2] = ctx->f32zero;
573 args[3] = ctx->f32one;
574 ctx->v4f32empty = LLVMConstVector(args, 4);
576 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
578 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
580 ctx->uniform_md_kind =
581 LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
582 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
584 args[0] = LLVMConstReal(ctx->f32, 2.5);
596 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
606 return LLVMBuildExtractElement(ctx->builder, value,
607 LLVMConstInt(ctx->i32, index, false), "");
610 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
618 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
619 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
622 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
626 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
630 build_store_values_extended(struct nir_to_llvm_context *ctx,
636 LLVMBuilderRef builder = ctx->builder;
646 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
652 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
655 LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
662 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
665 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
670 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
673 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
677 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
681 LLVMValueRef value = get_src(ctx, src.src);
698 LLVMConstInt(ctx->i32, src.swizzle[0], false),
699 LLVMConstInt(ctx->i32, src.swizzle[1], false),
700 LLVMConstInt(ctx->i32, src.swizzle[2], false),
701 LLVMConstInt(ctx->i32, src.swizzle[3], false)};
704 value = LLVMBuildExtractElement(ctx->builder, value,
708 value = ac_build_gather_values(&ctx->ac, values, num_components);
711 value = LLVMBuildShuffleVector(ctx->builder, value, value,
720 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
724 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
725 return LLVMBuildSelect(ctx->builder, result,
726 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
727 LLVMConstInt(ctx->i32, 0, false), "");
730 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
735 src0 = to_float(ctx, src0);
736 src1 = to_float(ctx, src1);
737 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
738 return LLVMBuildSelect(ctx->builder, result,
739 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
740 LLVMConstInt(ctx->i32, 0, false), "");
743 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
748 to_float(ctx, src0),
750 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
753 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
758 to_float(ctx, src0),
759 to_float(ctx, src1),
761 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
764 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
769 to_float(ctx, src0),
770 to_float(ctx, src1),
771 to_float(ctx, src2),
773 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
776 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
779 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
780 ctx->i32zero, "");
781 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
784 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
797 LLVMConstInt(ctx->i32, 1, false),
799 return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
802 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
805 LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32",
806 ctx->i32, &src0, 1,
811 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
814 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
815 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
816 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
817 src0, ctx->i32zero, ""),
818 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
821 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
824 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
829 ctx->i32one,
831 LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32",
832 ctx->i32, args, ARRAY_SIZE(args),
837 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
840 return LLVMBuildSelect(ctx->builder,
841 LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
842 ctx->i32zero, ""),
843 LLVMConstInt(ctx->i32, -1, true), msb, "");
846 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
850 return LLVMBuildSelect(ctx->builder,
851 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
856 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
859 return emit_minmax_int(ctx, LLVMIntSGT, src0,
860 LLVMBuildNeg(ctx->builder, src0, ""));
863 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
868 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
869 val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
870 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
871 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
875 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
880 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
881 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
882 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
883 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
887 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
891 LLVMValueRef fsrc0 = to_float(ctx, src0);
895 LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr,
896 ctx->f32, params, 1,
898 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
901 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
906 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
909 ret_type = LLVMStructTypeInContext(ctx->context, types,
912 res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type,
915 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
916 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
920 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
923 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
926 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
930 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
931 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
933 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
934 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
935 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
939 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
943 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
944 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
946 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
947 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
948 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
952 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
957 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
958 result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
960 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
964 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
970 bfi_args[0] = LLVMBuildShl(ctx->builder,
971 LLVMBuildSub(ctx->builder,
972 LLVMBuildShl(ctx->builder,
973 ctx->i32one,
975 ctx->i32one, ""),
977 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
980 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
986 result = LLVMBuildXor(ctx->builder, bfi_args[2],
987 LLVMBuildAnd(ctx->builder, bfi_args[0],
988 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
990 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
994 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
997 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1001 src0 = to_float(ctx, src0);
1002 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1003 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1005 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1006 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1007 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1010 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1011 comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1016 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1019 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1024 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1025 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1026 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1027 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1030 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1031 ctx->i32zero, "");
1032 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1033 ctx->i32one, "");
1044 static void set_range_metadata(struct nir_to_llvm_context *ctx,
1054 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1057 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
1061 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
1062 tid_args[1] = ctx->i32zero;
1063 tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
1064 "llvm.amdgcn.mbcnt.lo", ctx->i32,
1067 tid = ac_emit_llvm_intrinsic(&ctx->ac,
1068 "llvm.amdgcn.mbcnt.hi", ctx->i32,
1070 set_range_metadata(ctx, tid, 0, 64);
1102 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1112 ctx->has_ddxy = true;
1114 if (!ctx->lds && !ctx->has_ds_bpermute)
1115 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1116 LLVMArrayType(ctx->i32, 64),
1119 thread_id = get_thread_id(ctx);
1127 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1128 LLVMConstInt(ctx->i32, mask, false), "");
1137 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1138 LLVMConstInt(ctx->i32, idx, false), "");
1140 if (ctx->has_ds_bpermute) {
1141 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1142 LLVMConstInt(ctx->i32, 4, false), "");
1144 tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1145 ctx->i32, args, 2,
1148 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1149 LLVMConstInt(ctx->i32, 4, false), "");
1150 trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1151 ctx->i32, args, 2,
1156 store_ptr = build_gep0(ctx, ctx->lds, thread_id);
1157 load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
1158 load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
1160 LLVMBuildStore(ctx->builder, src0, store_ptr);
1161 tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
1162 trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
1164 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1165 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1166 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1176 struct nir_to_llvm_context *ctx,
1183 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1184 LLVMConstInt(ctx->i32, i, false), "");
1185 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1186 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1188 return ac_build_gather_values(&ctx->ac, result, 4);
1191 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
1215 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1223 src[0] = to_float(ctx, src[0]);
1224 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1227 result = LLVMBuildNeg(ctx->builder, src[0], "");
1230 result = LLVMBuildNot(ctx->builder, src[0], "");
1233 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1236 src[0] = to_float(ctx, src[0]);
1237 src[1] = to_float(ctx, src[1]);
1238 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1241 src[0] = to_float(ctx, src[0]);
1242 src[1] = to_float(ctx, src[1]);
1243 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1246 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1249 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1252 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1255 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
1258 src[0] = to_float(ctx, src[0]);
1259 src[1] = to_float(ctx, src[1]);
1260 result = ac_emit_fdiv(&ctx->ac, src[0], src[1]);
1261 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
1262 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1263 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1266 src[0] = to_float(ctx, src[0]);
1267 src[1] = to_float(ctx, src[1]);
1268 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1271 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1274 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1277 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1280 src[0] = to_float(ctx, src[0]);
1281 src[1] = to_float(ctx, src[1]);
1282 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1285 src[0] = to_float(ctx, src[0]);
1286 src[1] = to_float(ctx, src[1]);
1287 result = ac_emit_fdiv(&ctx->ac, src[0], src[1]);
1290 src[0] = to_float(ctx, src[0]);
1291 result = ac_emit_fdiv(&ctx->ac, ctx->f32one, src[0]);
1294 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1297 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1300 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1303 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1306 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1309 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1312 result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
1315 result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
1318 result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
1321 result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
1324 result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
1327 result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
1330 result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
1333 result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
1336 result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
1339 result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
1342 result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
1345 result = emit_iabs(ctx, src[0]);
1348 result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
1351 result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
1354 result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
1357 result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
1360 result = emit_isign(ctx, src[0]);
1363 src[0] = to_float(ctx, src[0]);
1364 result = emit_fsign(ctx, src[0]);
1367 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
1370 result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
1373 result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
1376 result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
1379 result = emit_ffract(ctx, src[0]);
1382 result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
1385 result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
1388 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1391 result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
1394 result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
1397 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1398 result = ac_emit_fdiv(&ctx->ac, ctx->f32one, result);
1401 result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
1404 result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
1407 result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
1410 result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
1413 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
1416 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
1419 result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
1422 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1425 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1431 src[i] = to_integer(ctx, src[i]);
1432 result = ac_build_gather_values(&ctx->ac, src, num_components);
1435 src[0] = to_float(ctx, src[0]);
1436 result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
1439 src[0] = to_float(ctx, src[0]);
1440 result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
1443 result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
1446 result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
1449 result = emit_bcsel(ctx, src[0], src[1], src[2]);
1452 result = emit_find_lsb(ctx, src[0]);
1455 result = emit_ufind_msb(ctx, src[0]);
1458 result = emit_ifind_msb(ctx, src[0]);
1461 result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1464 result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
1467 result = emit_b2f(ctx, src[0]);
1470 src[0] = to_float(ctx, src[0]);
1471 result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
1473 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1476 result = emit_umul_high(ctx, src[0], src[1]);
1479 result = emit_imul_high(ctx, src[0], src[1]);
1482 result = emit_pack_half_2x16(ctx, src[0]);
1485 result = emit_unpack_half_2x16(ctx, src[0]);
1493 result = emit_ddxy(ctx, instr->op, src[0]);
1504 result = to_integer(ctx, result);
1505 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1510 static void visit_load_const(struct nir_to_llvm_context *ctx,
1515 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1539 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1542 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1546 return LLVMBuildBitCast(ctx->builder, ptr,
1551 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1554 LLVMBuildExtractElement(ctx->builder, descriptor,
1555 LLVMConstInt(ctx->i32, 2, false), "");
1558 if (ctx->options->chip_class >= VI && in_elements) {
1564 LLVMBuildExtractElement(ctx->builder, descriptor,
1565 LLVMConstInt(ctx->i32, 1, false), "");
1566 stride = LLVMBuildLShr(ctx->builder, stride,
1567 LLVMConstInt(ctx->i32, 16, false), "");
1568 stride = LLVMBuildAnd(ctx->builder, stride,
1569 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1571 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1593 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1609 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
1611 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
1612 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
1613 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1614 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
1615 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1616 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1617 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1618 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1619 size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
1624 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1625 LLVMConstInt(ctx->i32, c, false), "");
1626 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1627 half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
1628 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1629 LLVMConstReal(ctx->f32, -0.5), "");
1635 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1636 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1637 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1638 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1639 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1640 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1644 return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1649 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1682 if (ctx->stage != MESA_SHADER_FRAGMENT)
1708 return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1712 return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1717 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
1720 LLVMValueRef index = get_src(ctx, instr->src[0]);
1723 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
1724 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
1733 desc_ptr = ctx->push_constants;
1735 stride = LLVMConstInt(ctx->i32, 16, false);
1737 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
1739 offset = LLVMConstInt(ctx->i32, base_offset, false);
1740 index = LLVMBuildMul(ctx->builder, index, stride, "");
1741 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
1743 desc_ptr = build_gep0(ctx, desc_ptr, offset);
1744 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
1745 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
1747 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
1750 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1755 addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
1756 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
1758 ptr = build_gep0(ctx, ctx->push_constants, addr);
1759 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1761 return LLVMBuildLoad(ctx->builder, ptr, "");
1764 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1767 LLVMValueRef desc = get_src(ctx, instr->src[0]);
1769 return get_buffer_size(ctx, desc, false);
1771 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
1775 LLVMTypeRef data_type = ctx->f32;
1780 if (ctx->stage == MESA_SHADER_FRAGMENT)
1781 ctx->shader_info->fs.writes_memory = true;
1783 params[1] = get_src(ctx, instr->src[1]);
1784 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1785 params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */
1786 params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1789 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1791 base_data = to_float(ctx, get_src(ctx, instr->src[0]));
1792 base_data = trim_vector(ctx, base_data, instr->num_components);
1793 base_data = LLVMBuildBitCast(ctx->builder, base_data,
1795 base_offset = get_src(ctx, instr->src[2]); /* voffset */
1814 tmp = LLVMBuildExtractElement(ctx->builder,
1815 base_data, LLVMConstInt(ctx->i32, start, false), "");
1816 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
1817 ctx->i32zero, "");
1819 tmp = LLVMBuildExtractElement(ctx->builder,
1820 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
1821 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
1822 ctx->i32one, "");
1828 data = LLVMBuildExtractElement(ctx->builder, base_data,
1829 LLVMConstInt(ctx->i32, start, false), "");
1837 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
1841 ac_emit_llvm_intrinsic(&ctx->ac, store_name,
1842 ctx->voidt, params, 6, 0);
1846 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1852 if (ctx->stage == MESA_SHADER_FRAGMENT)
1853 ctx->shader_info->fs.writes_memory = true;
1856 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
1858 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
1859 params[arg_count++] = get_src(ctx, instr->src[0]);
1860 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1861 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1862 params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1899 return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
1902 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
1906 LLVMTypeRef data_type = ctx->f32;
1908 data_type = LLVMVectorType(ctx->f32, 4);
1910 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1922 get_src(ctx, instr->src[0]),
1923 LLVMConstInt(ctx->i32, 0, false),
1924 get_src(ctx, instr->src[1]),
1925 LLVMConstInt(ctx->i1, 0, false),
1926 LLVMConstInt(ctx->i1, 0, false),
1930 ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
1933 ret = trim_vector(ctx, ret, 3);
1935 return LLVMBuildBitCast(ctx->builder, ret,
1936 get_def_type(ctx, &instr->dest.ssa), "");
1939 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
1943 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
1944 LLVMValueRef offset = get_src(ctx, instr->src[1]);
1946 rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
1951 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
1954 results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
1959 ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
1960 return LLVMBuildBitCast(ctx->builder, ret,
1961 get_def_type(ctx, &instr->dest.ssa), "");
1965 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
1986 index = get_src(ctx, deref_array->indirect);
1987 stride = LLVMConstInt(ctx->i32, size, 0);
1988 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
1991 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2007 offset = LLVMBuildAdd(ctx->builder, offset,
2008 LLVMConstInt(ctx->i32, const_offset, 0),
2015 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2025 radv_get_deref_offset(ctx, &instr->variables[0]->deref,
2026 ctx->stage == MESA_SHADER_VERTEX,
2032 ctx->stage == MESA_SHADER_VERTEX);
2034 &ctx->ac, ctx->inputs + idx + chan, count,
2037 values[chan] = LLVMBuildExtractElement(ctx->builder,
2041 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2043 return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
2046 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2053 &ctx->ac, ctx->locals + idx + chan, count,
2056 values[chan] = LLVMBuildExtractElement(ctx->builder,
2060 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2063 return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
2065 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2072 &ctx->ac, ctx->outputs + idx + chan, count,
2075 values[chan] = LLVMBuildExtractElement(ctx->builder,
2079 values[chan] = LLVMBuildLoad(ctx->builder,
2080 ctx->outputs[idx + chan + const_index * 4],
2084 return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
2086 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2088 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2092 indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
2095 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2097 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2098 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2099 values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
2101 return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
2110 visit_store_var(struct nir_to_llvm_context *ctx,
2115 LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2121 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2130 value = LLVMBuildExtractElement(ctx->builder, src,
2131 LLVMConstInt(ctx->i32,
2142 &ctx->ac, ctx->outputs + idx + chan, count,
2146 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2150 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2154 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2156 LLVMBuildStore(ctx->builder, value, temp_ptr);
2161 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2170 value = LLVMBuildExtractElement(ctx->builder, src,
2171 LLVMConstInt(ctx->i32, chan, false), "");
2176 &ctx->ac, ctx->locals + idx + chan, count,
2179 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2181 build_store_values_extended(ctx, ctx->locals + idx + chan,
2184 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2186 LLVMBuildStore(ctx->builder, value, temp_ptr);
2192 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2195 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2199 indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
2205 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2210 value = LLVMBuildExtractElement(ctx->builder, src,
2211 LLVMConstInt(ctx->i32,
2216 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2218 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2219 LLVMBuildStore(ctx->builder,
2220 to_integer(ctx, value), derived_ptr);
2254 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
2261 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2264 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2265 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2280 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
2288 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
2293 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
2296 coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
2301 coords[3] = LLVMGetUndef(ctx->i32);
2304 res = ac_build_gather_values(&ctx->ac, coords, count);
2370 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
2383 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2384 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2385 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2386 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2387 params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */
2388 params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */
2389 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
2392 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
2393 res = to_integer(ctx, res);
2397 LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
2398 LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
2399 LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
2401 params[0] = get_image_coords(ctx, instr);
2402 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2403 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2405 params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */
2410 LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
2418 ctx->v4f32, /* vdata */
2423 res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
2426 return to_integer(ctx, res);
2429 static void visit_image_store(struct nir_to_llvm_context *ctx,
2435 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2436 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2439 if (ctx->stage == MESA_SHADER_FRAGMENT)
2440 ctx->shader_info->fs.writes_memory = true;
2443 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
2444 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2445 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2446 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2447 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2450 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
2459 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
2460 params[1] = get_image_coords(ctx, instr); /* coords */
2461 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2462 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2482 ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
2488 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
2494 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2495 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2502 if (ctx->stage == MESA_SHADER_FRAGMENT)
2503 ctx->shader_info->fs.writes_memory = true;
2505 params[param_count++] = get_src(ctx, instr->src[2]);
2507 params[param_count++] = get_src(ctx, instr->src[3]);
2510 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2511 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2512 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2513 params[param_count++] = ctx->i32zero; /* voffset */
2520 coords = params[param_count++] = get_image_coords(ctx, instr);
2521 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2560 return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
2563 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
2576 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
2577 params[0] = ctx->i32zero;
2578 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2579 params[2] = LLVMConstInt(ctx->i32, 15, false);
2580 params[3] = ctx->i32zero;
2581 params[4] = ctx->i32zero;
2582 params[5] = da ? ctx->i32one : ctx->i32zero;
2583 params[6] = ctx->i32zero;
2584 params[7] = ctx->i32zero;
2585 params[8] = ctx->i32zero;
2586 params[9] = ctx->i32zero;
2588 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
2593 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
2594 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
2595 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
2596 z = LLVMBuildSDiv(ctx->builder, z, six, "");
2597 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
2602 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2605 LLVMConstInt(ctx->i32, 0xf70, false),
2607 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
2608 ctx->voidt, args, 1, 0);
2611 static void emit_barrier(struct nir_to_llvm_context *ctx)
2614 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
2615 ctx->voidt, NULL, 0, 0);
2618 static void emit_discard_if(struct nir_to_llvm_context *ctx,
2622 ctx->shader_info->fs.can_discard = true;
2624 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2625 get_src(ctx, instr->src[0]),
2626 ctx->i32zero, "");
2628 cond = LLVMBuildSelect(ctx->builder, cond,
2629 LLVMConstReal(ctx->f32, -1.0f),
2630 ctx->f32zero, "");
2631 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
2632 ctx->voidt,
2637 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2640 LLVMValueRef thread_id = get_thread_id(ctx);
2641 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2642 LLVMConstInt(ctx->i32, 0xfc0, false), "");
2644 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
2647 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2652 LLVMValueRef src = get_src(ctx, instr->src[0]);
2653 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2656 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2657 result = LLVMBuildAtomicCmpXchg(ctx->builder,
2696 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2707 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2717 return ctx->persp_center;
2719 return ctx->persp_centroid;
2721 return ctx->persp_sample;
2725 return ctx->linear_center;
2727 return ctx->linear_centroid;
2729 return ctx->linear_sample;
2735 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2739 LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2740 LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2743 result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2744 result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2746 return ac_build_gather_values(&ctx->ac, result, 2);
2749 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2753 values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2754 values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2755 return ac_build_gather_values(&ctx->ac, values, 2);
2758 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2776 src0 = get_src(ctx, instr->src[0]);
2783 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2784 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
2787 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2790 sample_position = load_sample_position(ctx, src0);
2792 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2793 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2794 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2795 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2797 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2798 attr_number = LLVMConstInt(ctx->i32, input_index, false);
2802 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2813 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2814 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2815 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2817 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2819 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2823 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2824 ctx->f32, "");
2826 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2827 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2829 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2830 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
2832 ij_out[i] = LLVMBuildBitCast(ctx->builder,
2833 temp2, ctx->i32, "");
2835 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
2841 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2845 args[2] = ctx->prim_mask;
2847 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
2848 ctx->f32, args, args[3] ? 4 : 3,
2851 return ac_build_gather_values(&ctx->ac, result, 2);
2854 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
2861 result = ctx->workgroup_ids;
2865 result = ctx->base_vertex;
2869 result = ctx->vertex_id;
2873 result = ctx->local_invocation_ids;
2877 result = ctx->start_instance;
2880 ctx->shader_info->fs.force_persample = true;
2881 result = unpack_param(ctx, ctx->ancillary, 8, 4);
2884 ctx->shader_info->fs.force_persample = true;
2885 result = load_sample_pos(ctx);
2888 result = ctx->front_face;
2891 result = ctx->instance_id;
2892 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
2893 ctx->shader_info->vs.vgpr_comp_cnt);
2896 result = ctx->num_work_groups;
2899 result = visit_load_local_invocation_index(ctx);
2902 result = visit_load_push_constant(ctx, instr);
2905 result = visit_vulkan_resource_index(ctx, instr);
2908 visit_store_ssbo(ctx, instr);
2911 result = visit_load_buffer(ctx, instr);
2923 result = visit_atomic_ssbo(ctx, instr);
2926 result = visit_load_ubo_buffer(ctx, instr);
2929 result = visit_get_buffer_size(ctx, instr);
2932 result = visit_load_var(ctx, instr);
2935 visit_store_var(ctx, instr);
2938 result = visit_image_load(ctx, instr);
2941 visit_image_store(ctx, instr);
2951 result = visit_image_atomic(ctx, instr);
2954 result = visit_image_size(ctx, instr);
2957 ctx->shader_info->fs.can_discard = true;
2958 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
2959 ctx->voidt,
2963 emit_discard_if(ctx, instr);
2966 emit_waitcnt(ctx);
2969 emit_barrier(ctx);
2981 result = visit_var_atomic(ctx, instr);
2986 result = visit_interp(ctx, instr);
2995 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
2999 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3004 LLVMValueRef list = ctx->descriptor_sets[desc_set];
3005 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3010 LLVMBuilderRef builder = ctx->builder;
3018 type = ctx->v8i32;
3022 type = ctx->v8i32;
3027 type = ctx->v4i32;
3034 type = ctx->v4i32;
3047 index = get_src(ctx, child->indirect);
3054 index = ctx->i32zero;
3056 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
3058 list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0));
3061 return build_indexed_load_const(ctx, list, index);
3064 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3080 param[count++] = LLVMGetUndef(ctx->i32);
3083 tinfo->args[0] = ac_build_gather_values(&ctx->ac, param, count);
3095 tinfo->dst_type = ctx->v4i32;
3097 tinfo->dst_type = ctx->v4f32;
3103 tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
3109 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
3110 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
3111 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
3112 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
3113 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
3114 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
3115 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
3116 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
3132 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
3135 LLVMBuilderRef builder = ctx->builder;
3138 if (ctx->options->chip_class >= VI)
3142 LLVMConstInt(ctx->i32, 7, 0), "");
3144 LLVMConstInt(ctx->i32, 0, 0), "");
3147 LLVMConstInt(ctx->i32, 0, 0), "");
3150 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3156 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3158 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3161 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3163 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3165 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3169 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3172 static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
3175 coord = to_float(ctx, coord);
3176 coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
3177 coord = to_integer(ctx, coord);
3181 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
3196 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3201 coord = get_src(ctx, instr->src[i].src);
3206 comparator = get_src(ctx, instr->src[i].src);
3209 offsets = get_src(ctx, instr->src[i].src);
3213 bias = get_src(ctx, instr->src[i].src);
3216 lod = get_src(ctx, instr->src[i].src);
3219 sample_index = get_src(ctx, instr->src[i].src);
3224 ddx = get_src(ctx, instr->src[i].src);
3228 ddy = get_src(ctx, instr->src[i].src);
3239 result = get_buffer_size(ctx, res_ptr, true);
3245 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
3246 samples = LLVMBuildExtractElement(ctx->builder, res,
3247 LLVMConstInt(ctx->i32, 3, false), "");
3248 is_msaa = LLVMBuildLShr(ctx->builder, samples,
3249 LLVMConstInt(ctx->i32, 28, false), "");
3250 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
3251 LLVMConstInt(ctx->i32, 0xe, false), "");
3252 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
3253 LLVMConstInt(ctx->i32, 0xe, false), "");
3255 samples = LLVMBuildLShr(ctx->builder, samples,
3256 LLVMConstInt(ctx->i32, 16, false), "");
3257 samples = LLVMBuildAnd(ctx->builder, samples,
3258 LLVMConstInt(ctx->i32, 0xf, false), "");
3259 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
3261 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
3262 ctx->i32one, "");
3269 coords[chan] = llvm_extract_elem(ctx, coord, chan);
3274 offset[chan] = ctx->i32zero;
3278 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
3279 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
3280 LLVMConstInt(ctx->i32, 0x3f, false), "");
3282 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
3283 LLVMConstInt(ctx->i32, chan * 8, false), "");
3285 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
3286 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
3297 address[count++] = llvm_extract_elem(ctx, comparator, 0);
3317 derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
3318 derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
3324 coords[chan] = to_float(ctx, coords[chan]);
3326 coords[3] = LLVMGetUndef(ctx->f32);
3327 ac_prepare_cube_coords(&ctx->ac,
3344 coords[1] = apply_round_slice(ctx, coords[1]);
3351 coords[2] = apply_round_slice(ctx, coords[2]);
3367 address[count++] = ctx->i32zero;
3371 address[chan] = LLVMBuildBitCast(ctx->builder,
3372 address[chan], ctx->i32, "");
3382 txf_address[2] = ctx->i32zero;
3383 txf_address[3] = ctx->i32zero;
3385 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3389 result = build_tex_intrinsic(ctx, instr, &txf_info);
3391 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3392 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
3418 txf_address[2] = ctx->i32zero;
3419 txf_address[3] = ctx->i32zero;
3421 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3425 result = build_tex_intrinsic(ctx, instr, &txf_info);
3426 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3427 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3429 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3431 ctx->i32zero, "");
3436 LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
3438 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3440 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3446 LLVMBuildBitCast(ctx->builder, fmask_ptr,
3447 ctx->v8i32, "");
3450 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3451 ctx->i32one, "");
3454 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3455 fmask_word1, ctx->i32zero, "");
3459 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3470 address[2] = LLVMBuildAdd(ctx->builder,
3471 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
3473 address[1] = LLVMBuildAdd(ctx->builder,
3474 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
3475 address[0] = LLVMBuildAdd(ctx->builder,
3476 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
3487 set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
3490 result = build_tex_intrinsic(ctx, instr, &tinfo);
3493 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
3495 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3499 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3500 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3501 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
3502 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3503 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
3505 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
3510 result = to_integer(ctx, result);
3511 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3516 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3518 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3519 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3521 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3522 _mesa_hash_table_insert(ctx->phis, instr, result);
3525 static void visit_post_phi(struct nir_to_llvm_context *ctx,
3530 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3531 LLVMValueRef llvm_src = get_src(ctx, src->src);
3537 static void phi_post_pass(struct nir_to_llvm_context *ctx)
3540 hash_table_foreach(ctx->phis, entry) {
3541 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3547 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3554 undef = LLVMGetUndef(ctx->i32);
3556 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3558 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3561 static void visit_jump(struct nir_to_llvm_context *ctx,
3566 LLVMBuildBr(ctx->builder, ctx->break_block);
3567 LLVMClearInsertionPosition(ctx->builder);
3570 LLVMBuildBr(ctx->builder, ctx->continue_block);
3571 LLVMClearInsertionPosition(ctx->builder);
3581 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3584 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
3586 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
3591 visit_alu(ctx, nir_instr_as_alu(instr));
3594 visit_load_const(ctx, nir_instr_as_load_const(instr));
3597 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3600 visit_tex(ctx, nir_instr_as_tex(instr));
3603 visit_phi(ctx, nir_instr_as_phi(instr));
3606 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3609 visit_jump(ctx, nir_instr_as_jump(instr));
3619 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3622 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
3624 LLVMValueRef value = get_src(ctx, if_stmt->condition);
3627 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3629 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3633 ctx->context, ctx->main_function, "");
3635 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
3636 LLVMConstInt(ctx->i32, 0, false), "");
3637 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
3639 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3640 visit_cf_list(ctx, &if_stmt->then_list);
3641 if (LLVMGetInsertBlock(ctx->builder))
3642 LLVMBuildBr(ctx->builder, merge_block);
3645 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
3646 visit_cf_list(ctx, &if_stmt->else_list);
3647 if (LLVMGetInsertBlock(ctx->builder))
3648 LLVMBuildBr(ctx->builder, merge_block);
3651 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
3654 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
3656 LLVMBasicBlockRef continue_parent = ctx->continue_block;
3657 LLVMBasicBlockRef break_parent = ctx->break_block;
3659 ctx->continue_block =
3660 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3661 ctx->break_block =
3662 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3664 LLVMBuildBr(ctx->builder, ctx->continue_block);
3665 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
3666 visit_cf_list(ctx, &loop->body);
3668 if (LLVMGetInsertBlock(ctx->builder))
3669 LLVMBuildBr(ctx->builder, ctx->continue_block);
3670 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
3672 ctx->continue_block = continue_parent;
3673 ctx->break_block = break_parent;
3676 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3683 visit_block(ctx, nir_cf_node_as_block(node));
3687 visit_if(ctx, nir_cf_node_as_if(node));
3691 visit_loop(ctx, nir_cf_node_as_loop(node));
3701 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
3704 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
3716 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
3717 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
3718 ctx->start_instance, "");
3719 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3720 ctx->shader_info->vs.vgpr_comp_cnt);
3722 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
3723 ctx->base_vertex, "");
3726 t_offset = LLVMConstInt(ctx->i32, index + i, false);
3728 t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
3730 args[1] = LLVMConstInt(ctx->i32, 0, false);
3732 input = ac_emit_llvm_intrinsic(&ctx->ac,
3733 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
3737 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3738 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
3739 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
3746 static void interp_fs_input(struct nir_to_llvm_context *ctx,
3756 attr_number = LLVMConstInt(ctx->i32, attr, false);
3772 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3778 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
3779 ctx->f32, args, args[3] ? 4 : 3,
3785 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
3793 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
3799 ctx->shader_info->fs.force_persample = true;
3805 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
3810 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
3815 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
3818 switch (ctx->stage) {
3820 handle_vs_input_decl(ctx, variable);
3823 handle_fs_input_decl(ctx, variable);
3832 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
3838 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
3840 if (!(ctx->input_mask & (1ull << i)))
3845 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
3849 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
3853 inputs[i] = ctx->frag_pos[i];
3855 inputs[3] = ac_emit_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
3858 ctx->shader_info->fs.num_interp = index;
3859 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
3860 ctx->shader_info->fs.has_pcoord = true;
3861 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
3865 ac_build_alloca(struct nir_to_llvm_context *ctx,
3869 LLVMBuilderRef builder = ctx->builder;
3874 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
3891 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
3895 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
3896 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
3901 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
3909 if (ctx->stage == MESA_SHADER_VERTEX) {
3915 ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
3916 ctx->num_clips = length;
3918 ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
3919 ctx->num_culls = length;
3930 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
3931 si_build_alloca_undef(ctx, ctx->f32, "");
3934 ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
3938 setup_locals(struct nir_to_llvm_context *ctx,
3942 ctx->num_locals = 0;
3945 variable->data.driver_location = ctx->num_locals * 4;
3946 ctx->num_locals += attrib_count;
3948 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
3949 if (!ctx->locals)
3952 for (i = 0; i < ctx->num_locals; i++) {
3954 ctx->locals[i * 4 + j] =
3955 si_build_alloca_undef(ctx, ctx->f32, "temp");
3961 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
3963 v = to_float(ctx, v);
3964 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
3965 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
3969 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
3972 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
3975 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
3976 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
3977 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
3978 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
3983 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
3989 args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
3991 args[1] = LLVMConstInt(ctx->i32, 0, false);
3994 args[2] = LLVMConstInt(ctx->i32, 0, false);
3996 args[3] = LLVMConstInt(ctx->i32, target, false);
3998 args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
3999 args[5] = LLVMGetUndef(ctx->f32);
4000 args[6] = LLVMGetUndef(ctx->f32);
4001 args[7] = LLVMGetUndef(ctx->f32);
4002 args[8] = LLVMGetUndef(ctx->f32);
4007 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4010 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4011 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4015 args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
4016 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
4020 args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
4025 args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
4031 args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
4037 args[4] = ctx->i32one;
4046 packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
4047 ctx->i32, pack_args, 2,
4055 val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
4056 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4057 LLVMConstReal(ctx->f32, 65535), "");
4058 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4059 LLVMConstReal(ctx->f32, 0.5), "");
4060 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4061 ctx->i32, "");
4064 args[4] = ctx->i32one;
4065 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4066 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4071 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4072 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4073 LLVMConstReal(ctx->f32, 32767), "");
4076 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4077 LLVMBuildSelect(ctx->builder,
4078 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4079 val[chan], ctx->f32zero, ""),
4080 LLVMConstReal(ctx->f32, 0.5),
4081 LLVMConstReal(ctx->f32, -0.5), ""), "");
4082 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4085 args[4] = ctx->i32one;
4086 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4087 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4091 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4094 val[chan] = to_integer(ctx, values[chan]);
4095 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4098 args[4] = ctx->i32one;
4099 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4100 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4105 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4106 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4110 val[chan] = to_integer(ctx, values[chan]);
4111 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4112 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4115 args[4] = ctx->i32one;
4116 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4117 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4130 args[i] = to_float(ctx, args[i]);
4134 handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
4143 const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
4152 if (ctx->shader_info->vs.cull_dist_mask)
4153 ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
4156 for (j = 0; j < ctx->num_clips; j++)
4157 slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4158 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4160 for (j = 0; j < ctx->num_culls; j++)
4161 slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4162 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4164 for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
4165 slots[i] = LLVMGetUndef(ctx->f32);
4167 if (ctx->num_clips + ctx->num_culls > 4) {
4169 si_llvm_init_export_args(ctx, &slots[4], target, args);
4175 si_llvm_init_export_args(ctx, &slots[0], target, args);
4183 if (!(ctx->output_mask & (1ull << i)))
4187 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4188 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4198 ctx->shader_info->vs.writes_pointsize = true;
4202 ctx->shader_info->vs.writes_layer = true;
4206 ctx->shader_info->vs.writes_viewport_index = true;
4210 ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
4215 si_llvm_init_export_args(ctx, values, target, args);
4222 ac_emit_llvm_intrinsic(&ctx->ac,
4224 ctx->voidt,
4231 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
4232 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
4233 pos_args[0][2] = ctx->i32zero; /* last export? */
4234 pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
4235 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
4236 pos_args[0][5] = ctx->f32zero; /* X */
4237 pos_args[0][6] = ctx->f32zero; /* Y */
4238 pos_args[0][7] = ctx->f32zero; /* Z */
4239 pos_args[0][8] = ctx->f32one; /* W */
4242 uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) |
4243 (ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
4244 (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0));
4246 pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* writemask */
4247 pos_args[1][1] = ctx->i32zero; /* EXEC mask */
4248 pos_args[1][2] = ctx->i32zero; /* last export? */
4249 pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
4250 pos_args[1][4] = ctx->i32zero; /* COMPR flag */
4251 pos_args[1][5] = ctx->f32zero; /* X */
4252 pos_args[1][6] = ctx->f32zero; /* Y */
4253 pos_args[1][7] = ctx->f32zero; /* Z */
4254 pos_args[1][8] = ctx->f32zero; /* W */
4256 if (ctx->shader_info->vs.writes_pointsize == true)
4258 if (ctx->shader_info->vs.writes_layer == true)
4260 if (ctx->shader_info->vs.writes_viewport_index == true)
4274 pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
4276 pos_args[i][2] = ctx->i32one;
4277 ac_emit_llvm_intrinsic(&ctx->ac,
4279 ctx->voidt,
4283 ctx->shader_info->vs.pos_exports = num_pos_exports;
4284 ctx->shader_info->vs.param_exports = param_count;
4288 si_export_mrt_color(struct nir_to_llvm_context *ctx,
4293 si_llvm_init_export_args(ctx, color, param,
4297 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4298 args[2] = ctx->i32one; /* DONE bit */
4299 } else if (args[0] == ctx->i32zero)
4302 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4303 ctx->voidt, args, 9, 0);
4307 si_export_mrt_z(struct nir_to_llvm_context *ctx,
4313 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4314 args[2] = ctx->i32one; /* DONE bit */
4316 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4318 args[4] = ctx->i32zero; /* COMP flag */
4319 args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4320 args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4321 args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4322 args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4341 if (ctx->options->chip_class == SI &&
4342 ctx->options->family != CHIP_OLAND)
4345 args[0] = LLVMConstInt(ctx->i32, mask, false);
4346 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4347 ctx->voidt, args, 9, 0);
4351 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
4359 if (!(ctx->output_mask & (1ull << i)))
4363 ctx->shader_info->fs.writes_z = true;
4364 depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
4365 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4367 ctx->shader_info->fs.writes_stencil = true;
4368 stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
4369 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4373 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4374 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4376 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
4377 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
4379 si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
4385 si_export_mrt_z(ctx, depth, stencil, samplemask);
4387 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
4389 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
4393 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
4395 switch (ctx->stage) {
4397 handle_vs_outputs_post(ctx);
4400 handle_fs_outputs_post(ctx);
4408 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4416 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
4421 ctx->module);
4435 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
4438 LLVMDisposeBuilder(ctx->builder);
4448 struct nir_to_llvm_context ctx = {0};
4451 ctx.options = options;
4452 ctx.shader_info = shader_info;
4453 ctx.context = LLVMContextCreate();
4454 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
4456 ac_llvm_context_init(&ctx.ac, ctx.context);
4457 ctx.ac.module = ctx.module;
4459 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
4463 LLVMSetTarget(ctx.module, "amdgcn--");
4467 LLVMSetDataLayout(ctx.module, data_layout_str);
4471 setup_types(&ctx);
4473 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
4474 ctx.ac.builder = ctx.builder;
4475 ctx.stage = nir->stage;
4482 create_function(&ctx);
4492 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
4494 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
4499 var = LLVMAddGlobalInAddressSpace(ctx.module,
4500 LLVMArrayType(ctx.i8, shared_size),
4504 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
4509 handle_shader_input_decl(&ctx, variable);
4512 handle_fs_inputs_pre(&ctx, nir);
4515 handle_shader_output_decl(&ctx, variable);
4517 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4519 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4524 setup_locals(&ctx, func);
4526 visit_cf_list(&ctx, &func->impl->body);
4527 phi_post_pass(&ctx);
4529 handle_shader_outputs_post(&ctx);
4530 LLVMBuildRetVoid(ctx.builder);
4532 ac_llvm_finalize_module(&ctx);
4533 free(ctx.locals);
4534 ralloc_free(ctx.defs);
4535 ralloc_free(ctx.phis);
4537 return ctx.module;
4620 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4622 LLVMContextDispose(ctx);