Skip to content

Commit

Permalink
[X86][FP16] Fix masking problem of VF[,C]MADDCSH intrinsics (llvm#118071)
Browse files Browse the repository at this point in the history

Fixes: llvm#98306
  • Loading branch information
phoebewang authored Nov 29, 2024
1 parent 9300274 commit bbea1de
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26265,6 +26265,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
if (!NewOp)
NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
if (IntrData->Opc0 == X86ISD::VFMADDCSH ||
IntrData->Opc0 == X86ISD::VFCMADDCSH)
return getScalarMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
}
case IFMA_OP:
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -13533,17 +13533,17 @@ let Uses = [MXCSR] in {
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
bit IsCommutable> {
let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
defm r : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, VR128X:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3",
(v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
Sched<[WriteFMAX]>;
defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
defm m : AVX512_maskable_3src_scalar<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, ssmem:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3",
(v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
defm rb : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
(ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
"$rc, $src3, $src2", "$src2, $src3, $rc",
(v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,15 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9)
ret <4 x float> %res
}

; Regression test for PR98306 (llvm#98306).
; Calls the maskz.vfmadd.csh intrinsic with three constant <4 x float>
; operands and a constant i8 0 as the fourth operand (presumably the write
; mask -- confirm against the intrinsic definition).
; The CHECK lines expect %k1 to be cleared with kxorw and the masked, zeroing
; form of vfmaddcsh ({%k1} {z}) to be emitted on the materialized constants.
define <4 x float> @PR98306() {
; CHECK-LABEL: PR98306:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4)
ret <4 x float> %res
}

0 comments on commit bbea1de

Please sign in to comment.