diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ff4c0e9bbd50e7..02585c9f603736 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3535,7 +3535,21 @@ bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
   return selectVSplat(N, SplatVal);
 }
 
-bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
+bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
+  // Allow bitcasts from XLenVT -> FP.
+  if (N.getOpcode() == ISD::BITCAST &&
+      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+  // Allow moves from XLenVT to FP.
+  if (N.getOpcode() == RISCVISD::FMV_H_X ||
+      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+
+  // Otherwise, look for FP constants that can be materialized with scalar int.
   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
   if (!CFP)
     return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 1d120c13442d51..2e738d8d25a6dc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -140,7 +140,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   // Matches the splat of a value which can be extended or truncated, such that
   // only the bottom 8 bits are preserved.
   bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal);
-  bool selectFPImm(SDValue N, SDValue &Imm);
+  bool selectScalarFPAsInt(SDValue N, SDValue &Imm);
 
   bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
   template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 430e09fd834ba7..fe7de9d7bc79aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -236,7 +236,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
 // This must be kept in sync with RISCV::VLMaxSentinel.
 def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
 
-def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 10>;
+def SelectScalarFPAsInt : ComplexPattern<fAny, 1, "selectScalarFPAsInt", [],
+                                         [], 10>;
 
 // List of EEW.
 defvar EEWList = [8, 16, 32, 64];
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index f12f82cb159529..b54cdcbd1b0e9c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1374,7 +1374,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
                  fvti.AVL, fvti.Log2SEW)>;
 
   def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
-                                  (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+                                  (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                   fvti.RegClass:$rs2)),
             (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
                  (fvti.Vector (IMPLICIT_DEF)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9afbe567193607..a27c3a416816e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2575,7 +2575,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
                  GPR:$vl, fvti.Log2SEW)>;
 
   def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
-                                          (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+                                          (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                           fvti.RegClass:$rs2,
                                           fvti.RegClass:$passthru,
                                           VLOpFrag)),
@@ -2619,7 +2619,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
             (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
              $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
   def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
-              fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
+              fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
             (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
              $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
 }
@@ -2940,7 +2940,7 @@ foreach vti = NoGroupFloatVectors in {
                                        VLOpFrag)),
             (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
-                                           (vti.Scalar (SelectFPImm (XLenVT GPR:$imm))),
+                                           (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                            VLOpFrag)),
             (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index e3aabb5de29c28..b5d3e2cd776f27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1348,20 +1348,16 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ;
 ; RV64-LABEL: buildvec_v32f64_exact_vlen:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    fsd fs0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs2, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs4, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs5, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs6, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs7, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs8, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs9, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi sp, sp, -64
+; RV64-NEXT:    .cfi_def_cfa_offset 64
+; RV64-NEXT:    fsd fs0, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs1, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs2, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs4, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs5, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs6, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs7, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset fs0, -8
 ; RV64-NEXT:    .cfi_offset fs1, -16
 ; RV64-NEXT:    .cfi_offset fs2, -24
@@ -1370,34 +1366,26 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    .cfi_offset fs5, -48
 ; RV64-NEXT:    .cfi_offset fs6, -56
 ; RV64-NEXT:    .cfi_offset fs7, -64
-; RV64-NEXT:    .cfi_offset fs8, -72
-; RV64-NEXT:    .cfi_offset fs9, -80
-; RV64-NEXT:    .cfi_offset fs10, -88
-; RV64-NEXT:    .cfi_offset fs11, -96
 ; RV64-NEXT:    fmv.d.x ft4, a7
-; RV64-NEXT:    fmv.d.x ft5, a6
-; RV64-NEXT:    fmv.d.x ft6, a5
-; RV64-NEXT:    fmv.d.x ft7, a4
-; RV64-NEXT:    fmv.d.x ft8, a3
-; RV64-NEXT:    fmv.d.x ft9, a2
-; RV64-NEXT:    fmv.d.x ft10, a1
-; RV64-NEXT:    fmv.d.x ft11, a0
-; RV64-NEXT:    fld ft0, 216(sp)
-; RV64-NEXT:    fld ft1, 208(sp)
-; RV64-NEXT:    fld ft2, 200(sp)
-; RV64-NEXT:    fld ft3, 192(sp)
-; RV64-NEXT:    fld fs0, 184(sp)
-; RV64-NEXT:    fld fs1, 176(sp)
-; RV64-NEXT:    fld fs2, 168(sp)
-; RV64-NEXT:    fld fs3, 160(sp)
-; RV64-NEXT:    fld fs4, 152(sp)
-; RV64-NEXT:    fld fs5, 144(sp)
-; RV64-NEXT:    fld fs6, 136(sp)
-; RV64-NEXT:    fld fs7, 128(sp)
-; RV64-NEXT:    fld fs8, 104(sp)
-; RV64-NEXT:    fld fs9, 96(sp)
-; RV64-NEXT:    fld fs10, 120(sp)
-; RV64-NEXT:    fld fs11, 112(sp)
+; RV64-NEXT:    fmv.d.x ft5, a5
+; RV64-NEXT:    fmv.d.x ft6, a3
+; RV64-NEXT:    fmv.d.x ft7, a1
+; RV64-NEXT:    fld ft0, 184(sp)
+; RV64-NEXT:    fld ft1, 176(sp)
+; RV64-NEXT:    fld ft2, 168(sp)
+; RV64-NEXT:    fld ft3, 160(sp)
+; RV64-NEXT:    fld ft8, 152(sp)
+; RV64-NEXT:    fld ft9, 144(sp)
+; RV64-NEXT:    fld ft10, 136(sp)
+; RV64-NEXT:    fld ft11, 128(sp)
+; RV64-NEXT:    fld fs0, 120(sp)
+; RV64-NEXT:    fld fs1, 112(sp)
+; RV64-NEXT:    fld fs2, 104(sp)
+; RV64-NEXT:    fld fs3, 96(sp)
+; RV64-NEXT:    fld fs4, 72(sp)
+; RV64-NEXT:    fld fs5, 64(sp)
+; RV64-NEXT:    fld fs6, 88(sp)
+; RV64-NEXT:    fld fs7, 80(sp)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vfmv.v.f v8, fa2
 ; RV64-NEXT:    vfslide1down.vf v9, v8, fa3
@@ -1407,43 +1395,39 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    vfslide1down.vf v10, v10, fa5
 ; RV64-NEXT:    vfmv.v.f v11, fa6
 ; RV64-NEXT:    vfslide1down.vf v11, v11, fa7
-; RV64-NEXT:    vfmv.v.f v12, ft11
-; RV64-NEXT:    vfslide1down.vf v12, v12, ft10
-; RV64-NEXT:    vfmv.v.f v13, ft9
-; RV64-NEXT:    vfslide1down.vf v13, v13, ft8
-; RV64-NEXT:    vfmv.v.f v14, ft7
-; RV64-NEXT:    vfslide1down.vf v14, v14, ft6
-; RV64-NEXT:    vfmv.v.f v15, ft5
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vfslide1down.vf v12, v12, ft7
+; RV64-NEXT:    vmv.v.x v13, a2
+; RV64-NEXT:    vfslide1down.vf v13, v13, ft6
+; RV64-NEXT:    vmv.v.x v14, a4
+; RV64-NEXT:    vfslide1down.vf v14, v14, ft5
+; RV64-NEXT:    vmv.v.x v15, a6
 ; RV64-NEXT:    vfslide1down.vf v15, v15, ft4
-; RV64-NEXT:    vfmv.v.f v16, fs11
-; RV64-NEXT:    vfslide1down.vf v17, v16, fs10
-; RV64-NEXT:    vfmv.v.f v16, fs9
-; RV64-NEXT:    vfslide1down.vf v16, v16, fs8
-; RV64-NEXT:    vfmv.v.f v18, fs7
-; RV64-NEXT:    vfslide1down.vf v18, v18, fs6
-; RV64-NEXT:    vfmv.v.f v19, fs5
-; RV64-NEXT:    vfslide1down.vf v19, v19, fs4
-; RV64-NEXT:    vfmv.v.f v20, fs3
-; RV64-NEXT:    vfslide1down.vf v20, v20, fs2
-; RV64-NEXT:    vfmv.v.f v21, fs1
-; RV64-NEXT:    vfslide1down.vf v21, v21, fs0
+; RV64-NEXT:    vfmv.v.f v16, fs7
+; RV64-NEXT:    vfslide1down.vf v17, v16, fs6
+; RV64-NEXT:    vfmv.v.f v16, fs5
+; RV64-NEXT:    vfslide1down.vf v16, v16, fs4
+; RV64-NEXT:    vfmv.v.f v18, fs3
+; RV64-NEXT:    vfslide1down.vf v18, v18, fs2
+; RV64-NEXT:    vfmv.v.f v19, fs1
+; RV64-NEXT:    vfslide1down.vf v19, v19, fs0
+; RV64-NEXT:    vfmv.v.f v20, ft11
+; RV64-NEXT:    vfslide1down.vf v20, v20, ft10
+; RV64-NEXT:    vfmv.v.f v21, ft9
+; RV64-NEXT:    vfslide1down.vf v21, v21, ft8
 ; RV64-NEXT:    vfmv.v.f v22, ft3
 ; RV64-NEXT:    vfslide1down.vf v22, v22, ft2
 ; RV64-NEXT:    vfmv.v.f v23, ft1
 ; RV64-NEXT:    vfslide1down.vf v23, v23, ft0
-; RV64-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    fld fs0, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs1, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs2, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs3, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs4, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs5, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs6, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs7, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 64
 ; RV64-NEXT:    ret
   %v0 = insertelement <32 x double> poison, double %e0, i64 0
  %v1 = insertelement <32 x double> %v0, double %e1, i64 1
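
Reviewer note, not part of the patch: the buildvec_v32f64_exact_vlen changes above are the visible effect of selectScalarFPAsInt also matching XLenVT->FP bitcasts and FMV nodes, so a double that already lives in a GPR is splatted with vmv.v.x instead of first being copied to an FPR. A minimal, hypothetical IR reduction of that situation (function name chosen for illustration, codegen expectation hedged in the comment):

; Hypothetical reduced example: the double comes from an i64 bitcast, so it is
; already in a GPR; with this patch the splat is expected to select vmv.v.x
; rather than fmv.d.x followed by vfmv.v.f.
define <2 x double> @splat_double_from_i64(i64 %x) {
  %f = bitcast i64 %x to double
  %head = insertelement <2 x double> poison, double %f, i64 0
  %splat = shufflevector <2 x double> %head, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}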