From 8c17ed1512239a5a9b1320f678a8cd89db8b0981 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 11 Sep 2024 21:13:26 -0700
Subject: [PATCH] [RISCV] Generalize RISCVDAGToDAGISel::selectFPImm to handle
 bitcasts from int to FP. (#108284)

selectFPImm previously handled cases where an FPImm could be
materialized in an integer register. We can generalize this to cases
where a value was in an integer register and then copied to a scalar FP
register to be used by a vector instruction.

In the affected test, the call lowering code used up all of the FP
argument registers and started using GPRs. Now we use integer vector
instructions to consume those GPRs instead of moving them to scalar FP
first. A reduced standalone example of the affected IR shape is
sketched after the diff.
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  16 ++-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 +-
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |   3 +-
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td |   2 +-
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |   6 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    | 130 ++++++++----
 6 files changed, 79 insertions(+), 80 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ff4c0e9bbd50e7..02585c9f603736 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3535,7 +3535,21 @@ bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
   return selectVSplat(N, SplatVal);
 }
 
-bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
+bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
+  // Allow bitcasts from XLenVT -> FP.
+  if (N.getOpcode() == ISD::BITCAST &&
+      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+  // Allow moves from XLenVT to FP.
+  if (N.getOpcode() == RISCVISD::FMV_H_X ||
+      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+
+  // Otherwise, look for FP constants that can be materialized with scalar int.
   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
   if (!CFP)
     return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 1d120c13442d51..2e738d8d25a6dc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -140,7 +140,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   // Matches the splat of a value which can be extended or truncated, such that
   // only the bottom 8 bits are preserved.
   bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal);
-  bool selectFPImm(SDValue N, SDValue &Imm);
+  bool selectScalarFPAsInt(SDValue N, SDValue &Imm);
 
   bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
   template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 430e09fd834ba7..fe7de9d7bc79aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -236,7 +236,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
 // This must be kept in sync with RISCV::VLMaxSentinel.
 def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
 
-def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 10>;
+def SelectScalarFPAsInt : ComplexPattern<fAny, 1, "selectScalarFPAsInt", [], [],
+                                         10>;
 
 // List of EEW.
 defvar EEWList = [8, 16, 32, 64];
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index f12f82cb159529..b54cdcbd1b0e9c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1374,7 +1374,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
                  fvti.AVL, fvti.Log2SEW)>;
 
   def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
-                                  (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+                                  (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                   fvti.RegClass:$rs2)),
             (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
                  (fvti.Vector (IMPLICIT_DEF)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9afbe567193607..a27c3a416816e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2575,7 +2575,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
                  GPR:$vl, fvti.Log2SEW)>;
 
   def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
-                                          (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+                                          (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                           fvti.RegClass:$rs2,
                                           fvti.RegClass:$passthru,
                                           VLOpFrag)),
@@ -2619,7 +2619,7 @@
             (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
              $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
   def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
-                          fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
+                          fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
             (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
              $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
 }
@@ -2940,7 +2940,7 @@ foreach vti = NoGroupFloatVectors in {
                                          VLOpFrag)),
             (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
-                                           (vti.Scalar (SelectFPImm (XLenVT GPR:$imm))),
+                                           (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                            VLOpFrag)),
             (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index e3aabb5de29c28..b5d3e2cd776f27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1348,20 +1348,16 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ;
 ; RV64-LABEL: buildvec_v32f64_exact_vlen:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    fsd fs0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs2, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs4, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs5, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs6, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs7, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs8, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs9, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi sp, sp, -64
+; RV64-NEXT:    .cfi_def_cfa_offset 64
+; RV64-NEXT:    fsd fs0, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs1, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs2, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs4, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs5, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs6, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs7, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset fs0, -8
 ; RV64-NEXT:    .cfi_offset fs1, -16
 ; RV64-NEXT:    .cfi_offset fs2, -24
@@ -1370,34 +1366,26 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    .cfi_offset fs5, -48
 ; RV64-NEXT:    .cfi_offset fs6, -56
 ; RV64-NEXT:    .cfi_offset fs7, -64
-; RV64-NEXT:    .cfi_offset fs8, -72
-; RV64-NEXT:    .cfi_offset fs9, -80
-; RV64-NEXT:    .cfi_offset fs10, -88
-; RV64-NEXT:    .cfi_offset fs11, -96
 ; RV64-NEXT:    fmv.d.x ft4, a7
-; RV64-NEXT:    fmv.d.x ft5, a6
-; RV64-NEXT:    fmv.d.x ft6, a5
-; RV64-NEXT:    fmv.d.x ft7, a4
-; RV64-NEXT:    fmv.d.x ft8, a3
-; RV64-NEXT:    fmv.d.x ft9, a2
-; RV64-NEXT:    fmv.d.x ft10, a1
-; RV64-NEXT:    fmv.d.x ft11, a0
-; RV64-NEXT:    fld ft0, 216(sp)
-; RV64-NEXT:    fld ft1, 208(sp)
-; RV64-NEXT:    fld ft2, 200(sp)
-; RV64-NEXT:    fld ft3, 192(sp)
-; RV64-NEXT:    fld fs0, 184(sp)
-; RV64-NEXT:    fld fs1, 176(sp)
-; RV64-NEXT:    fld fs2, 168(sp)
-; RV64-NEXT:    fld fs3, 160(sp)
-; RV64-NEXT:    fld fs4, 152(sp)
-; RV64-NEXT:    fld fs5, 144(sp)
-; RV64-NEXT:    fld fs6, 136(sp)
-; RV64-NEXT:    fld fs7, 128(sp)
-; RV64-NEXT:    fld fs8, 104(sp)
-; RV64-NEXT:    fld fs9, 96(sp)
-; RV64-NEXT:    fld fs10, 120(sp)
-; RV64-NEXT:    fld fs11, 112(sp)
+; RV64-NEXT:    fmv.d.x ft5, a5
+; RV64-NEXT:    fmv.d.x ft6, a3
+; RV64-NEXT:    fmv.d.x ft7, a1
+; RV64-NEXT:    fld ft0, 184(sp)
+; RV64-NEXT:    fld ft1, 176(sp)
+; RV64-NEXT:    fld ft2, 168(sp)
+; RV64-NEXT:    fld ft3, 160(sp)
+; RV64-NEXT:    fld ft8, 152(sp)
+; RV64-NEXT:    fld ft9, 144(sp)
+; RV64-NEXT:    fld ft10, 136(sp)
+; RV64-NEXT:    fld ft11, 128(sp)
+; RV64-NEXT:    fld fs0, 120(sp)
+; RV64-NEXT:    fld fs1, 112(sp)
+; RV64-NEXT:    fld fs2, 104(sp)
+; RV64-NEXT:    fld fs3, 96(sp)
+; RV64-NEXT:    fld fs4, 72(sp)
+; RV64-NEXT:    fld fs5, 64(sp)
+; RV64-NEXT:    fld fs6, 88(sp)
+; RV64-NEXT:    fld fs7, 80(sp)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vfmv.v.f v8, fa2
 ; RV64-NEXT:    vfslide1down.vf v9, v8, fa3
@@ -1407,43 +1395,39 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    vfslide1down.vf v10, v10, fa5
 ; RV64-NEXT:    vfmv.v.f v11, fa6
 ; RV64-NEXT:    vfslide1down.vf v11, v11, fa7
-; RV64-NEXT:    vfmv.v.f v12, ft11
-; RV64-NEXT:    vfslide1down.vf v12, v12, ft10
-; RV64-NEXT:    vfmv.v.f v13, ft9
-; RV64-NEXT:    vfslide1down.vf v13, v13, ft8
-; RV64-NEXT:    vfmv.v.f v14, ft7
-; RV64-NEXT:    vfslide1down.vf v14, v14, ft6
-; RV64-NEXT:    vfmv.v.f v15, ft5
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vfslide1down.vf v12, v12, ft7
+; RV64-NEXT:    vmv.v.x v13, a2
+; RV64-NEXT:    vfslide1down.vf v13, v13, ft6
+; RV64-NEXT:    vmv.v.x v14, a4
+; RV64-NEXT:    vfslide1down.vf v14, v14, ft5
+; RV64-NEXT:    vmv.v.x v15, a6
 ; RV64-NEXT:    vfslide1down.vf v15, v15, ft4
-; RV64-NEXT:    vfmv.v.f v16, fs11
-; RV64-NEXT:    vfslide1down.vf v17, v16, fs10
-; RV64-NEXT:    vfmv.v.f v16, fs9
-; RV64-NEXT:    vfslide1down.vf v16, v16, fs8
-; RV64-NEXT:    vfmv.v.f v18, fs7
-; RV64-NEXT:    vfslide1down.vf v18, v18, fs6
-; RV64-NEXT:    vfmv.v.f v19, fs5
-; RV64-NEXT:    vfslide1down.vf v19, v19, fs4
-; RV64-NEXT:    vfmv.v.f v20, fs3
-; RV64-NEXT:    vfslide1down.vf v20, v20, fs2
-; RV64-NEXT:    vfmv.v.f v21, fs1
-; RV64-NEXT:    vfslide1down.vf v21, v21, fs0
+; RV64-NEXT:    vfmv.v.f v16, fs7
+; RV64-NEXT:    vfslide1down.vf v17, v16, fs6
+; RV64-NEXT:    vfmv.v.f v16, fs5
+; RV64-NEXT:    vfslide1down.vf v16, v16, fs4
+; RV64-NEXT:    vfmv.v.f v18, fs3
+; RV64-NEXT:    vfslide1down.vf v18, v18, fs2
+; RV64-NEXT:    vfmv.v.f v19, fs1
+; RV64-NEXT:    vfslide1down.vf v19, v19, fs0
+; RV64-NEXT:    vfmv.v.f v20, ft11
+; RV64-NEXT:    vfslide1down.vf v20, v20, ft10
+; RV64-NEXT:    vfmv.v.f v21, ft9
+; RV64-NEXT:    vfslide1down.vf v21, v21, ft8
 ; RV64-NEXT:    vfmv.v.f v22, ft3
 ; RV64-NEXT:    vfslide1down.vf v22, v22, ft2
 ; RV64-NEXT:    vfmv.v.f v23, ft1
 ; RV64-NEXT:    vfslide1down.vf v23, v23, ft0
-; RV64-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    fld fs0, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs1, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs2, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs3, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs4, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs5, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs6, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs7, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 64
 ; RV64-NEXT:    ret
   %v0 = insertelement <32 x double> poison, double %e0, i64 0
   %v1 = insertelement <32 x double> %v0, double %e1, i64 1
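
A reduced standalone example (not part of the patch) of the kind of IR the
change affects. The function name and vector width are hypothetical; it assumes
the RV64 lp64d calling convention, where the first eight double arguments
occupy fa0-fa7, so %e8 and %e9 arrive in the GPRs a0 and a1.

; Before this patch, %e8 was first copied to an FP register with fmv.d.x and
; then splatted with vfmv.v.f. With selectScalarFPAsInt generalized, the splat
; of the GPR-resident value can be selected directly to vmv.v.x instead.
define <2 x double> @buildvec_tail_in_gprs(double %e0, double %e1, double %e2,
                                           double %e3, double %e4, double %e5,
                                           double %e6, double %e7, double %e8,
                                           double %e9) {
  %v0 = insertelement <2 x double> poison, double %e8, i64 0
  %v1 = insertelement <2 x double> %v0, double %e9, i64 1
  ret <2 x double> %v1
}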