[RISCV] Generalize RISCVDAGToDAGISel::selectFPImm to handle bitcasts from int to FP. (llvm#108284)

selectFPImm previously handled cases where an FPImm could be
materialized in an integer register.

We can generalize this to cases where a value is in an integer register
and is then copied to a scalar FP register to be used by a vector
instruction.

In the affected test, the call lowering code used up all of the FP
argument registers and started using GPRs. Now we use integer vector
instructions to consume those GPRs instead of moving them to scalar FP
first.
topperc authored Sep 12, 2024
1 parent 08740a6 commit 8c17ed1
Showing 6 changed files with 79 additions and 80 deletions.
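In effect, the renamed matcher peels an explicit GPR-to-FPR move node or an integer-to-FP bitcast off the scalar operand and hands the underlying XLen-wide integer value back to the vector patterns. The sketch below condenses that logic into a hypothetical standalone helper; matchScalarFPAsInt is illustrative only, and the real change is the member function RISCVDAGToDAGISel::selectScalarFPAsInt shown in the first hunk.

// Condensed, illustrative sketch of the new matching rule. Assumes the usual
// in-tree RISC-V backend headers (RISCVISelLowering.h for the RISCVISD node
// kinds, llvm/CodeGen/SelectionDAGNodes.h for SDValue); the wrapper function
// matchScalarFPAsInt itself is hypothetical.
static bool matchScalarFPAsInt(SDValue N, MVT XLenVT, SDValue &Imm) {
  // Accept an XLenVT integer bitcast to FP (e.g. i64 -> f64 on RV64).
  if (N.getOpcode() == ISD::BITCAST &&
      N.getOperand(0).getValueType() == XLenVT) {
    Imm = N.getOperand(0);
    return true;
  }
  // Accept explicit GPR -> FPR move nodes (fmv.h.x, or fmv.w.x on RV64).
  if (N.getOpcode() == RISCVISD::FMV_H_X ||
      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
    Imm = N.getOperand(0);
    return true;
  }
  // FP constants that are cheaper to build in a GPR keep going through the
  // original selectFPImm path (not repeated here).
  return false;
}

With that in place, the TableGen patterns that previously used SelectFPImm consume the GPR operand directly, which is why the RV64 test below now builds vectors with vmv.v.x straight from the argument registers instead of fmv.d.x followed by vfmv.v.f, and no longer needs the fs8-fs11 spill slots.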
16 changes: 15 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3535,7 +3535,21 @@ bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
// Allow bitcasts from XLenVT -> FP.
if (N.getOpcode() == ISD::BITCAST &&
N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
Imm = N.getOperand(0);
return true;
}
// Allow moves from XLenVT to FP.
if (N.getOpcode() == RISCVISD::FMV_H_X ||
N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
Imm = N.getOperand(0);
return true;
}

// Otherwise, look for FP constants that can be materialized with scalar int.
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
if (!CFP)
return false;
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -140,7 +140,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
// Matches the splat of a value which can be extended or truncated, such that
// only the bottom 8 bits are preserved.
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal);
bool selectFPImm(SDValue N, SDValue &Imm);
bool selectScalarFPAsInt(SDValue N, SDValue &Imm);

bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -236,7 +236,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
// This must be kept in sync with RISCV::VLMaxSentinel.
def VLMax : OutPatFrag<(ops), (XLenVT -1)>;

def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
def SelectScalarFPAsInt : ComplexPattern<fAny, 1, "selectScalarFPAsInt", [], [],
1>;

// List of EEW.
defvar EEWList = [8, 16, 32, 64];
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1374,7 +1374,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
fvti.AVL, fvti.Log2SEW)>;

def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
(SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
6 changes: 3 additions & 3 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2575,7 +2575,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
GPR:$vl, fvti.Log2SEW)>;

def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
(SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
(SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
fvti.RegClass:$rs2,
fvti.RegClass:$passthru,
VLOpFrag)),
@@ -2619,7 +2619,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
$passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
$passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
}
@@ -2940,7 +2940,7 @@ foreach vti = NoGroupFloatVectors in {
VLOpFrag)),
(PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
(vti.Scalar (SelectFPImm (XLenVT GPR:$imm))),
(vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
VLOpFrag)),
(PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
130 changes: 57 additions & 73 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1348,20 +1348,16 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
;
; RV64-LABEL: buildvec_v32f64_exact_vlen:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -96
; RV64-NEXT: .cfi_def_cfa_offset 96
; RV64-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs11, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: addi sp, sp, -64
; RV64-NEXT: .cfi_def_cfa_offset 64
; RV64-NEXT: fsd fs0, 56(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs1, 48(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs2, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs3, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs4, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs5, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs6, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: fsd fs7, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset fs0, -8
; RV64-NEXT: .cfi_offset fs1, -16
; RV64-NEXT: .cfi_offset fs2, -24
@@ -1370,34 +1366,26 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV64-NEXT: .cfi_offset fs5, -48
; RV64-NEXT: .cfi_offset fs6, -56
; RV64-NEXT: .cfi_offset fs7, -64
; RV64-NEXT: .cfi_offset fs8, -72
; RV64-NEXT: .cfi_offset fs9, -80
; RV64-NEXT: .cfi_offset fs10, -88
; RV64-NEXT: .cfi_offset fs11, -96
; RV64-NEXT: fmv.d.x ft4, a7
; RV64-NEXT: fmv.d.x ft5, a6
; RV64-NEXT: fmv.d.x ft6, a5
; RV64-NEXT: fmv.d.x ft7, a4
; RV64-NEXT: fmv.d.x ft8, a3
; RV64-NEXT: fmv.d.x ft9, a2
; RV64-NEXT: fmv.d.x ft10, a1
; RV64-NEXT: fmv.d.x ft11, a0
; RV64-NEXT: fld ft0, 216(sp)
; RV64-NEXT: fld ft1, 208(sp)
; RV64-NEXT: fld ft2, 200(sp)
; RV64-NEXT: fld ft3, 192(sp)
; RV64-NEXT: fld fs0, 184(sp)
; RV64-NEXT: fld fs1, 176(sp)
; RV64-NEXT: fld fs2, 168(sp)
; RV64-NEXT: fld fs3, 160(sp)
; RV64-NEXT: fld fs4, 152(sp)
; RV64-NEXT: fld fs5, 144(sp)
; RV64-NEXT: fld fs6, 136(sp)
; RV64-NEXT: fld fs7, 128(sp)
; RV64-NEXT: fld fs8, 104(sp)
; RV64-NEXT: fld fs9, 96(sp)
; RV64-NEXT: fld fs10, 120(sp)
; RV64-NEXT: fld fs11, 112(sp)
; RV64-NEXT: fmv.d.x ft5, a5
; RV64-NEXT: fmv.d.x ft6, a3
; RV64-NEXT: fmv.d.x ft7, a1
; RV64-NEXT: fld ft0, 184(sp)
; RV64-NEXT: fld ft1, 176(sp)
; RV64-NEXT: fld ft2, 168(sp)
; RV64-NEXT: fld ft3, 160(sp)
; RV64-NEXT: fld ft8, 152(sp)
; RV64-NEXT: fld ft9, 144(sp)
; RV64-NEXT: fld ft10, 136(sp)
; RV64-NEXT: fld ft11, 128(sp)
; RV64-NEXT: fld fs0, 120(sp)
; RV64-NEXT: fld fs1, 112(sp)
; RV64-NEXT: fld fs2, 104(sp)
; RV64-NEXT: fld fs3, 96(sp)
; RV64-NEXT: fld fs4, 72(sp)
; RV64-NEXT: fld fs5, 64(sp)
; RV64-NEXT: fld fs6, 88(sp)
; RV64-NEXT: fld fs7, 80(sp)
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vfmv.v.f v8, fa2
; RV64-NEXT: vfslide1down.vf v9, v8, fa3
Expand All @@ -1407,43 +1395,39 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV64-NEXT: vfslide1down.vf v10, v10, fa5
; RV64-NEXT: vfmv.v.f v11, fa6
; RV64-NEXT: vfslide1down.vf v11, v11, fa7
; RV64-NEXT: vfmv.v.f v12, ft11
; RV64-NEXT: vfslide1down.vf v12, v12, ft10
; RV64-NEXT: vfmv.v.f v13, ft9
; RV64-NEXT: vfslide1down.vf v13, v13, ft8
; RV64-NEXT: vfmv.v.f v14, ft7
; RV64-NEXT: vfslide1down.vf v14, v14, ft6
; RV64-NEXT: vfmv.v.f v15, ft5
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vfslide1down.vf v12, v12, ft7
; RV64-NEXT: vmv.v.x v13, a2
; RV64-NEXT: vfslide1down.vf v13, v13, ft6
; RV64-NEXT: vmv.v.x v14, a4
; RV64-NEXT: vfslide1down.vf v14, v14, ft5
; RV64-NEXT: vmv.v.x v15, a6
; RV64-NEXT: vfslide1down.vf v15, v15, ft4
; RV64-NEXT: vfmv.v.f v16, fs11
; RV64-NEXT: vfslide1down.vf v17, v16, fs10
; RV64-NEXT: vfmv.v.f v16, fs9
; RV64-NEXT: vfslide1down.vf v16, v16, fs8
; RV64-NEXT: vfmv.v.f v18, fs7
; RV64-NEXT: vfslide1down.vf v18, v18, fs6
; RV64-NEXT: vfmv.v.f v19, fs5
; RV64-NEXT: vfslide1down.vf v19, v19, fs4
; RV64-NEXT: vfmv.v.f v20, fs3
; RV64-NEXT: vfslide1down.vf v20, v20, fs2
; RV64-NEXT: vfmv.v.f v21, fs1
; RV64-NEXT: vfslide1down.vf v21, v21, fs0
; RV64-NEXT: vfmv.v.f v16, fs7
; RV64-NEXT: vfslide1down.vf v17, v16, fs6
; RV64-NEXT: vfmv.v.f v16, fs5
; RV64-NEXT: vfslide1down.vf v16, v16, fs4
; RV64-NEXT: vfmv.v.f v18, fs3
; RV64-NEXT: vfslide1down.vf v18, v18, fs2
; RV64-NEXT: vfmv.v.f v19, fs1
; RV64-NEXT: vfslide1down.vf v19, v19, fs0
; RV64-NEXT: vfmv.v.f v20, ft11
; RV64-NEXT: vfslide1down.vf v20, v20, ft10
; RV64-NEXT: vfmv.v.f v21, ft9
; RV64-NEXT: vfslide1down.vf v21, v21, ft8
; RV64-NEXT: vfmv.v.f v22, ft3
; RV64-NEXT: vfslide1down.vf v22, v22, ft2
; RV64-NEXT: vfmv.v.f v23, ft1
; RV64-NEXT: vfslide1down.vf v23, v23, ft0
; RV64-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 96
; RV64-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs1, 48(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs2, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs3, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs4, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs5, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs6, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: fld fs7, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: ret
%v0 = insertelement <32 x double> poison, double %e0, i64 0
%v1 = insertelement <32 x double> %v0, double %e1, i64 1
