Skip to content

Commit

Permalink
improve HWIntrinsic containment logic
Browse files Browse the repository at this point in the history
  • Loading branch information
saucecontrol committed Dec 16, 2024
1 parent 052ad42 commit e7bc099
Show file tree
Hide file tree
Showing 8 changed files with 289 additions and 615 deletions.
3 changes: 2 additions & 1 deletion src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ class CodeGenInterface
public:
static bool instIsFP(instruction ins);
#if defined(TARGET_XARCH)
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
static unsigned instInputSize(instruction ins);
#endif // TARGET_XARCH
//-------------------------------------------------------------------------
// Liveness-related fields & methods
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20422,6 +20422,7 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}

case NI_SSE3_LoadAndDuplicateToVector128:
case NI_SSE3_MoveAndDuplicate:
case NI_AVX_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector128:
Expand Down Expand Up @@ -26971,8 +26972,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_SSE41_ConvertToVector128Int64:
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX512F_BroadcastScalarToVector512:
case NI_AVX512BW_BroadcastScalarToVector512:
case NI_AVX2_ConvertToVector256Int16:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX2_ConvertToVector256Int64:
Expand Down Expand Up @@ -27241,6 +27240,7 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX_BroadcastScalarToVector128:
case NI_AVX_BroadcastScalarToVector256:
case NI_SSE3_LoadAndDuplicateToVector128:
case NI_SSE3_MoveAndDuplicate:
case NI_AVX512F_BroadcastScalarToVector512:
return true;
Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2075,8 +2075,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
case NI_SSE41_ConvertToVector128Int64:
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX512F_BroadcastScalarToVector512:
case NI_AVX512BW_BroadcastScalarToVector512:
case NI_AVX2_ConvertToVector256Int16:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX2_ConvertToVector256Int64:
Expand Down
13 changes: 1 addition & 12 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -912,18 +912,7 @@ void CodeGen::genHWIntrinsic_R_RM(
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
{
if (varTypeIsSmall(node->GetSimdBaseType()))
{
if (compiler->canUseEvexEncoding())
{
needsInstructionFixup = true;
}
else
{
needsBroadcastFixup = true;
}
}
else if (compiler->canUseEvexEncoding())
if (compiler->canUseEvexEncoding())
{
needsInstructionFixup = true;
}
Expand Down
14 changes: 7 additions & 7 deletions src/coreclr/jit/hwintrinsiclistxarch.h

Large diffs are not rendered by default.

43 changes: 35 additions & 8 deletions src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,31 @@ bool CodeGenInterface::instIsEmbeddedBroadcastCompatible(instruction ins)

return (instInfo[ins] & INS_Flags_EmbeddedBroadcastSupported) != 0;
}

/*****************************************************************************
 *
 *  Returns the given instruction's embedded broadcast size in bytes.
 *
 *  The size is decoded from the input-size flag stored for the instruction
 *  in instInfo (the bits selected by Input_Mask):
 *
 *      Input_8Bit  -> 1
 *      Input_16Bit -> 2
 *      Input_32Bit -> 4
 *      Input_64Bit -> 8
 *
 *  Any other flag value is treated as unreachable.
 */

unsigned CodeGenInterface::instInputSize(instruction ins)
{
    assert((unsigned)ins < ArrLen(instInfo));

    // Isolate the input-size bits from the instruction's flag word.
    const insFlags sizeFlag = static_cast<insFlags>(instInfo[ins] & Input_Mask);

    if (sizeFlag == Input_8Bit)
    {
        return 1;
    }

    if (sizeFlag == Input_16Bit)
    {
        return 2;
    }

    if (sizeFlag == Input_32Bit)
    {
        return 4;
    }

    if (sizeFlag == Input_64Bit)
    {
        return 8;
    }

    // No recognized input-size flag is recorded for this instruction.
    unreached();
}
#endif // TARGET_XARCH

/*****************************************************************************
Expand Down Expand Up @@ -835,11 +860,12 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
var_types simdBaseType = hwintrinsic->GetSimdBaseType();
switch (intrinsicId)
{
case NI_SSE3_LoadAndDuplicateToVector128:
case NI_AVX_BroadcastScalarToVector128:
case NI_AVX_BroadcastScalarToVector256:
{
// we have the assumption that AVX_BroadcastScalarToVector*
// only take the memory address as the operand.
// we have the assumption that these intrinsics
// only take a memory address as the operand.
assert(hwintrinsic->isContained());
assert(hwintrinsic->OperIsMemoryLoad());
assert(hwintrinsic->GetOperandCount() == 1);
Expand Down Expand Up @@ -871,16 +897,16 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
// If the broadcast node is contained, it should mean that we have a form like
// Broadcast -> CreateScalarUnsafe -> Scalar.
// If so, directly emit the scalar.
// In the codes below, we specially handle the `Broadcast -> CNS_INT` form and
// In the code below, we specially handle the `Broadcast -> CNS_INT/CNS_LNG` form and
// handle other cases recursively.
GenTree* hwintrinsicChild = hwintrinsic->Op(1);
assert(hwintrinsicChild->isContained());
if (hwintrinsicChild->OperIs(GT_CNS_INT))
if (hwintrinsicChild->OperIs(GT_CNS_INT, GT_CNS_LNG))
{
// a special case is when the operand of CreateScalarUnsafe is in integer type,
// CreateScalarUnsafe node will be fold, so we directly match a pattern of
// broadcast -> LCL_VAR(TYP_(U)INT)
ssize_t scalarValue = hwintrinsicChild->AsIntCon()->IconValue();
// a special case is when the operand of CreateScalarUnsafe is an integer type,
// CreateScalarUnsafe node will be folded, so we directly match a pattern of
// broadcast -> LCL_VAR(TYP_(U)INT/LONG)
INT64 scalarValue = hwintrinsicChild->AsIntConCommon()->IntegralValue();
UNATIVE_OFFSET cnum = emit->emitDataConst(&scalarValue, genTypeSize(simdBaseType),
genTypeSize(simdBaseType), simdBaseType);
return OperandDesc(compiler->eeFindJitDataOffs(cnum));
Expand All @@ -893,6 +919,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
}
break;
}

case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
Expand Down
Loading

0 comments on commit e7bc099

Please sign in to comment.