Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: unroll more memset patterns #110893

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2600,6 +2600,82 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
return NO_ASSERTION_INDEX;
}

//------------------------------------------------------------------------------
// optVNBasedFoldExpr_Call_Memset: Unrolls NI_System_SpanHelpers_Fill for constant length.
//
// Arguments:
//    call - NI_System_SpanHelpers_Fill call to unroll
//
// Return Value:
//    Returns a new tree or nullptr if nothing is changed.
//
// Notes:
//    The call is only unrolled when the element type is a non-struct, non-GC
//    type, the length is a VN constant, and the total size fits into the Memset
//    unroll threshold. A constant length of zero is folded into a no-op that
//    keeps the call's argument side effects. Whenever nullptr is returned the
//    call has not been modified.
//
GenTree* Compiler::optVNBasedFoldExpr_Call_Memset(GenTreeCall* call)
{
    assert(call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Fill));

    CallArg* dstArg = call->gtArgs.GetUserArgByIndex(0);
    CallArg* lenArg = call->gtArgs.GetUserArgByIndex(1);
    CallArg* valArg = call->gtArgs.GetUserArgByIndex(2);

    var_types valType = valArg->GetSignatureType();
    if (varTypeIsStruct(valType) || varTypeIsGC(valType))
    {
        JITDUMP("...value's type is not supported - bail out.\n");
        return nullptr;
    }

    // Size in bytes of a single element; the length argument is in elements.
    unsigned lengthScale = genTypeSize(valType);

    ValueNum lenVN = vnStore->VNConservativeNormalValue(lenArg->GetNode()->gtVNPair);
    if (!vnStore->IsVNConstant(lenVN))
    {
        JITDUMP("...length is not a constant - bail out.\n");
        return nullptr;
    }

    size_t len = vnStore->CoercedConstantValue<size_t>(lenVN);
    if (len == 0)
    {
        // Unrolling zero elements produces no stores at all, so the call is a no-op.
        // Handle it up front: otherwise the code below would spill the arguments
        // into temps and could still end up with a null result, i.e. it would report
        // "nothing changed" for a call it has already modified.
        JITDUMP("...length is 0 -> optimize to no-op.\n");
        return gtWrapWithSideEffects(gtNewNothingNode(), call, GTF_ALL_EFFECT, true);
    }

    if ((len > getUnrollThreshold(Memset)) ||
        // The first condition prevents an overflow in the second one: once len is
        // known to be within the threshold, both len and lengthScale are small.
        ((len * lengthScale) > getUnrollThreshold(Memset)))
    {
        JITDUMP("...length is too big to unroll - bail out.\n");
        return nullptr;
    }

    // Some arbitrary threshold if the value is not a constant,
    // since it is unlikely that we can optimize it further.
    if (!valArg->GetNode()->OperIsConst() && (len >= 8))
    {
        JITDUMP("...length is too big to unroll for non-constant value - bail out.\n");
        return nullptr;
    }

    // Spill the side effects directly in the args, we're going to
    // pick them up in the following gtExtractSideEffList
    GenTree* dst = fgMakeMultiUse(&dstArg->NodeRef());
    GenTree* val = fgMakeMultiUse(&valArg->NodeRef());

    GenTree* result = nullptr;
    gtExtractSideEffList(call, &result, GTF_ALL_EFFECT, true);

    // Emit one store per element: *(dst + offset * lengthScale) = val
    for (size_t offset = 0; offset < len; offset++)
    {
        // Clone dst and add the byte offset of the current element
        // (the ADD is emitted for the first element as well, with a zero offset).
        GenTree* offsetNode = gtNewIconNode((ssize_t)(offset * lengthScale), TYP_I_IMPL);
        GenTree* currDst    = gtNewOperNode(GT_ADD, dst->TypeGet(), gtCloneExpr(dst), offsetNode);

        GenTreeStoreInd* storeInd =
            gtNewStoreIndNode(valType, currDst, gtCloneExpr(val), GTF_IND_UNALIGNED | GTF_IND_ALLOW_NON_ATOMIC);

        // Merge with the previous result (side effects first, then the stores in order).
        result = (result == nullptr) ? storeInd : gtNewOperNode(GT_COMMA, TYP_VOID, result, storeInd);
    }

    // len is non-zero here, so at least one store has been produced.
    assert(result != nullptr);

    JITDUMP("...optimized into STOREIND(s):\n");
    DISPTREE(result);
    return result;
}

//------------------------------------------------------------------------------
// optVNBasedFoldExpr_Call_Memmove: Unrolls NI_System_SpanHelpers_Memmove/CORINFO_HELP_MEMCPY
// if possible. This function effectively duplicates LowerCallMemmove.
Expand Down Expand Up @@ -2758,6 +2834,11 @@ GenTree* Compiler::optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, G
return optVNBasedFoldExpr_Call_Memmove(call);
}

if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Fill))
{
return optVNBasedFoldExpr_Call_Memset(call);
}

return nullptr;
}

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8061,6 +8061,7 @@ class Compiler
GenTree* optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
GenTree* optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call);
GenTree* optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call);
GenTree* optVNBasedFoldExpr_Call_Memset(GenTreeCall* call);

AssertionIndex GetAssertionCount()
{
Expand Down
39 changes: 34 additions & 5 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9064,8 +9064,9 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
}

// Since we're merging two stores of the same type, the new type is twice as wide.
var_types oldType = ind->TypeGet();
var_types newType;
var_types oldType = ind->TypeGet();
var_types newType = TYP_UNDEF;
bool tryReusingPrevValue = false;
switch (oldType)
{
case TYP_BYTE:
Expand Down Expand Up @@ -9117,16 +9118,23 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
newType = TYP_SIMD32;
break;
}
return;
tryReusingPrevValue = true;
break;

case TYP_SIMD32:
if (comp->getPreferredVectorByteLength() >= 64)
{
newType = TYP_SIMD64;
break;
}
return;
#endif // TARGET_AMD64
tryReusingPrevValue = true;
break;
#elif defined(TARGET_ARM64) // TARGET_AMD64
case TYP_SIMD16:
tryReusingPrevValue = true;
break;

#endif // TARGET_ARM64
#endif // FEATURE_HW_INTRINSICS
#endif // TARGET_64BIT

Expand All @@ -9139,6 +9147,27 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
return;
}

// If we can't merge these two stores into a single store, we can at least
// cache prevData.value to a local and reuse it in currData.
// Normally, LSRA is expected to do this for us, but it's not always the case for SIMD.
if (tryReusingPrevValue)
{
#if defined(FEATURE_HW_INTRINSICS)
LIR::Use use;
if (currData.value->OperIs(GT_CNS_VEC) && GenTree::Compare(prevData.value, currData.value) &&
BlockRange().TryGetUse(prevData.value, &use))
{
GenTree* prevValueTmp = comp->gtNewLclvNode(use.ReplaceWithLclVar(comp), prevData.value->TypeGet());
BlockRange().InsertBefore(currData.value, prevValueTmp);
BlockRange().Remove(currData.value);
ind->Data() = prevValueTmp;
}
#endif // FEATURE_HW_INTRINSICS
return;
}

assert(newType != TYP_UNDEF);

// We should not be here for stores requiring write barriers.
assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind->AsStoreInd()));
assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(prevInd->AsStoreInd()));
Expand Down
Loading