Mirror of https://github.com/VSadov/Satori.git, synced 2025-06-12 02:30:29 +09:00
ARM64: Use stlur for volatile stores (#91553)
Co-authored-by: Bruce Forstall <brucefo@microsoft.com>
parent a974719dc2
commit f3c7893c52
3 changed files with 137 additions and 66 deletions
@@ -1235,6 +1235,10 @@ protected:
 #else
     instruction genGetInsForOper(genTreeOps oper, var_types type);
 #endif
+    instruction genGetVolatileLdStIns(instruction currentIns,
+                                      regNumber targetReg,
+                                      GenTreeIndir* indir,
+                                      bool* needsBarrier);
     bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
     GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd);
     regNumber getCallIndirectionCellReg(GenTreeCall* call);
@@ -4207,22 +4207,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
 
         if (tree->IsVolatile())
         {
-            bool addrIsInReg   = addr->isUsedFromReg();
-            bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
+            bool needsBarrier = true;
+            ins               = genGetVolatileLdStIns(ins, dataReg, tree, &needsBarrier);
 
-            if ((ins == INS_strb) && addrIsInReg)
-            {
-                ins = INS_stlrb;
-            }
-            else if ((ins == INS_strh) && addrIsInReg && addrIsAligned)
-            {
-                ins = INS_stlrh;
-            }
-            else if ((ins == INS_str) && genIsValidIntReg(dataReg) && addrIsInReg && addrIsAligned)
-            {
-                ins = INS_stlr;
-            }
-            else
+            if (needsBarrier)
             {
                 // issue a full memory barrier before a volatile StInd
                 // Note: We cannot issue store barrier ishst because it is a weaker barrier.
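Note: when the new helper cannot return a release-form store it reports needsBarrier = true, and the caller keeps the plain store but emits a full barrier first. A minimal sketch of that caller-side shape, assuming the JIT's existing instGen_MemoryBarrier() helper (the remainder of this hunk is truncated above):

    // Sketch only (hypothetical shape, not the commit's code):
    if (needsBarrier)
    {
        // Full barrier; a store-only barrier (dmb ishst) would be too weak here.
        instGen_MemoryBarrier();
    }
    // ... then emit the ordinary, non-release store with the unchanged 'ins' ...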
@@ -1728,6 +1728,134 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
     genProduceReg(node);
 }
+
+//------------------------------------------------------------------------
+// genGetVolatileLdStIns: Determine the most efficient instruction to perform a
+//    volatile load or store and whether an explicit barrier is required or not.
+//
+// Arguments:
+//    currentIns   - the current instruction to perform load/store
+//    targetReg    - the target register
+//    indir        - the indirection node representing the volatile load/store
+//    needsBarrier - OUT parameter. Set to true if an explicit memory barrier is required.
+//
+// Return Value:
+//    instruction to perform the volatile load/store with.
+//
+instruction CodeGen::genGetVolatileLdStIns(instruction currentIns,
+                                           regNumber targetReg,
+                                           GenTreeIndir* indir,
+                                           bool* needsBarrier)
+{
+    assert(indir->IsVolatile());
+
+    if (!genIsValidIntReg(targetReg))
+    {
+        // We don't have special instructions to perform volatile loads/stores for non-integer registers.
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    assert(!varTypeIsFloating(indir));
+    assert(!varTypeIsSIMD(indir));
+
+#ifdef TARGET_ARM64
+    *needsBarrier = false;
+
+    if (indir->IsUnaligned() && (currentIns != INS_ldrb) && (currentIns != INS_strb))
+    {
+        // We have to use a normal load/store + explicit memory barrier
+        // to avoid unaligned access exceptions.
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    const bool addrIsInReg = indir->Addr()->isUsedFromReg();
+
+    // With RCPC2 (arm64 v8.4+) we can work with simple addressing modes like [reg + simm9]
+    const bool shouldUseRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2) && !addrIsInReg &&
+                                indir->Addr()->OperIs(GT_LEA) && !indir->HasIndex() && (indir->Scale() == 1) &&
+                                emitter::emitIns_valid_imm_for_unscaled_ldst_offset(indir->Offset());
+
+    if (shouldUseRcpc2)
+    {
+        assert(!addrIsInReg);
+        switch (currentIns)
+        {
+            // Loads
+
+            case INS_ldrb:
+                return INS_ldapurb;
+
+            case INS_ldrh:
+                return INS_ldapurh;
+
+            case INS_ldr:
+                return INS_ldapur;
+
+            // Stores
+
+            case INS_strb:
+                return INS_stlurb;
+
+            case INS_strh:
+                return INS_stlurh;
+
+            case INS_str:
+                return INS_stlur;
+
+            default:
+                *needsBarrier = true;
+                return currentIns;
+        }
+    }
+
+    // Only RCPC2 (arm64 v8.4+) allows us to deal with contained addresses.
+    // In other cases we'll have to emit a normal load/store + explicit memory barrier.
+    // It means that lower should generally avoid generating contained addresses for volatile loads/stores
+    // so we can use cheaper instructions.
+    if (!addrIsInReg)
+    {
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    // RCPC1 (arm64 v8.3+) offers a bit more relaxed memory ordering than ldar. Which is sufficient for
+    // .NET memory model's requirements, see https://github.com/dotnet/runtime/issues/67374
+    const bool hasRcpc1 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
+    switch (currentIns)
+    {
+        // Loads
+
+        case INS_ldrb:
+            return hasRcpc1 ? INS_ldaprb : INS_ldarb;
+
+        case INS_ldrh:
+            return hasRcpc1 ? INS_ldaprh : INS_ldarh;
+
+        case INS_ldr:
+            return hasRcpc1 ? INS_ldapr : INS_ldar;
+
+        // Stores
+
+        case INS_strb:
+            return INS_stlrb;
+
+        case INS_strh:
+            return INS_stlrh;
+
+        case INS_str:
+            return INS_stlr;
+
+        default:
+            *needsBarrier = true;
+            return currentIns;
+    }
+#else  // TARGET_ARM64
+    *needsBarrier = true;
+    return currentIns;
+#endif // !TARGET_ARM64
+}
 
 //------------------------------------------------------------------------
 // genCodeForIndir: Produce code for a GT_IND node.
 //
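To make the helper's decision tree easier to scan, here is a minimal standalone C++ model of the store-side selection for an aligned 4-byte integer access. The function and its names are hypothetical (not JIT code), and the RCPC2 branch additionally assumes the contained [base + simm9] restrictions checked in the helper above.

#include <cstdio>

// Hypothetical model of the store-side selection in genGetVolatileLdStIns.
static const char* PickVolatileStore(bool unaligned, bool addrIsInReg, bool hasRcpc2)
{
    if (unaligned)
        return "str + dmb ish"; // plain store behind an explicit full barrier
    if (addrIsInReg)
        return "stlr";          // store-release with a register-only address
    if (hasRcpc2)
        return "stlur";         // store-release with an unscaled [base + simm9] address (v8.4+)
    return "str + dmb ish";     // contained address without RCPC2: fall back to a barrier
}

int main()
{
    // Aligned store through a contained address on v8.4+ hardware -> stlur.
    printf("%s\n", PickVolatileStore(false, false, true));
    return 0;
}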
@@ -1755,58 +1883,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
 
     bool emitBarrier = false;
 
-    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+    if (tree->IsVolatile())
     {
-#ifdef TARGET_ARM64
-        bool addrIsInReg   = tree->Addr()->isUsedFromReg();
-        bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
-
-        // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid
-        // full memory barriers if mixed with STLR
-        bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
-
-        // On arm64-v8.4+ we can use ldapur* instructions with acquire/release semantics to
-        // avoid full memory barriers if address is contained and unscaled
-        bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2);
-
-        bool handledWithLdapur = false;
-        if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && !tree->HasIndex() && (tree->Scale() == 1) &&
-            emitter::emitIns_valid_imm_for_unscaled_ldst_offset(tree->Offset()))
-        {
-            if (ins == INS_ldrb)
-            {
-                ins               = INS_ldapurb;
-                handledWithLdapur = true;
-            }
-            else if ((ins == INS_ldrh) && addrIsAligned)
-            {
-                ins               = INS_ldapurh;
-                handledWithLdapur = true;
-            }
-            else if ((ins == INS_ldr) && addrIsAligned && genIsValidIntReg(targetReg))
-            {
-                ins               = INS_ldapur;
-                handledWithLdapur = true;
-            }
-        }
-
-        if ((ins == INS_ldrb) && addrIsInReg)
-        {
-            ins = hasRcpc ? INS_ldaprb : INS_ldarb;
-        }
-        else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned)
-        {
-            ins = hasRcpc ? INS_ldaprh : INS_ldarh;
-        }
-        else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg))
-        {
-            ins = hasRcpc ? INS_ldapr : INS_ldar;
-        }
-        else if (!handledWithLdapur)
-#endif // TARGET_ARM64
-        {
-            emitBarrier = true;
-        }
+        ins = genGetVolatileLdStIns(ins, targetReg, tree, &emitBarrier);
     }
 
     GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree);
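The load path of genCodeForIndir now goes through the same helper. Mirroring the sketch above, a hedged model of the load-side selection for an aligned 4-byte integer access (hypothetical names, not JIT code):

#include <cstdio>

// Hypothetical model of the load-side selection in genGetVolatileLdStIns.
static const char* PickVolatileLoad(bool unaligned, bool addrIsInReg, bool hasRcpc1, bool hasRcpc2)
{
    if (unaligned)
        return "ldr + barrier";             // plain load plus an explicit barrier
    if (addrIsInReg)
        return hasRcpc1 ? "ldapr" : "ldar"; // load-acquire; the RCPC form (v8.3+) is weaker but sufficient
    if (hasRcpc2)
        return "ldapur";                    // load-acquire with an unscaled [base + simm9] address (v8.4+)
    return "ldr + barrier";                 // contained address without RCPC2
}

int main()
{
    // Register address on a core without RCPC -> plain ldar.
    printf("%s\n", PickVolatileLoad(false, true, false, false));
    return 0;
}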