mirror of
https://github.com/VSadov/Satori.git
synced 2025-06-09 17:44:48 +09:00
JIT: Compute BB weights with higher precision, use tolerance in if-conversion (#88385)
In https://github.com/dotnet/runtime/issues/88376#issuecomment-1620371447 I noticed a spurious diff in the System.Tests.Perf_Int32.ToStringHex benchmark that looked like this: ```diff G_M62987_IG13: and edi, esi mov ebx, dword ptr [rbp+88H] - mov ecx, 1 test ebx, ebx - cmovle ebx, ecx + jg SHORT G_M62987_IG15 + ;; size=12 bbWeight=1.00 PerfScore 2.50 +G_M62987_IG14: + mov ebx, 1 + ;; size=5 bbWeight=0.98 PerfScore 0.24 +G_M62987_IG15: ``` Investigating, it turns out to be caused by a check in if-conversion for detecting loops, that is using a very sharp boundary. It turns out that the integral block weights are exactly the same (in both the base and diff), but the floating point calculation ended up with exactly 1 and very close to 1 in the base/diff cases respectively. So two changes to address it: * Switch getBBWeight to multiply by BB_UNITY_WEIGHT after dividing * Switch if-conversion to have a 5% tolerance so that losing just a single count in the wrong place will not cause a decision change. Note that the check here is used as a cheap "is inside loop" check.
This commit is contained in:
parent
a29d84885d
commit
3f19af848a
2 changed files with 6 additions and 5 deletions
|
@ -1665,9 +1665,9 @@ weight_t BasicBlock::getBBWeight(Compiler* comp)
|
|||
{
|
||||
weight_t calledCount = getCalledCount(comp);
|
||||
|
||||
// Normalize the bbWeights by multiplying by BB_UNITY_WEIGHT and dividing by the calledCount.
|
||||
// Normalize the bbWeight.
|
||||
//
|
||||
weight_t fullResult = this->bbWeight * BB_UNITY_WEIGHT / calledCount;
|
||||
weight_t fullResult = (this->bbWeight / calledCount) * BB_UNITY_WEIGHT;
|
||||
|
||||
return fullResult;
|
||||
}
|
||||
|
|
|
@ -656,10 +656,11 @@ bool OptIfConversionDsc::optIfConvert()
|
|||
|
||||
if (!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_INNER_LOOPS, 25))
|
||||
{
|
||||
// Don't optimise the block if it is inside a loop
|
||||
// When inside a loop, branches are quicker than selects.
|
||||
// Don't optimise the block if it is inside a loop. Loop-carried
|
||||
// dependencies can cause significant stalls if if-converted.
|
||||
// Detect via the block weight as that will be high when inside a loop.
|
||||
if (m_startBlock->getBBWeight(m_comp) > BB_UNITY_WEIGHT)
|
||||
|
||||
if (m_startBlock->getBBWeight(m_comp) > BB_UNITY_WEIGHT * 1.05)
|
||||
{
|
||||
JITDUMP("Skipping if-conversion inside loop (via weight)\n");
|
||||
return false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue