// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// ===========================================================================
// File: TieredCompilation.CPP
//
// ===========================================================================
#include "common.h"
#include "excep.h"
#include "log.h"
#include "threadsuspend.h"
#include "tieredcompilation.h"
// TieredCompilationManager determines which methods should be recompiled and
// how they should be recompiled to best optimize the running code. It then
// handles logistics of getting new code created and installed.
//
//
// # Important entrypoints in this code:
//
//
// a) .ctor - called once during AppDomain initialization
// b) HandleCallCountingForFirstCall(...) - called when a method's code version is being
// invoked for the first time.
//
// # Overall workflow
//
// Methods initially call into HandleCallCountingForFirstCall() and once the call count exceeds
// a fixed limit we queue work onto our internal list of methods needing to
// be recompiled (m_methodsToOptimize). If the dedicated background worker thread is not
// already running, CreateBackgroundWorker() starts one. The worker handles as many methods as
// possible in a bounded period of time, yields the thread briefly, and continues until
// m_methodsToOptimize has been drained.
//
// The background worker enters at BackgroundWorkerBootstrapper0() / BackgroundWorkerStart()
// and then begins calling OptimizeMethod on each method in the
// queue. For each method we jit it, then update the precode so that future
// entrypoint callers will run the new code.
//
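// As a rough sketch of the per-method flow (simplified; pMD, tier0Version, tier1Version and
// createWorker are placeholder names, and the promotion call sites live outside this file):
//
//   HandleCallCountingForFirstCall(pMD);               // first call of a tier 0 code version;
//                                                      // may start/extend the tiering delay
//   ...
//   AsyncPromoteToTier1(tier0Version, &createWorker);  // call count threshold reached; queues a
//                                                      // tier 1 NativeCodeVersion for jitting
//   if (createWorker)
//       CreateBackgroundWorker();                      // outside any GC_NOTRIGGER scope
//   ...
//   OptimizeMethod(tier1Version);                      // on the background worker: jit, then activate
//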
// # Error handling
//
// The overall principle is: don't swallow terminal failures that may have corrupted the
// process (an AV, for example), but otherwise, for any transient issue or functional limitation
// that prevents us from optimizing, log it for diagnostics and then back out gracefully,
// continuing to run the less optimal code. The feature should be constructed so that
// errors are limited to OS resource exhaustion or poorly behaved managed code
// (for example within an AssemblyResolve event or static constructor triggered by the JIT).
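//
// Concretely, non-terminal failures in the optional work below are handled with a recurring
// pattern along these lines (sketch only; DoOptionalTieringWork is a placeholder):
//
//   EX_TRY
//   {
//       DoOptionalTieringWork();   // e.g. jitting a tier 1 body or completing call counting
//   }
//   EX_CATCH
//   {
//       STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "...: hr=0x%x\n", GET_EXCEPTION()->GetHR());
//   }
//   EX_END_CATCH(RethrowTerminalExceptions);   // terminal exceptions are still rethrown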
#if defined(FEATURE_TIERED_COMPILATION) && !defined(DACCESS_COMPILE)
CrstStatic TieredCompilationManager::s_lock;
#ifdef _DEBUG
Thread *TieredCompilationManager::s_backgroundWorkerThread = nullptr;
#endif
CLREventStatic TieredCompilationManager::s_backgroundWorkAvailableEvent;
bool TieredCompilationManager::s_isBackgroundWorkerRunning = false;
bool TieredCompilationManager::s_isBackgroundWorkerProcessingWork = false;
// Called at AppDomain construction
TieredCompilationManager::TieredCompilationManager() :
m_countOfMethodsToOptimize(0),
m_countOfNewMethodsCalledDuringDelay(0),
m_methodsPendingCountingForTier1(nullptr),
m_tier1CallCountingCandidateMethodRecentlyRecorded(false),
m_isPendingCallCountingCompletion(false),
m_recentlyRequestedCallCountingCompletion(false)
{
WRAPPER_NO_CONTRACT;
// On Unix, we can reach here before EEConfig is initialized, so defer config-based initialization to Init()
}
// Called at AppDomain Init
void TieredCompilationManager::Init()
{
CONTRACTL
{
GC_NOTRIGGER;
CAN_TAKE_LOCK;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
}
#endif // FEATURE_TIERED_COMPILATION && !DACCESS_COMPILE
NativeCodeVersion::OptimizationTier TieredCompilationManager::GetInitialOptimizationTier(PTR_MethodDesc pMethodDesc)
{
WRAPPER_NO_CONTRACT;
_ASSERTE(pMethodDesc != NULL);
#ifdef FEATURE_TIERED_COMPILATION
if (!pMethodDesc->IsEligibleForTieredCompilation())
{
// The optimization tier is not used
return NativeCodeVersion::OptimizationTierOptimized;
}
_ASSERT(!pMethodDesc->RequestedAggressiveOptimization());
if (!pMethodDesc->GetLoaderAllocator()->GetCallCountingManager()->IsCallCountingEnabled(NativeCodeVersion(pMethodDesc)))
{
// Tier 0 call counting may have been disabled for several reasons; the intention is to start with and stay at an
// optimized tier
return NativeCodeVersion::OptimizationTierOptimized;
}
#ifdef FEATURE_PGO
if (g_pConfig->TieredPGO())
{
// Initial tier for R2R is always just OptimizationTier0
// For ILOnly it depends on TieredPGO_InstrumentOnlyHotCode:
// 1 - OptimizationTier0 as we don't want to instrument the initial version (will only instrument hot Tier0)
// 2 - OptimizationTier0Instrumented - instrument all ILOnly code
if (g_pConfig->TieredPGO_InstrumentOnlyHotCode() ||
ExecutionManager::IsReadyToRunCode(pMethodDesc->GetNativeCode()))
{
return NativeCodeVersion::OptimizationTier0;
}
return NativeCodeVersion::OptimizationTier0Instrumented;
}
#endif
return NativeCodeVersion::OptimizationTier0;
#else
return NativeCodeVersion::OptimizationTierOptimized;
#endif
}
#if defined(FEATURE_TIERED_COMPILATION) && !defined(DACCESS_COMPILE)
void TieredCompilationManager::HandleCallCountingForFirstCall(MethodDesc* pMethodDesc)
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
_ASSERTE(pMethodDesc != nullptr);
_ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
_ASSERTE(g_pConfig->TieredCompilation_CallCountingDelayMs() != 0);
// An exception here (OOM) would mean that the method's calls would not be counted and it would not be promoted. A
// consideration is that an attempt could be made to reset the code entry point on exception (which can also OOM). That
// doesn't seem worth it; the exception is propagated and there are other cases where a method may not be promoted due to OOM.
bool createBackgroundWorker;
{
LockHolder tieredCompilationLockHolder;
SArray<MethodDesc *> *methodsPendingCounting = m_methodsPendingCountingForTier1;
_ASSERTE((methodsPendingCounting != nullptr) == IsTieringDelayActive());
if (methodsPendingCounting != nullptr)
{
methodsPendingCounting->Append(pMethodDesc);
++m_countOfNewMethodsCalledDuringDelay;
if (!m_tier1CallCountingCandidateMethodRecentlyRecorded)
{
// Delay call counting for currently recorded methods further
m_tier1CallCountingCandidateMethodRecentlyRecorded = true;
}
return;
}
NewHolder<SArray<MethodDesc *>> methodsPendingCountingHolder = new SArray<MethodDesc *>();
methodsPendingCountingHolder->Preallocate(64);
methodsPendingCountingHolder->Append(pMethodDesc);
++m_countOfNewMethodsCalledDuringDelay;
m_methodsPendingCountingForTier1 = methodsPendingCountingHolder.Extract();
_ASSERTE(!m_tier1CallCountingCandidateMethodRecentlyRecorded);
_ASSERTE(IsTieringDelayActive());
// The thread is in a GC_NOTRIGGER scope here. If the background worker is already running, we can schedule it inside
// the same lock without triggering a GC.
createBackgroundWorker = !TryScheduleBackgroundWorkerWithoutGCTrigger_Locked();
}
if (createBackgroundWorker)
{
// Elsewhere, the tiered compilation lock is taken inside the code versioning lock. The code versioning lock is an
// unsafe any-GC-mode lock, so the tiering lock is also that type of lock. Inside that type of lock, there is an
// implicit GC_NOTRIGGER contract. So, a thread cannot be created inside the tiering lock since it may GC_TRIGGERS. At
// this point, this is the only thread that may attempt creating the background worker thread.
EX_TRY
{
CreateBackgroundWorker();
}
EX_CATCH
{
// Since the tiering lock was released and reacquired, other methods may have been recorded in-between. Just
// deactivate the tiering delay. Any methods that have been recorded would not have their calls counted and
// would not be promoted (due to the small window, there shouldn't be many of those). See the consideration above in a
// similar exception case.
{
LockHolder tieredCompilationLockHolder;
_ASSERTE(IsTieringDelayActive());
m_tier1CallCountingCandidateMethodRecentlyRecorded = false;
_ASSERTE(m_methodsPendingCountingForTier1 != nullptr);
delete m_methodsPendingCountingForTier1;
m_methodsPendingCountingForTier1 = nullptr;
_ASSERTE(!IsTieringDelayActive());
}
EX_RETHROW;
}
EX_END_CATCH(RethrowTerminalExceptions);
}
if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
{
ETW::CompilationLog::TieredCompilation::Runtime::SendPause();
}
}
bool TieredCompilationManager::TrySetCodeEntryPointAndRecordMethodForCallCounting(MethodDesc* pMethodDesc, PCODE codeEntryPoint)
{
WRAPPER_NO_CONTRACT;
_ASSERTE(pMethodDesc != nullptr);
_ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
_ASSERTE(codeEntryPoint != (PCODE)NULL);
if (!IsTieringDelayActive())
{
return false;
}
LockHolder tieredCompilationLockHolder;
if (!IsTieringDelayActive())
{
return false;
}
// Set the code entry point before recording the method for call counting to avoid a race. Otherwise, the tiering delay may
// expire and enable call counting for the method before the entry point is set here, in which case calls to the method
// would not be counted anymore.
pMethodDesc->SetCodeEntryPoint(codeEntryPoint);
_ASSERTE(m_methodsPendingCountingForTier1 != nullptr);
m_methodsPendingCountingForTier1->Append(pMethodDesc);
return true;
}
void TieredCompilationManager::AsyncPromoteToTier1(
NativeCodeVersion currentNativeCodeVersion,
bool *createTieringBackgroundWorkerRef)
{
CONTRACTL
{
THROWS;
GC_NOTRIGGER;
MODE_ANY;
}
CONTRACTL_END;
_ASSERTE(CodeVersionManager::IsLockOwnedByCurrentThread());
_ASSERTE(!currentNativeCodeVersion.IsNull());
_ASSERTE(!currentNativeCodeVersion.IsFinalTier());
_ASSERTE(createTieringBackgroundWorkerRef != nullptr);
_ASSERTE(!currentNativeCodeVersion.GetILCodeVersion().IsDeoptimized());
NativeCodeVersion t1NativeCodeVersion;
HRESULT hr;
// Add an inactive native code entry in the versioning table to track the tier1
// compilation we are going to create. This entry binds the compilation to a
// particular version of the IL code regardless of any changes that may
// occur between now and when jitting completes. If the IL does change in that
// interval the new code entry won't be activated.
MethodDesc *pMethodDesc = currentNativeCodeVersion.GetMethodDesc();
NativeCodeVersion::OptimizationTier nextTier = NativeCodeVersion::OptimizationTier1;
#ifdef FEATURE_PGO
if (g_pConfig->TieredPGO())
{
if (currentNativeCodeVersion.GetOptimizationTier() == NativeCodeVersion::OptimizationTier0 &&
g_pConfig->TieredPGO_InstrumentOnlyHotCode())
{
if (ExecutionManager::IsReadyToRunCode(currentNativeCodeVersion.GetNativeCode()))
{
// We definitely don't want to use the unoptimized instrumented tier for hot R2R code:
// 1) It would produce a lot of new compilations for small methods that were inlined in R2R
// 2) There would be a noticeable performance regression from fast R2R to slow instrumented Tier0
nextTier = NativeCodeVersion::OptimizationTier1Instrumented;
}
else
{
// For ILOnly it's fine to use unoptimized instrumented tier:
// 1) No new compilations since previous tier already triggered them
// 2) Better profile since we'll be able to instrument inlinees
// 3) Unoptimized instrumented tier is faster to produce and wire up
nextTier = NativeCodeVersion::OptimizationTier0Instrumented;
#ifdef _DEBUG
if (CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredPGO_InstrumentedTierAlwaysOptimized) != 0)
{
// Override that behavior and always use optimizations.
nextTier = NativeCodeVersion::OptimizationTier1Instrumented;
}
#endif
// NOTE: we might consider using OptimizationTier1Instrumented if the previous Tier0
// made it to Tier1-OSR.
}
}
}
#endif
ILCodeVersion ilCodeVersion = currentNativeCodeVersion.GetILCodeVersion();
_ASSERTE(!ilCodeVersion.HasAnyOptimizedNativeCodeVersion(currentNativeCodeVersion));
hr = ilCodeVersion.AddNativeCodeVersion(pMethodDesc, nextTier, &t1NativeCodeVersion);
if (FAILED(hr))
{
ThrowHR(hr);
}
// Insert the method into the optimization queue and trigger a thread to service
// the queue if needed.
SListElem<NativeCodeVersion>* pMethodListItem = new SListElem<NativeCodeVersion>(t1NativeCodeVersion);
{
LockHolder tieredCompilationLockHolder;
m_methodsToOptimize.InsertTail(pMethodListItem);
++m_countOfMethodsToOptimize;
LOG((LF_TIEREDCOMPILATION, LL_INFO10000, "TieredCompilationManager::AsyncPromoteToTier1 Method=0x%pM (%s::%s), code version id=0x%x queued\n",
pMethodDesc, pMethodDesc->m_pszDebugClassName, pMethodDesc->m_pszDebugMethodName,
t1NativeCodeVersion.GetVersionId()));
// The thread is in a GC_NOTRIGGER scope here. If the background worker is already running, we can schedule it inside
// the same lock without triggering a GC.
if (TryScheduleBackgroundWorkerWithoutGCTrigger_Locked())
{
return;
}
}
// This function is called from a GC_NOTRIGGER scope and creating the background worker (creating a thread) may GC_TRIGGERS.
// The caller needs to create the background worker after leaving the GC_NOTRIGGER scope. The contract is that the caller
// must make an attempt to create the background worker in any normal path. In the event of an atypical exception (eg. OOM),
// the background worker may not be created and would have to be tried again the next time some background work is queued.
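// A hypothetical caller-side sketch of that contract (simplified, not literal code from the
// call counting manager):
//
//   bool createTieringBackgroundWorker = false;
//   {
//       CodeVersionManager::LockHolder codeVersioningLockHolder;   // GC_NOTRIGGER scope
//       ...
//       tieredCompilationManager->AsyncPromoteToTier1(tier0Version, &createTieringBackgroundWorker);
//   }
//   if (createTieringBackgroundWorker)
//   {
//       tieredCompilationManager->CreateBackgroundWorker();        // GC_TRIGGERS, outside the lock
//   }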
*createTieringBackgroundWorkerRef = true;
}
bool TieredCompilationManager::TryScheduleBackgroundWorkerWithoutGCTrigger_Locked()
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;
MODE_ANY;
}
CONTRACTL_END;
_ASSERTE(IsLockOwnedByCurrentThread());
if (s_isBackgroundWorkerProcessingWork)
{
_ASSERTE(s_isBackgroundWorkerRunning);
return true;
}
if (s_isBackgroundWorkerRunning)
{
s_isBackgroundWorkerProcessingWork = true;
s_backgroundWorkAvailableEvent.Set();
return true;
}
s_isBackgroundWorkerRunning = true;
s_isBackgroundWorkerProcessingWork = true;
return false; // it's the caller's responsibility to call CreateBackgroundWorker() after leaving the GC_NOTRIGGER region
}
void TieredCompilationManager::CreateBackgroundWorker()
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
_ASSERTE(!IsLockOwnedByCurrentThread());
_ASSERTE(s_isBackgroundWorkerRunning);
_ASSERTE(s_isBackgroundWorkerProcessingWork);
_ASSERTE(s_backgroundWorkerThread == nullptr);
EX_TRY
{
if (!s_backgroundWorkAvailableEvent.IsValid())
{
// An auto-reset event is used since it's a bit easier to manage and felt more natural in this case. It is also
// possible to use a manual-reset event instead, though there doesn't appear to be anything to gain from doing so.
s_backgroundWorkAvailableEvent.CreateAutoEvent(false);
}
Thread *newThread = SetupUnstartedThread();
_ASSERTE(newThread != nullptr);
INDEBUG(s_backgroundWorkerThread = newThread);
#ifdef FEATURE_COMINTEROP
newThread->SetApartment(Thread::AS_InMTA);
#endif
newThread->SetBackground(true);
if (!newThread->CreateNewThread(0, BackgroundWorkerBootstrapper0, newThread, W(".NET Tiered Compilation Worker")))
{
newThread->DecExternalCount(false);
ThrowOutOfMemory();
}
newThread->StartThread();
}
EX_CATCH
{
{
LockHolder tieredCompilationLockHolder;
s_isBackgroundWorkerProcessingWork = false;
s_isBackgroundWorkerRunning = false;
INDEBUG(s_backgroundWorkerThread = nullptr);
}
EX_RETHROW;
}
EX_END_CATCH(RethrowTerminalExceptions);
}
DWORD WINAPI TieredCompilationManager::BackgroundWorkerBootstrapper0(LPVOID args)
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
_ASSERTE(args != nullptr);
Thread *thread = (Thread *)args;
_ASSERTE(s_backgroundWorkerThread == thread);
if (!thread->HasStarted())
{
LockHolder tieredCompilationLockHolder;
s_isBackgroundWorkerProcessingWork = false;
s_isBackgroundWorkerRunning = false;
INDEBUG(s_backgroundWorkerThread = nullptr);
return 0;
}
_ASSERTE(GetThread() == thread);
ManagedThreadBase::KickOff(BackgroundWorkerBootstrapper1, nullptr);
GCX_PREEMP_NO_DTOR();
DestroyThread(thread);
return 0;
}
void TieredCompilationManager::BackgroundWorkerBootstrapper1(LPVOID)
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_COOPERATIVE;
}
CONTRACTL_END;
GCX_PREEMP();
GetAppDomain()->GetTieredCompilationManager()->BackgroundWorkerStart();
}
void TieredCompilationManager::BackgroundWorkerStart()
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
_ASSERTE(s_backgroundWorkAvailableEvent.IsValid());
DWORD timeoutMs = g_pConfig->TieredCompilation_BackgroundWorkerTimeoutMs();
DWORD delayMs = g_pConfig->TieredCompilation_CallCountingDelayMs();
int processorCount = GetCurrentProcessCpuCount();
_ASSERTE(processorCount > 0);
LARGE_INTEGER li;
QueryPerformanceFrequency(&li);
UINT64 ticksPerS = li.QuadPart;
UINT64 maxWorkDurationTicks = ticksPerS * 50 / 1000; // 50 ms
UINT64 minWorkDurationTicks = min(ticksPerS * processorCount / 1000, maxWorkDurationTicks); // <proc count> ms (capped)
UINT64 workDurationTicks = minWorkDurationTicks;
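// For example (illustrative numbers only): with a QueryPerformanceFrequency of 10,000,000
// ticks per second and 8 processors, maxWorkDurationTicks = 10,000,000 * 50 / 1000 = 500,000
// ticks (50 ms) and minWorkDurationTicks = min(10,000,000 * 8 / 1000, 500,000) = 80,000 ticks
// (8 ms), so the first batch of background work is limited to roughly 8 ms.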
while (true)
{
_ASSERTE(s_isBackgroundWorkerRunning);
_ASSERTE(s_isBackgroundWorkerProcessingWork);
if (IsTieringDelayActive())
{
do
{
ClrSleepEx(delayMs, false);
} while (!TryDeactivateTieringDelay());
}
// Don't want to perform background work as soon as it is scheduled if there is possibly more important work that could
// be done. Some operating systems may also give a thread woken by a signal higher priority temporarily, which in a
// CPU-limited environment may lead to rejitting a method as soon as it's promoted, effectively in the foreground.
ClrSleepEx(0, false);
if (IsTieringDelayActive())
{
continue;
}
if ((m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0) &&
!DoBackgroundWork(&workDurationTicks, minWorkDurationTicks, maxWorkDurationTicks))
{
// Background work was interrupted due to the tiering delay being activated
_ASSERTE(IsTieringDelayActive());
continue;
}
{
LockHolder tieredCompilationLockHolder;
if (IsTieringDelayActive() || m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0)
{
continue;
}
s_isBackgroundWorkerProcessingWork = false;
}
// Wait for the worker to be scheduled again
DWORD waitResult = s_backgroundWorkAvailableEvent.Wait(timeoutMs, false);
if (waitResult == WAIT_OBJECT_0)
{
continue;
}
// The wait timed out; see if the worker can exit. When using the PAL, it may be possible to get WAIT_FAILED in some
// shutdown scenarios; treat that as a timeout too since a signal would not have been observed anyway.
LockHolder tieredCompilationLockHolder;
if (s_isBackgroundWorkerProcessingWork)
{
// The background worker got scheduled again just as the wait timed out. The event would have been signaled just
// after the wait had timed out, so reset it and continue processing work.
s_backgroundWorkAvailableEvent.Reset();
continue;
}
s_isBackgroundWorkerRunning = false;
INDEBUG(s_backgroundWorkerThread = nullptr);
return;
}
}
bool TieredCompilationManager::IsTieringDelayActive()
{
LIMITED_METHOD_CONTRACT;
return m_methodsPendingCountingForTier1 != nullptr;
}
bool TieredCompilationManager::TryDeactivateTieringDelay()
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
_ASSERTE(GetThread() == s_backgroundWorkerThread);
SArray<MethodDesc *> *methodsPendingCounting = nullptr;
UINT32 countOfNewMethodsCalledDuringDelay = 0;
{
// It's possible for the timer to tick before it is recorded that the delay is in effect. This lock guarantees that
// the delay is in effect.
LockHolder tieredCompilationLockHolder;
_ASSERTE(IsTieringDelayActive());
if (m_tier1CallCountingCandidateMethodRecentlyRecorded)
{
m_tier1CallCountingCandidateMethodRecentlyRecorded = false;
return false;
}
// Exchange information into locals inside the lock
methodsPendingCounting = m_methodsPendingCountingForTier1;
_ASSERTE(methodsPendingCounting != nullptr);
m_methodsPendingCountingForTier1 = nullptr;
countOfNewMethodsCalledDuringDelay = m_countOfNewMethodsCalledDuringDelay;
m_countOfNewMethodsCalledDuringDelay = 0;
_ASSERTE(!IsTieringDelayActive());
}
if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
{
ETW::CompilationLog::TieredCompilation::Runtime::SendResume(countOfNewMethodsCalledDuringDelay);
}
// Install call counters
{
MethodDesc** methods = methodsPendingCounting->GetElements();
COUNT_T methodCount = methodsPendingCounting->GetCount();
CodeVersionManager *codeVersionManager = GetAppDomain()->GetCodeVersionManager();
MethodDescBackpatchInfoTracker::ConditionalLockHolder slotBackpatchLockHolder;
CodeVersionManager::LockHolder codeVersioningLockHolder;
for (COUNT_T i = 0; i < methodCount; ++i)
{
MethodDesc *methodDesc = methods[i];
_ASSERTE(codeVersionManager == methodDesc->GetCodeVersionManager());
NativeCodeVersion activeCodeVersion =
codeVersionManager->GetActiveILCodeVersion(methodDesc).GetActiveNativeCodeVersion(methodDesc);
if (activeCodeVersion.IsNull())
{
continue;
}
PCODE codeEntryPoint = activeCodeVersion.GetNativeCode();
if (codeEntryPoint == (PCODE)NULL)
{
// The active IL/native code version has changed since the method was queued, and the currently active version
// doesn't have a code entry point yet
continue;
}
EX_TRY
{
bool wasSet = CallCountingManager::SetCodeEntryPoint(activeCodeVersion, codeEntryPoint, false, nullptr);
_ASSERTE(wasSet);
}
EX_CATCH
{
STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "TieredCompilationManager::DeactivateTieringDelay: "
"Exception in CallCountingManager::SetCodeEntryPoint, hr=0x%x\n",
GET_EXCEPTION()->GetHR());
}
EX_END_CATCH(RethrowTerminalExceptions);
}
}
delete methodsPendingCounting;
return true;
}
void TieredCompilationManager::AsyncCompleteCallCounting()
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
MODE_PREEMPTIVE;
}
CONTRACTL_END;
{
LockHolder tieredCompilationLockHolder;
if (m_recentlyRequestedCallCountingCompletion)
{
_ASSERTE(m_isPendingCallCountingCompletion);
}
else
{
m_isPendingCallCountingCompletion = true;
// A potentially large number of methods may reach the call count threshold at about the same time or in bursts.
// This field is used to coalesce a burst of pending completions, see the background work.
m_recentlyRequestedCallCountingCompletion = true;
}
// The thread is in a GC_NOTRIGGER scope here. If the background worker is already running, we can schedule it inside
// the same lock without triggering a GC.
if (TryScheduleBackgroundWorkerWithoutGCTrigger_Locked())
{
return;
}
}
CreateBackgroundWorker(); // requires GC_TRIGGERS
}
// This method will process one or more methods from the optimization queue
// on a background thread. Each such method will be jitted with code
// optimizations enabled and then installed as the active implementation
// of the method entrypoint.
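// In outline (a sketch of the loop below, not additional behavior):
//   - under the tiering lock, either claim a pending call counting completion or dequeue the
//     next NativeCodeVersion to optimize
//   - outside the lock, run CallCountingManager::CompleteCallCounting() or OptimizeMethod()
//   - after roughly workDurationTicks of work, yield with ClrSleepEx(0, false), adjust the
//     work duration based on how long the yield took, and continue
//   - bail out early if the tiering delay becomes active; once everything is jitted, stop and
//     delete the call counting stubs, then return true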
bool TieredCompilationManager::DoBackgroundWork(
UINT64 *workDurationTicksRef,
UINT64 minWorkDurationTicks,
UINT64 maxWorkDurationTicks)
{
WRAPPER_NO_CONTRACT;
_ASSERTE(GetThread() == s_backgroundWorkerThread);
_ASSERTE(m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0);
_ASSERTE(workDurationTicksRef != nullptr);
_ASSERTE(minWorkDurationTicks <= maxWorkDurationTicks);
UINT64 workDurationTicks = *workDurationTicksRef;
_ASSERTE(workDurationTicks >= minWorkDurationTicks);
_ASSERTE(workDurationTicks <= maxWorkDurationTicks);
if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
{
UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
if (m_isPendingCallCountingCompletion)
{
countOfMethodsToOptimize += CallCountingManager::GetCountOfCodeVersionsPendingCompletion();
}
ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStart(countOfMethodsToOptimize);
}
bool sendStopEvent = true;
bool allMethodsJitted = false;
UINT32 jittedMethodCount = 0;
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
UINT64 startTicks = li.QuadPart;
UINT64 previousTicks = startTicks;
do
{
bool completeCallCounting = false;
NativeCodeVersion nativeCodeVersionToOptimize;
{
LockHolder tieredCompilationLockHolder;
if (IsTieringDelayActive())
{
break;
}
bool wasPendingCallCountingCompletion = m_isPendingCallCountingCompletion;
if (wasPendingCallCountingCompletion)
{
if (m_recentlyRequestedCallCountingCompletion)
{
// A potentially large number of methods may reach the call count threshold at about the same time or in
// bursts. To coalesce a burst of pending completions a bit, if another method has reached the call count
// threshold since the last time it was checked here, don't complete call counting yet. Coalescing
// call counting completions a bit helps to avoid blocking foreground threads due to lock contention as
// methods are continuing to reach the call count threshold.
m_recentlyRequestedCallCountingCompletion = false;
}
else
{
m_isPendingCallCountingCompletion = false;
completeCallCounting = true;
}
}
if (!completeCallCounting)
{
nativeCodeVersionToOptimize = GetNextMethodToOptimize();
if (nativeCodeVersionToOptimize.IsNull())
{
// Ran out of methods to JIT
if (wasPendingCallCountingCompletion)
{
// If call counting completions are pending and delayed above for coalescing, complete call counting
// now, as that will add more methods to be rejitted
m_isPendingCallCountingCompletion = false;
_ASSERTE(!m_recentlyRequestedCallCountingCompletion);
completeCallCounting = true;
}
else
{
allMethodsJitted = true;
break;
}
}
}
}
_ASSERTE(completeCallCounting == !!nativeCodeVersionToOptimize.IsNull());
if (completeCallCounting)
{
EX_TRY
{
CallCountingManager::CompleteCallCounting();
}
EX_CATCH
{
STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "TieredCompilationManager::DoBackgroundWork: "
"Exception in CallCountingManager::CompleteCallCounting, hr=0x%x\n",
GET_EXCEPTION()->GetHR());
}
EX_END_CATCH(RethrowTerminalExceptions);
continue;
}
OptimizeMethod(nativeCodeVersionToOptimize);
++jittedMethodCount;
// Yield the thread periodically to give preference to possibly more important work
QueryPerformanceCounter(&li);
UINT64 currentTicks = li.QuadPart;
if (currentTicks - startTicks < workDurationTicks)
{
previousTicks = currentTicks;
continue;
}
if (currentTicks - previousTicks >= maxWorkDurationTicks)
{
// It's unlikely that one iteration above would have taken that long, more likely this thread got scheduled out for
// a while, in which case there is no need to yield again. Discount the time taken for the previous iteration and
// continue processing work.
startTicks += currentTicks - previousTicks;
previousTicks = currentTicks;
continue;
}
if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
{
UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
if (m_isPendingCallCountingCompletion)
{
countOfMethodsToOptimize += CallCountingManager::GetCountOfCodeVersionsPendingCompletion();
}
ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStop(countOfMethodsToOptimize, jittedMethodCount);
}
UINT64 beforeSleepTicks = currentTicks;
ClrSleepEx(0, false);
QueryPerformanceCounter(&li);
currentTicks = li.QuadPart;
// Depending on how oversubscribed thread usage is on the system, the sleep may have caused this thread to not be
// scheduled for a long time. Yielding the thread too frequently may significantly slow down the background work, which
// may significantly delay how long it takes to reach steady-state performance. On the other hand, yielding the thread
// too infrequently may cause the background work to monopolize the available CPU resources and prevent more important
// foreground work from occurring. So the sleep duration is measured and for the next batch of background work, at least
// a portion of that measured duration is used (within the min and max to keep things sensible). Since the background
// work duration is capped to a maximum and since a long sleep delay is likely to repeat, to avoid going back to
// too-frequent yielding too quickly, the background work duration is decayed back to the minimum if the sleep duration
// becomes consistently short.
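// For example (illustrative numbers only): if workDurationTicks was 80,000 and the yield was
// measured at 600,000 ticks, then newWorkDurationTicks = 600,000 / 4 = 150,000 and
// decayedWorkDurationTicks = (80,000 + 40,000) / 2 = 60,000, so the larger value (150,000) is
// used, subject to the min/max clamping below. If subsequent yields are short, the duration
// decays by a factor of 0.75 per iteration back toward minWorkDurationTicks.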
UINT64 newWorkDurationTicks = (currentTicks - beforeSleepTicks) / 4;
UINT64 decayedWorkDurationTicks = (workDurationTicks + workDurationTicks / 2) / 2;
workDurationTicks = newWorkDurationTicks < decayedWorkDurationTicks ? decayedWorkDurationTicks : newWorkDurationTicks;
if (workDurationTicks < minWorkDurationTicks)
{
workDurationTicks = minWorkDurationTicks;
}
else if (workDurationTicks > maxWorkDurationTicks)
{
workDurationTicks = maxWorkDurationTicks;
}
if (IsTieringDelayActive())
{
sendStopEvent = false;
break;
}
if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
{
UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
if (m_isPendingCallCountingCompletion)
{
countOfMethodsToOptimize += CallCountingManager::GetCountOfCodeVersionsPendingCompletion();
}
ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStart(countOfMethodsToOptimize);
}
jittedMethodCount = 0;
startTicks = previousTicks = currentTicks;
} while (!IsTieringDelayActive());
if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled() && sendStopEvent)
{
UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
if (m_isPendingCallCountingCompletion)
{
countOfMethodsToOptimize += CallCountingManager::GetCountOfCodeVersionsPendingCompletion();
}
ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStop(countOfMethodsToOptimize, jittedMethodCount);
}
if (allMethodsJitted)
{
EX_TRY
{
CallCountingManager::StopAndDeleteAllCallCountingStubs();
}
EX_CATCH
{
STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "TieredCompilationManager::DoBackgroundWork: "
"Exception in CallCountingManager::StopAndDeleteAllCallCountingStubs, hr=0x%x\n",
GET_EXCEPTION()->GetHR());
}
EX_END_CATCH(RethrowTerminalExceptions);
}
*workDurationTicksRef = workDurationTicks;
return allMethodsJitted;
}
// Jit compiles and installs new optimized code for a method.
// Called on a background thread.
void TieredCompilationManager::OptimizeMethod(NativeCodeVersion nativeCodeVersion)
{
STANDARD_VM_CONTRACT;
_ASSERTE(nativeCodeVersion.GetMethodDesc()->IsEligibleForTieredCompilation());
if (CompileCodeVersion(nativeCodeVersion))
{
ActivateCodeVersion(nativeCodeVersion);
}
}
// Compiles new optimized code for a method.
// Called on a background thread.
BOOL TieredCompilationManager::CompileCodeVersion(NativeCodeVersion nativeCodeVersion)
{
STANDARD_VM_CONTRACT;
PCODE pCode = (PCODE)NULL;
MethodDesc* pMethod = nativeCodeVersion.GetMethodDesc();
EX_TRY
{
PrepareCodeConfigBuffer configBuffer(nativeCodeVersion);
PrepareCodeConfig *config = configBuffer.GetConfig();
// This is a recompiling request which means the caller was
// in COOP mode since the code already ran.
_ASSERTE(!pMethod->HasUnmanagedCallersOnlyAttribute());
config->SetCallerGCMode(CallerGCMode::Coop);
pCode = pMethod->PrepareCode(config);
LOG((LF_TIEREDCOMPILATION, LL_INFO10000, "TieredCompilationManager::CompileCodeVersion Method=0x%pM (%s::%s), code version id=0x%x, code ptr=0x%p\n",
pMethod, pMethod->m_pszDebugClassName, pMethod->m_pszDebugMethodName,
nativeCodeVersion.GetVersionId(),
pCode));
if (config->JitSwitchedToMinOpt())
{
// The JIT decided to switch to min-opts, likely due to the method being very large or complex. The rejitted code
// may be slower if the method had been prejitted. Ignore the rejitted code and continue using the tier 0 entry
// point.
// TODO: In the future, we should get some feedback from images containing pregenerated code and from tier 0 JIT
// indicating that the method would not benefit from a rejit and avoid the rejit altogether.
pCode = (PCODE)NULL;
}
}
EX_CATCH
{
// Failing to jit should be rare but acceptable. We will leave whatever code already exists in place.
STRESS_LOG2(LF_TIEREDCOMPILATION, LL_INFO10, "TieredCompilationManager::CompileCodeVersion: Method %pM failed to jit, hr=0x%x\n",
pMethod, GET_EXCEPTION()->GetHR());
}
EX_END_CATCH(RethrowTerminalExceptions)
return pCode != (PCODE)NULL;
}
// Updates the MethodDesc and precode so that future invocations of a method will
// execute the native code pointed to by pCode.
// Called on a background thread.
void TieredCompilationManager::ActivateCodeVersion(NativeCodeVersion nativeCodeVersion)
{
STANDARD_VM_CONTRACT;
MethodDesc* pMethod = nativeCodeVersion.GetMethodDesc();
// If the ilParent version is active, this will activate the native code version now.
// Otherwise, if the ilParent version becomes active again in the future, the native
// code version will activate then.
ILCodeVersion ilParent;
HRESULT hr = S_OK;
{
bool mayHaveEntryPointSlotsToBackpatch = pMethod->MayHaveEntryPointSlotsToBackpatch();
MethodDescBackpatchInfoTracker::ConditionalLockHolder slotBackpatchLockHolder(mayHaveEntryPointSlotsToBackpatch);
CodeVersionManager::LockHolder codeVersioningLockHolder;
// As long as we are exclusively using any non-JumpStamp publishing for tiered compilation
// methods, this first attempt should succeed.
ilParent = nativeCodeVersion.GetILCodeVersion();
hr = ilParent.SetActiveNativeCodeVersion(nativeCodeVersion);
LOG((LF_TIEREDCOMPILATION, LL_INFO10000, "TieredCompilationManager::ActivateCodeVersion Method=0x%pM (%s::%s), code version id=0x%x. SetActiveNativeCodeVersion ret=0x%x\n",
pMethod, pMethod->m_pszDebugClassName, pMethod->m_pszDebugMethodName,
nativeCodeVersion.GetVersionId(),
hr));
}
if (FAILED(hr))
{
STRESS_LOG2(LF_TIEREDCOMPILATION, LL_INFO10, "TieredCompilationManager::ActivateCodeVersion: "
"Method %pM failed to publish native code for native code version %d\n",
pMethod, nativeCodeVersion.GetVersionId());
}
}
// Dequeues the next method in the optimization queue.
// This runs on the background thread.
NativeCodeVersion TieredCompilationManager::GetNextMethodToOptimize()
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;
MODE_ANY;
}
CONTRACTL_END;
_ASSERTE(IsLockOwnedByCurrentThread());
SListElem<NativeCodeVersion>* pElem = m_methodsToOptimize.RemoveHead();
if (pElem != NULL)
{
NativeCodeVersion nativeCodeVersion = pElem->GetValue();
delete pElem;
_ASSERTE(m_countOfMethodsToOptimize != 0);
--m_countOfMethodsToOptimize;
return nativeCodeVersion;
}
return NativeCodeVersion();
}
// static
CORJIT_FLAGS TieredCompilationManager::GetJitFlags(PrepareCodeConfig *config)
{
WRAPPER_NO_CONTRACT;
_ASSERTE(config != nullptr);
_ASSERTE(
!config->WasTieringDisabledBeforeJitting() ||
config->GetCodeVersion().IsFinalTier());
CORJIT_FLAGS flags;
// Determine the optimization tier for the default code version (slightly faster common path during startup compared to
// below), and disable call counting and set the optimization tier if it's not going to be tier 0 (this is used in other
// places for the default code version where necessary to avoid the extra expense of GetOptimizationTier()).
NativeCodeVersion nativeCodeVersion = config->GetCodeVersion();
if (nativeCodeVersion.IsDefaultVersion() && !config->WasTieringDisabledBeforeJitting())
{
MethodDesc *methodDesc = nativeCodeVersion.GetMethodDesc();
if (!methodDesc->IsEligibleForTieredCompilation())
{
_ASSERTE(nativeCodeVersion.GetOptimizationTier() == NativeCodeVersion::OptimizationTierOptimized);
#ifdef FEATURE_INTERPRETER
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_MAKEFINALCODE);
#endif
return flags;
}
_ASSERT(!methodDesc->RequestedAggressiveOptimization());
if (g_pConfig->TieredCompilation_QuickJit())
{
NativeCodeVersion::OptimizationTier currentTier = nativeCodeVersion.GetOptimizationTier();
if (currentTier == NativeCodeVersion::OptimizationTier::OptimizationTier0Instrumented)
{
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER0);
return flags;
}
if (currentTier == NativeCodeVersion::OptimizationTier::OptimizationTier1Instrumented)
{
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER1);
return flags;
}
_ASSERTE(!nativeCodeVersion.IsFinalTier());
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER0);
if (g_pConfig->TieredPGO() && g_pConfig->TieredPGO_InstrumentOnlyHotCode())
{
// If we plan to instrument only hot code, we have to make an exception
// for cold methods with loops: if those self-promote to OSR they need
// some profile data to optimize with, so here we allow the JIT to enable instrumentation
// when the current method has loops and is eligible for OSR.
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR_IF_LOOPS);
}
return flags;
}
methodDesc->GetLoaderAllocator()->GetCallCountingManager()->DisableCallCounting(nativeCodeVersion);
nativeCodeVersion.SetOptimizationTier(NativeCodeVersion::OptimizationTierOptimized);
#ifdef FEATURE_INTERPRETER
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_MAKEFINALCODE);
#endif
return flags;
}
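// In summary, the switch below maps optimization tiers to jit flags roughly as follows
// (derived from the cases below; MAKEFINALCODE applies only with FEATURE_INTERPRETER):
//   OptimizationTier0Instrumented -> TIER0 + BBINSTR
//   OptimizationTier1Instrumented -> TIER1 + BBINSTR
//   OptimizationTier0             -> TIER0 (+ BBINSTR_IF_LOOPS when TieredPGO instruments only
//                                    hot code), or optimized/MAKEFINALCODE when QuickJit is off
//   OptimizationTier1OSR          -> OSR + TIER1 (+ MAKEFINALCODE)
//   OptimizationTier1             -> TIER1 (+ MAKEFINALCODE)
//   OptimizationTierOptimized     -> (+ MAKEFINALCODE)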
switch (nativeCodeVersion.GetOptimizationTier())
{
case NativeCodeVersion::OptimizationTier0Instrumented:
_ASSERT(g_pConfig->TieredCompilation_QuickJit());
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER0);
break;
case NativeCodeVersion::OptimizationTier1Instrumented:
_ASSERT(g_pConfig->TieredCompilation_QuickJit());
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER1);
break;
case NativeCodeVersion::OptimizationTier0:
if (g_pConfig->TieredCompilation_QuickJit())
{
if (g_pConfig->TieredPGO() && g_pConfig->TieredPGO_InstrumentOnlyHotCode())
{
// If we plan to instrument only hot code, we have to make an exception
// for cold methods with loops: if those self-promote to OSR they need
// some profile data to optimize with, so here we allow the JIT to enable instrumentation
// when the current method has loops and is eligible for OSR.
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR_IF_LOOPS);
}
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER0);
break;
}
nativeCodeVersion.SetOptimizationTier(NativeCodeVersion::OptimizationTierOptimized);
goto Optimized;
#ifdef FEATURE_ON_STACK_REPLACEMENT
case NativeCodeVersion::OptimizationTier1OSR:
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_OSR);
FALLTHROUGH;
#endif
case NativeCodeVersion::OptimizationTier1:
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER1);
FALLTHROUGH;
case NativeCodeVersion::OptimizationTierOptimized:
Optimized:
#ifdef FEATURE_INTERPRETER
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_MAKEFINALCODE);
#endif
break;
default:
UNREACHABLE();
}
return flags;
}
#ifdef _DEBUG
bool TieredCompilationManager::IsLockOwnedByCurrentThread()
{
WRAPPER_NO_CONTRACT;
return !!s_lock.OwnedByCurrentThread();
}
#endif // _DEBUG
#endif // FEATURE_TIERED_COMPILATION && !DACCESS_COMPILE