mirror of https://github.com/VSadov/Satori.git synced 2025-06-08 03:27:04 +09:00

Concurrent Mark completion and Card Cleaning improvements (#38)

* simpler ShouldDoConcurrent
* m_ccHelpersNum
* more pacing tweaks
* comments and todos
Vladimir Sadov 2025-03-16 18:21:51 -07:00 committed by vsadov
parent 893141c8bf
commit eb380d22d5
3 changed files with 189 additions and 102 deletions


@@ -442,8 +442,6 @@ static const int concMarkThreshold = 0;
static const int concMarkThreshold = 8 * 1024 * 1024;
#endif
static const int bgPauseThreshold = 1000; // microseconds
bool SatoriRecycler::ShouldDoConcurrent(int generation)
{
if (IsLowLatencyMode())
@@ -451,22 +449,9 @@ bool SatoriRecycler::ShouldDoConcurrent(int generation)
return true;
}
size_t ethOccupancy = this->m_occupancy[1] + this->m_occupancy[0];
if (generation == 2)
if (this->GetTotalOccupancy() < concMarkThreshold)
{
if ((this->m_occupancy[2] + ethOccupancy < concMarkThreshold) &&
m_lastTenuredGcInfo.m_pauseDurations[0] < bgPauseThreshold)
{
return false;
}
}
else
{
if ((ethOccupancy < concMarkThreshold) &&
m_lastEphemeralGcInfo.m_pauseDurations[0] < bgPauseThreshold)
{
return false;
}
return false;
}
return true;
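For reference, the simplification appears to reduce the decision to a single total-occupancy check; a sketch of how the function reads after this hunk (reconstructed from the diff, not copied from the file):

bool SatoriRecycler::ShouldDoConcurrent(int generation)
{
    if (IsLowLatencyMode())
    {
        return true;
    }

    // below the threshold a blocking collection is cheap enough,
    // so concurrent marking is not worth the overhead
    if (this->GetTotalOccupancy() < concMarkThreshold)
    {
        return false;
    }

    return true;
}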
@@ -550,6 +535,48 @@ bool SatoriRecycler::HelpOnceCore(bool minQuantum)
return true;
}
if (m_concurrentCleaningState == CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
// this is not a timeout, but there could be more work.
return true;
}
bool result;
int concurrentCleaningState;
// NB: m_ccHelpersNum is separate from m_activeWorkers
// because app threads may also be helping
Interlocked::Increment(&m_ccHelpersNum);
{
concurrentCleaningState = m_concurrentCleaningState;
if (concurrentCleaningState == CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
// this is not a timeout, but there could be more work.
result = true;
}
else
{
result = HelpOnceCoreInner(minQuantum);
}
}
int ccHelpersNum = Interlocked::Decrement(&m_ccHelpersNum);
// If we see no work, no workers, and the state is CC_CLEAN_STATE_DONE,
// we will take it as a hint to start the blocking stage.
if (result == false && ccHelpersNum == 0 && concurrentCleaningState == CC_CLEAN_STATE_DONE)
{
if (Interlocked::CompareExchange(&m_gcState, GC_STATE_BLOCKING, GC_STATE_CONCURRENT) == GC_STATE_CONCURRENT)
{
BlockingCollect();
}
}
return result;
}
bool SatoriRecycler::HelpOnceCoreInner(bool minQuantum)
{
if (m_ccStackMarkState == CC_MARK_STATE_MARKING)
{
_ASSERTE(m_isBarrierConcurrent);
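The m_ccHelpersNum bookkeeping in HelpOnceCore above is a counted-helper pattern; condensed to its essentials it looks roughly like this (a sketch; the real code also snapshots the cleaning state before working):

Interlocked::Increment(&m_ccHelpersNum);          // announce ourselves as a helper
bool moreWork = HelpOnceCoreInner(minQuantum);    // one bounded quantum of work
int remaining = Interlocked::Decrement(&m_ccHelpersNum);

// the last helper out, seeing no work and a finished cleaning stage,
// volunteers to start the blocking stage; the CAS ensures a single winner
if (!moreWork && remaining == 0 && m_concurrentCleaningState == CC_CLEAN_STATE_DONE)
{
    if (Interlocked::CompareExchange(&m_gcState, GC_STATE_BLOCKING, GC_STATE_CONCURRENT) == GC_STATE_CONCURRENT)
    {
        BlockingCollect();
    }
}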
@@ -596,30 +623,26 @@ bool SatoriRecycler::HelpOnceCore(bool minQuantum)
if (!m_concurrentHandlesDone)
{
bool moreWork = MarkHandles(deadline);
if (!moreWork)
if (MarkHandles(deadline))
{
return true;
}
else
{
m_concurrentHandlesDone = true;
}
return true;
}
if (!m_concurrentCardsDone)
{
// doing cards could be chunky (region granularity).
// prefer that it is done by workers
if (IsWorkerThread() || m_activeWorkers == 0)
_ASSERTE(m_condemnedGeneration != 2);
if (MarkThroughCardsConcurrent(deadline))
{
_ASSERTE(m_condemnedGeneration != 2);
if (MarkThroughCardsConcurrent(deadline))
{
return true;
}
else
{
m_concurrentCardsDone = true;
}
return true;
}
else
{
m_concurrentCardsDone = true;
}
}
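After this change the card-marking step no longer defers to worker threads; the resulting shape is roughly (a sketch of the new code path):

if (!m_concurrentCardsDone)
{
    _ASSERTE(m_condemnedGeneration != 2);
    if (MarkThroughCardsConcurrent(deadline))
    {
        return true;                   // deadline hit; cards remain
    }
    m_concurrentCardsDone = true;      // all cards marked through
}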
@@ -639,18 +662,13 @@ bool SatoriRecycler::HelpOnceCore(bool minQuantum)
if (m_concurrentCleaningState == CC_CLEAN_STATE_CLEANING)
{
// doing cards could be chunky (region granularity).
// prefer that it is done by workers
if (IsWorkerThread() || m_activeWorkers == 0)
if (CleanCardsConcurrent(deadline))
{
if (CleanCardsConcurrent(deadline))
{
return true;
}
else
{
m_concurrentCleaningState = CC_CLEAN_STATE_DONE;
}
return true;
}
else
{
m_concurrentCleaningState = CC_CLEAN_STATE_DONE;
}
}
@@ -785,24 +803,39 @@ void SatoriRecycler::HelpOnce()
if (m_gcState == GC_STATE_CONCURRENT)
{
tryAgain:
bool moreWork = m_activeWorkers > 0 ||
!m_concurrentCardsDone ||
bool moreWork = !m_concurrentCardsDone ||
m_ccStackMarkState != CC_MARK_STATE_DONE ||
m_concurrentCleaningState == CC_CLEAN_STATE_CLEANING ||
m_condemnedGeneration == 0 ||
!m_workList->IsEmpty();
int64_t start = GCToOSInterface::QueryPerformanceCounter();
if (moreWork)
// assume that helpers may create more work
if ((moreWork || m_ccHelpersNum > 0) &&
m_concurrentCleaningState != CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
HelpOnceCore(/*minQuantum*/ false);
m_noWorkSince = GCToOSInterface::QueryPerformanceCounter();
// if we did not use all the quantum in Low Latency mode,
// consume what roughly remains for pacing reasons.
if (IsLowLatencyMode() &&
m_concurrentCleaningState >= CC_CLEAN_STATE_CLEANING)
bool moreWork = HelpOnceCore(/*minQuantum*/ false);
if (moreWork)
{
m_noWorkSince = GCToOSInterface::QueryPerformanceCounter();
}
else
{
// If we got here only because there are helpers, yet we have seen no work for a very long time,
// start moving towards cleaning/blocking, so that frequent but useless helping does not starve the gc forever.
// This should be rare; basically a panic mode.
if (!moreWork && start - m_noWorkSince > m_perfCounterTicksPerMilli)
{
goto treatAsNoWork;
}
}
// Check if we need to pace. Even if we saw more work, it might not have been a timeout.
// This only makes sense if we can use extra workers.
if (IsLowLatencyMode() && MaxWorkers() > 0)
{
// if we did not use all the quantum in Low Latency mode,
// consume what roughly remains for pacing reasons.
int64_t deadline = start + HelpQuantum() / 2;
int iters = 1;
while (GCToOSInterface::QueryPerformanceCounter() < deadline &&
@@ -818,22 +851,45 @@ tryAgain:
}
else
{
if (m_concurrentCleaningState != CC_CLEAN_STATE_DONE)
treatAsNoWork:
// if we have not tried to clean yet, tell new helpers not to enter
if (m_concurrentCleaningState == CC_CLEAN_STATE_NOT_READY)
{
if (m_concurrentCleaningState == CC_CLEAN_STATE_NOT_READY &&
Interlocked::CompareExchange(&m_concurrentCleaningState, CC_CLEAN_STATE_SETTING_UP, CC_CLEAN_STATE_NOT_READY) == CC_CLEAN_STATE_NOT_READY)
Interlocked::CompareExchange(&m_concurrentCleaningState, CC_CLEAN_STATE_WAIT_FOR_HELPERS, CC_CLEAN_STATE_NOT_READY);
}
// if helpers are all out, try to declare cleaning
if (m_concurrentCleaningState == CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
if (m_ccHelpersNum != 0)
{
// helpers are still present, exit until next time.
GCToEEInterface::GcPoll();
return;
}
if (Interlocked::CompareExchange(&m_concurrentCleaningState, CC_CLEAN_STATE_SETTING_UP, CC_CLEAN_STATE_WAIT_FOR_HELPERS) == CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
IncrementCardScanTicket();
m_concurrentCleaningState = CC_CLEAN_STATE_CLEANING;
goto tryAgain;
}
}
else if (start - m_noWorkSince > HelpQuantum() * 4)
// we are either completely done, or cleaning, or, in a rare case, setting up for cleaning.
// if not done, run another cycle.
if (m_concurrentCleaningState != CC_CLEAN_STATE_DONE)
{
goto tryAgain;
}
// has it been long enough since we last saw work?
if (start - m_noWorkSince > HelpQuantum() * 4)
{
// 4 help quantums without work; it seems like we are done.
// we may have some helpers draining long chains and not sharing anything;
// in such a degenerate case we still want to wrap things up and block.
if (Interlocked::CompareExchange(&m_gcState, GC_STATE_BLOCKING, GC_STATE_CONCURRENT) == GC_STATE_CONCURRENT)
{
m_activeWorkerFn = nullptr;
BlockingCollect();
}
}
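Pieced together, this hunk is a small shutdown handshake. A condensed sketch of the sequence (names from the diff; retry plumbing and pacing elided):

// 1) no work observed: stop new helpers from entering
Interlocked::CompareExchange(&m_concurrentCleaningState,
    CC_CLEAN_STATE_WAIT_FOR_HELPERS, CC_CLEAN_STATE_NOT_READY);

// 2) wait until helpers already inside have drained
if (m_ccHelpersNum != 0)
    return;                            // try again on a later HelpOnce

// 3) exactly one thread claims setup, bumps the scan ticket, opens cleaning
if (Interlocked::CompareExchange(&m_concurrentCleaningState,
        CC_CLEAN_STATE_SETTING_UP, CC_CLEAN_STATE_WAIT_FOR_HELPERS) == CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
    IncrementCardScanTicket();
    m_concurrentCleaningState = CC_CLEAN_STATE_CLEANING;
}

// 4) after ~4 idle help quantums, move the whole GC to the blocking stage
if (Interlocked::CompareExchange(&m_gcState, GC_STATE_BLOCKING, GC_STATE_CONCURRENT) == GC_STATE_CONCURRENT)
{
    m_activeWorkerFn = nullptr;        // call the workers off
    BlockingCollect();
}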
@@ -853,7 +909,24 @@ tryAgain:
void SatoriRecycler::ConcurrentWorkerFn()
{
// workers have a deadline too, just to come here and check the stage.
while ((m_gcState == GC_STATE_CONCURRENT) && HelpOnceCore(/*minQuantum*/ false));
while (m_gcState == GC_STATE_CONCURRENT)
{
if (HelpOnceCore(/*minQuantum*/ false))
{
m_noWorkSince = GCToOSInterface::QueryPerformanceCounter();
}
else
{
// we saw no work available and will exit.
// if more work is generated we will be invited back.
break;
}
if (m_concurrentCleaningState == CC_CLEAN_STATE_WAIT_FOR_HELPERS)
{
GCToOSInterface::Sleep(0);
}
}
}
int SatoriRecycler::MaxWorkers()
@@ -1138,15 +1211,21 @@ void SatoriRecycler::BlockingCollectImpl()
size_t time = GCToOSInterface::QueryPerformanceCounter();
#endif
// we should not normally have active workers here.
// just in case, we support forcing the blocking stage for Collect or OOM situations
m_activeWorkerFn = nullptr;
if (IsWorkerThread())
{
// do not count ourselves as a worker, so that we do not wait forever when we need workers to leave.
Interlocked::Decrement(&m_activeWorkers);
}
else
{
// make sure everyone sees the new Fn before waiting for workers to drain.
MemoryBarrier();
}
while (m_activeWorkers > 0)
{
// since we are waiting for concurrent workers to stop, we might as well try helping
if (!HelpOnceCore(/*minQuantum*/ true))
{
YieldProcessor();
}
YieldProcessor();
}
m_gcState = GC_STATE_BLOCKED;
@@ -1200,6 +1279,12 @@ void SatoriRecycler::BlockingCollectImpl()
Relocate();
Update();
_ASSERTE(m_activeWorkers == 0);
// we are done using workers.
// undo the adjustment if we had to do one
if (IsWorkerThread()) m_activeWorkers++;
m_gcCount[0]++;
m_gcCount[1]++;
if (m_condemnedGeneration == 2)
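The worker adjustment above is a self-exclusion trick; isolated, it reads roughly like this (a sketch):

if (IsWorkerThread())
{
    // a worker running the blocking collection must not wait on itself
    Interlocked::Decrement(&m_activeWorkers);
}

while (m_activeWorkers > 0)
{
    YieldProcessor();                  // wait for the remaining workers to drain
}

// ... blocking GC phases run here ...

if (IsWorkerThread())
{
    m_activeWorkers++;                 // undo the adjustment once workers are done
}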
@@ -2562,7 +2647,6 @@ bool SatoriRecycler::CleanCardsConcurrent(int64_t deadline)
_ASSERTE(!region->HasMarksSet());
int8_t resetValue = Satori::CardState::REMEMBERED;
size_t unsetValue = Satori::CardState::BLANK;
if (region->Generation() < 2)
{
// We will not wipe remembered+ephemeral here.
@@ -2582,8 +2666,6 @@ bool SatoriRecycler::CleanCardsConcurrent(int64_t deadline)
}
resetValue = Satori::CardState::EPHEMERAL;
_ASSERTE(Satori::CardState::EPHEMERAL == (int8_t)0x80);
unsetValue = 0x8080808080808080;
}
// invariant check: when marking through cards, the gen2 remset stays a remset, thus it should be marked through on every GC
@@ -2592,10 +2674,13 @@ bool SatoriRecycler::CleanCardsConcurrent(int64_t deadline)
bool considerAllMarked = region->Generation() > m_condemnedGeneration;
int8_t* cards = page->CardsForGroup(i);
_ASSERTE(Satori::CardState::DIRTY == (int8_t)0x04);
const size_t dirtyBits = 0x0404040404040404;
for (size_t j = 0; j < Satori::CARD_BYTES_IN_CARD_GROUP; j++)
{
// cards are often sparsely set, if j is aligned, check the entire size_t for unset value
if (((j & (sizeof(size_t) - 1)) == 0) && *((size_t*)&cards[j]) == unsetValue)
// cards are often sparsely set; if j is aligned, check whether any card in the entire size_t is dirty
if (((j & (sizeof(size_t) - 1)) == 0) && (*((size_t*)&cards[j]) & dirtyBits) == 0)
{
j += sizeof(size_t) - 1;
continue;
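The skip above is a word-at-a-time scan: with Satori::CardState::DIRTY == 0x04 replicated into every byte lane, a single aligned 8-byte load proves that none of eight consecutive cards is dirty. A standalone sketch (helper name hypothetical):

// true if any of the 8 card bytes at 'cards' (size_t-aligned) has the DIRTY bit set
static bool AnyCardDirty(const int8_t* cards)
{
    const size_t dirtyBits = 0x0404040404040404;   // 0x04 in every byte lane
    return (*reinterpret_cast<const size_t*>(cards) & dirtyBits) != 0;
}

Only the DIRTY bit is tested in each lane, so remembered or ephemeral cards do not interrupt the fast skip.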
@@ -2900,19 +2985,17 @@ void SatoriRecycler::CleanCards()
continue;
}
_ASSERTE(Satori::CardState::EPHEMERAL == (int8_t)0x80);
const size_t unsetValue = region->Generation() >= 2 ?
Satori::CardState::BLANK :
0x8080808080808080;
bool considerAllMarked = region->Generation() > m_condemnedGeneration;
int8_t* cards = page->CardsForGroup(i);
const int8_t resetValue = region->Generation() >= 2 ? Satori::CardState::REMEMBERED : Satori::CardState::EPHEMERAL;
_ASSERTE(Satori::CardState::DIRTY == (int8_t)0x04);
const size_t dirtyBits = 0x0404040404040404;
for (size_t j = 0; j < Satori::CARD_BYTES_IN_CARD_GROUP; j++)
{
// cards are often sparsely set, if j is aligned, check the entire size_t for unset value
if (((j & (sizeof(size_t) - 1)) == 0) && *((size_t*)&cards[j]) == unsetValue)
// cards are often sparsely set; if j is aligned, check whether any card in the entire size_t is dirty
if (((j & (sizeof(size_t) - 1)) == 0) && (*((size_t*)&cards[j]) & dirtyBits) == 0)
{
j += sizeof(size_t) - 1;
continue;
@@ -4257,28 +4340,27 @@ void SatoriRecycler::DrainDeferredSweepQueue()
bool SatoriRecycler::DrainDeferredSweepQueueConcurrent(int64_t deadline)
{
bool isWorkerGCThread = IsWorkerThread();
if (!isWorkerGCThread && m_activeWorkers > 0)
{
// Sweeping and returning could be chunky or contentious.
// If there are workers, just say more work is needed.
return true;
}
SatoriRegion* curRegion = m_deferredSweepRegions->TryPop();
if (curRegion)
// Sweeping and returning could be contentious.
// If there are workers, do not participate.
if (isWorkerGCThread || m_activeWorkers <= 0)
{
MaybeAskForHelp();
do
SatoriRegion* curRegion = m_deferredSweepRegions->TryPop();
if (curRegion)
{
SweepAndReturnRegion(curRegion);
Interlocked::Decrement(&m_deferredSweepCount);
// ignore deadline on worker threads, we can't do anything else anyway.
if (!isWorkerGCThread && deadline && (GCToOSInterface::QueryPerformanceCounter() - deadline > 0))
MaybeAskForHelp();
do
{
break;
}
} while ((curRegion = m_deferredSweepRegions->TryPop()));
SweepAndReturnRegion(curRegion);
Interlocked::Decrement(&m_deferredSweepCount);
// ignore deadline on worker threads, we can't do anything else anyway.
if (!isWorkerGCThread && deadline && (GCToOSInterface::QueryPerformanceCounter() - deadline > 0))
{
break;
}
} while ((curRegion = m_deferredSweepRegions->TryPop()));
}
}
// no work that we can claim, but we must wait for sweeping to finish and
@@ -4319,7 +4401,7 @@ void SatoriRecycler::DrainDeferredSweepQueueWorkerFn()
{
SweepAndReturnRegion(curRegion);
Interlocked::Decrement(&m_deferredSweepCount);
} while ((curRegion = m_deferredSweepRegions->TryPop()));
} while (m_activeWorkerFn && (curRegion = m_deferredSweepRegions->TryPop()));
}
}
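The added m_activeWorkerFn condition gives the drain loop a cooperative cancellation point: BlockingCollectImpl (above) nulls m_activeWorkerFn, and workers notice between regions. A sketch of the shape:

SatoriRegion* curRegion = m_deferredSweepRegions->TryPop();
while (curRegion)
{
    SweepAndReturnRegion(curRegion);
    Interlocked::Decrement(&m_deferredSweepCount);

    // re-check between regions so a starting blocking GC can call us off
    curRegion = m_activeWorkerFn ? m_deferredSweepRegions->TryPop() : nullptr;
}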


@@ -172,13 +172,16 @@ private:
static const int CC_MARK_STATE_DONE = 3;
static const int CC_CLEAN_STATE_NOT_READY = 0;
static const int CC_CLEAN_STATE_SETTING_UP = 1;
static const int CC_CLEAN_STATE_CLEANING = 2;
static const int CC_CLEAN_STATE_DONE = 3;
static const int CC_CLEAN_STATE_WAIT_FOR_HELPERS = 1;
static const int CC_CLEAN_STATE_SETTING_UP = 2;
static const int CC_CLEAN_STATE_CLEANING = 3;
static const int CC_CLEAN_STATE_DONE = 4;
volatile int m_ccStackMarkState;
volatile int m_ccStackMarkingThreadsNum;
volatile int m_ccHelpersNum;
int m_syncBlockCacheScanDone;
int m_condemnedGeneration;
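The renumbered constants imply a strictly forward-moving cleaning state machine; the intended progression, inferred from the .cpp changes in this commit, is roughly:

// NOT_READY(0) -> WAIT_FOR_HELPERS(1) -> SETTING_UP(2) -> CLEANING(3) -> DONE(4)
//
// NOT_READY        : cleaning not yet considered
// WAIT_FOR_HELPERS : new helpers are turned away; wait for m_ccHelpersNum == 0
// SETTING_UP       : one thread bumps the card scan ticket
// CLEANING         : helpers and workers run CleanCardsConcurrent
// DONE             : the last helper out may CAS the GC into the blocking stage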
@@ -261,6 +264,7 @@ private:
void AskForHelp();
void RunWithHelp(void(SatoriRecycler::* method)());
bool HelpOnceCore(bool minQuantum);
bool HelpOnceCoreInner(bool minQuantum);
void PushToEphemeralQueues(SatoriRegion* region);
void PushToTenuredQueues(SatoriRegion* region);


@@ -82,6 +82,7 @@ namespace Satori
// address bits set to track finalizable that needs to be scheduled to F-queue
const static size_t FINALIZATION_PENDING = 1;
// one card covers 512 bytes (64 possible pointers)
static const int BYTES_PER_CARD_BYTE = 512;
static const int BYTES_PER_CARD_GROUP = REGION_SIZE_GRANULARITY / 2;
static const int CARD_BYTES_IN_CARD_GROUP = Satori::BYTES_PER_CARD_GROUP / BYTES_PER_CARD_BYTE;
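For orientation, a sketch of the arithmetic these constants encode (function names hypothetical; REGION_SIZE_GRANULARITY is taken as defined elsewhere in this header):

// one card byte tracks 512 bytes of heap, i.e. 64 potential pointer slots on 64-bit
static size_t CardByteIndex(size_t offsetInPage)
{
    return offsetInPage / Satori::BYTES_PER_CARD_BYTE;    // offset / 512
}

// card bytes are grouped; each group covers half of a region granule
static size_t CardGroupIndex(size_t offsetInPage)
{
    return offsetInPage / Satori::BYTES_PER_CARD_GROUP;
}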