diff --git a/src/coreclr/src/gc/satori/SatoriAllocator.cpp b/src/coreclr/src/gc/satori/SatoriAllocator.cpp index af6a3349476..a1265e9944b 100644 --- a/src/coreclr/src/gc/satori/SatoriAllocator.cpp +++ b/src/coreclr/src/gc/satori/SatoriAllocator.cpp @@ -37,7 +37,7 @@ tryAgain: SatoriRegion* putBack = nullptr; int bucket = SizeToBucket(regionSize); - SatoriRegion* region = m_queues[bucket]->TryRemove(regionSize, putBack); + SatoriRegion* region = m_queues[bucket]->TryRemoveWithSize(regionSize, putBack); if (region) { if (putBack) @@ -50,7 +50,7 @@ tryAgain: while (++bucket < Satori::BUCKET_COUNT) { - region = m_queues[bucket]->TryPop(regionSize, putBack); + region = m_queues[bucket]->TryPopWithSize(regionSize, putBack); if (region) { if (putBack) @@ -184,7 +184,7 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si // also need to release this _after_ using it since work is done here already // may also try smoothing, although unlikely. // All this can be tuned once full GC works. - size_t desiredFreeSpace = max(size, Satori::REGION_SIZE_GRANULARITY * 1 / 10); + size_t desiredFreeSpace = max(size, Satori::REGION_SIZE_GRANULARITY * 9 / 10); if (region->ThreadLocalCompact(desiredFreeSpace)) { // we have enough free space in the region to continue diff --git a/src/coreclr/src/gc/satori/SatoriGC.cpp b/src/coreclr/src/gc/satori/SatoriGC.cpp index 8c169e8a872..b4284327aa6 100644 --- a/src/coreclr/src/gc/satori/SatoriGC.cpp +++ b/src/coreclr/src/gc/satori/SatoriGC.cpp @@ -237,7 +237,25 @@ unsigned SatoriGC::GetGcCount() bool SatoriGC::IsThreadUsingAllocationContextHeap(gc_alloc_context* acontext, int thread_number) { - __UNREACHABLE(); + // TODO: VS should prefer when running on the same core as recorded in alloc region, if present. + // negative thread_number could indicate "do not care" + // also need to assign numbers to threads when scanning. + // at very least there is dependency on 0 being unique. 
+ while (true) + { + int threadScanCount = acontext->alloc_count; + int currentScanCount = m_heap->Recycler()->GetScanCount(); + if (threadScanCount >= currentScanCount) + { + break; + } + + if (Interlocked::CompareExchange(&acontext->alloc_count, currentScanCount, threadScanCount) == threadScanCount) + { + return true; + } + } + return false; } diff --git a/src/coreclr/src/gc/satori/SatoriGC.h b/src/coreclr/src/gc/satori/SatoriGC.h index 50b999a5764..8cd818f8029 100644 --- a/src/coreclr/src/gc/satori/SatoriGC.h +++ b/src/coreclr/src/gc/satori/SatoriGC.h @@ -20,8 +20,6 @@ class SatoriGC : public IGCHeapInternal private: int64_t m_perfCounterFrequency; SatoriHeap* m_heap; - SatoriAllocator* m_allocator; - SatoriRecycler* m_recycler; // what is the difference between these two? // should these be volatile diff --git a/src/coreclr/src/gc/satori/SatoriMarkChunk.h b/src/coreclr/src/gc/satori/SatoriMarkChunk.h index 34514c30944..d9a521f8ed6 100644 --- a/src/coreclr/src/gc/satori/SatoriMarkChunk.h +++ b/src/coreclr/src/gc/satori/SatoriMarkChunk.h @@ -50,6 +50,11 @@ public: nullptr; } + size_t Count() + { + return m_top; + } + private: size_t m_top; diff --git a/src/coreclr/src/gc/satori/SatoriObject.inl b/src/coreclr/src/gc/satori/SatoriObject.inl index 76268a43f75..45ba99cfcb7 100644 --- a/src/coreclr/src/gc/satori/SatoriObject.inl +++ b/src/coreclr/src/gc/satori/SatoriObject.inl @@ -178,6 +178,7 @@ inline void SatoriObject::ClearNextInMarkStack() } // TODO: VS same as [Get|Set]NextInMarkStack +// TODO: VS rename GetLocalReloc inline int32_t SatoriObject::GetReloc() { return ((int32_t*)this)[-2]; diff --git a/src/coreclr/src/gc/satori/SatoriQueue.h b/src/coreclr/src/gc/satori/SatoriQueue.h index a960e9e9244..b40b576cc04 100644 --- a/src/coreclr/src/gc/satori/SatoriQueue.h +++ b/src/coreclr/src/gc/satori/SatoriQueue.h @@ -18,12 +18,12 @@ class SatoriQueue public: SatoriQueue() : - m_lock(), m_head(), m_tail() + m_lock(), m_head(), m_tail(), m_count() { 
m_lock.Initialize(); }; - void Push(T* item) + int Push(T* item) { SatoriLockHolder holder(&m_lock); item->m_containingQueue = this; @@ -38,6 +38,8 @@ public: m_head->m_prev = item; m_head = item; } + + return ++m_count; } T* TryPop() @@ -51,6 +53,7 @@ public: return nullptr; } + m_count--; result->m_containingQueue = nullptr; m_head = result->m_next; if (m_head == nullptr) @@ -72,6 +75,7 @@ public: void Enqueue(T* item) { SatoriLockHolder holder(&m_lock); + m_count++; item->m_containingQueue = this; if (m_tail == nullptr) { @@ -95,6 +99,7 @@ public: return false; } + m_count--; item->m_containingQueue = nullptr; if (item->m_prev == nullptr) { @@ -125,20 +130,16 @@ public: return item->m_containingQueue == this; } - bool CanPop() + int Count() { - return m_head != nullptr; - } - - bool CanDequeue() - { - return m_tail != nullptr; + return m_count; } protected: SatoriLock m_lock; T* m_head; T* m_tail; + int m_count; }; #endif diff --git a/src/coreclr/src/gc/satori/SatoriRecycler.cpp b/src/coreclr/src/gc/satori/SatoriRecycler.cpp index 98ad431fa5c..d3407cb4084 100644 --- a/src/coreclr/src/gc/satori/SatoriRecycler.cpp +++ b/src/coreclr/src/gc/satori/SatoriRecycler.cpp @@ -12,6 +12,8 @@ #include "SatoriHeap.h" #include "SatoriRecycler.h" +#include "SatoriObject.h" +#include "SatoriObject.inl" #include "SatoriRegion.h" #include "SatoriRegion.inl" #include "SatoriMarkChunk.h" @@ -20,9 +22,12 @@ void SatoriRecycler::Initialize(SatoriHeap* heap) { m_heap = heap; - m_regions = new SatoriRegionQueue(); - m_work_list = new SatoriMarkChunkQueue(); - m_free_list = new SatoriMarkChunkQueue(); + m_allRegions = new SatoriRegionQueue(); + m_stayingRegions = new SatoriRegionQueue(); + m_relocatingRegions = new SatoriRegionQueue(); + + m_workList = new SatoriMarkChunkQueue(); + m_freeList = new SatoriMarkChunkQueue(); SatoriRegion* region = m_heap->Allocator()->GetRegion(Satori::REGION_SIZE_GRANULARITY); @@ -35,7 +40,7 @@ void SatoriRecycler::Initialize(SatoriHeap* heap) } 
SatoriMarkChunk* chunk = SatoriMarkChunk::InitializeAt(mem); - m_free_list->Push(chunk); + m_freeList->Push(chunk); } } @@ -45,8 +50,282 @@ void SatoriRecycler::AddRegion(SatoriRegion* region) // TODO: VS verify + // TODO: VS volatile? region->Publish(); - // TODO: VS leak the region for now - // TODO: VS for now count and once have 5, lets mark. + int count = m_allRegions->Push(region); + if (count > 10) + { + Collect(); + } +} + +void SatoriRecycler::Collect() +{ + // mark own stack into work queues + IncrementScanCount(); + MarkOwnStack(); + + // TODO: VS perhaps drain queues as a part of MarkOwnStack? - to give other threads chance to self-mark? + // thread marking is fast though, so it may not help a lot. + + // TODO: VS we should not be calling Suspend from multiple threads for the same collect event. + // Perhaps lock and send other threads away, they will suspend soon enough. + + // stop other threads. (except this one, it should not be considered a cooperative thread while it is busy doing GC). + bool wasCoop = GCToEEInterface::EnablePreemptiveGC(); + _ASSERTE(wasCoop); + GCToEEInterface::SuspendEE(SUSPEND_FOR_GC); + + // TODO: VS this also scans statics. Do we want this? + MarkOtherStacks(); + + // drain queues + DrainMarkQueues(); + + // mark handles to queues + + // while have work + // { + // drain queues + // mark through SATB cards (could be due to overflow) + // } + + // all marked here + + // TODO: VS can't know live size without scanning all live objects. We need to scan at least live ones. + // Then we could as well coalesce gaps and thread to buckets. + // What to do with finalizables? + + // plan regions: + // 0% - return to recycler + // > 80% go to stayers (scan for finalizable one day, if occupancy reduced and has finalizable, this can be done after releasing VM.) + // > 50% or with pins - targets, sweep and thread gaps, slice and release free tails, add to queues, need buckets similar to allocator, should regs have buckets? 
+ // targets go to stayers too. + // + // rest - add to move sources + SatoriRegion* curReg; + while (curReg = m_allRegions->TryPop()) + { + m_stayingRegions->Push(curReg); + } + + // once no more regs in queue + // go through sources and relocate to destinations, + // grab empties if no space, add to stayers and use as if gotten from free buckets. + // if no space at all, put the reg to stayers. (scan for finalizable one day) + + // go through roots and update refs + + // go through stayers, update refs (need to care about relocated in stayers, could happen if no space) + while (curReg = m_stayingRegions->TryPop()) + { + curReg->CleanMarks(); + m_allRegions->Push(curReg); + } + + // restart VM + GCToEEInterface::RestartEE(true); + + // return source regs to allocator. + + // become coop again (note - could block here, it is ok) + GCToEEInterface::DisablePreemptiveGC(); +} + +class MarkContext +{ + friend class SatoriRecycler; + +public: + MarkContext(SatoriRecycler* recycler) + : m_markChunk() + { + m_recycler = recycler; + } + + void PushToMarkQueues(SatoriObject* o) + { + if (m_markChunk && m_markChunk->TryPush(o)) + { + return; + } + + m_recycler->PushToMarkQueuesSlow(m_markChunk, o); + } + +private: + SatoriRecycler* m_recycler; + SatoriMarkChunk* m_markChunk; +}; + +void SatoriRecycler::PushToMarkQueuesSlow(SatoriMarkChunk* &currentMarkChunk, SatoriObject* o) +{ + if (currentMarkChunk) + { + m_workList->Push(currentMarkChunk); + } + + currentMarkChunk = m_freeList->TryPop(); + if (currentMarkChunk) + { + bool pushed = currentMarkChunk->TryPush(o); + _ASSERTE(pushed); + } + else + { + // TODO: VS mark card table + o->SetEscaped(); + } +} + +void SatoriRecycler::MarkFn(PTR_PTR_Object ppObject, ScanContext* sc, uint32_t flags) +{ + size_t location = (size_t)*ppObject; + if (location == 0) + { + return; + } + + SatoriObject* o = SatoriObject::At(location); + if (flags & GC_CALL_INTERIOR) + { + o = o->ContainingRegion()->FindObject(location); + if (o == nullptr) + { + 
return; + } + } + + if (!o->IsMarked()) + { + // TODO: VS should use threadsafe variant + o->SetMarked(); + + MarkContext* context = (MarkContext*)sc->_unused1; + // TODO: VS we do not need to push if card is marked, we will have to revisit anyways. + context->PushToMarkQueues(o); + } + + if (flags & GC_CALL_PINNED) + { + o->SetPinned(); + } +}; + +void SatoriRecycler::MarkOwnStack() +{ + gc_alloc_context* aContext = GCToEEInterface::GetAllocContext(); + + // TODO: VS can this be more robust in case the thread gets stuck? + // claim our own stack for scanning + while (true) + { + int threadScanCount = aContext->alloc_count; + int currentScanCount = GetScanCount(); + if (threadScanCount >= currentScanCount) + { + return; + } + + if (Interlocked::CompareExchange(&aContext->alloc_count, currentScanCount, threadScanCount) == threadScanCount) + { + break; + } + } + + // mark roots for the current stack + ScanContext sc; + sc.promotion = TRUE; + + MarkContext c = MarkContext(this); + sc._unused1 = &c; + GCToEEInterface::GcScanCurrentStackRoots((promote_func*)MarkFn, &sc); + + if (c.m_markChunk != nullptr) + { + m_workList->Push(c.m_markChunk); + } +} + +void SatoriRecycler::MarkOtherStacks() +{ + // mark roots for the current stack + ScanContext sc; + sc.promotion = TRUE; + + MarkContext c = MarkContext(this); + sc._unused1 = &c; + + //TODO: VS there should be only one thread with "thread_number == 0" + //TODO: VS implement two-pass scheme with preferred vs. any stacks + + GCToEEInterface::GcScanRoots((promote_func*)MarkFn, 2, 2, &sc); + + if (c.m_markChunk != nullptr) + { + m_workList->Push(c.m_markChunk); + } +} + +// TODO: VS interlocked? +void SatoriRecycler::IncrementScanCount() +{ + m_scanCount++; +} + +// TODO: VS volatile? 
+inline int SatoriRecycler::GetScanCount() +{ + return m_scanCount; +} + +void SatoriRecycler::DrainMarkQueues() +{ + SatoriMarkChunk* srcChunk = m_workList->TryPop(); + SatoriMarkChunk* dstChunk = nullptr; + while (srcChunk) + { + // drain srcChunk to dst chunk + SatoriObject* o; + while (o = srcChunk->TryPop()) + { + o->ForEachObjectRef( + [&](SatoriObject** ref) + { + SatoriObject* child = *ref; + if (child && !child->IsMarked()) + { + child->SetMarked(); + if (!dstChunk || !dstChunk->TryPush(child)) + { + this->PushToMarkQueuesSlow(dstChunk, child); + } + } + } + ); + } + + // done with srcChunk + // if we have nonempty dstChunk (i.e. produced more work), + // swap src and dst and continue + if (dstChunk && dstChunk->Count() > 0) + { + SatoriMarkChunk* tmp = srcChunk; + _ASSERTE(tmp->Count() == 0); + srcChunk = dstChunk; + dstChunk = tmp; + } + else + { + m_freeList->Push(srcChunk); + srcChunk = m_workList->TryPop(); + } + } + + if (dstChunk) + { + _ASSERTE(dstChunk->Count() == 0); + m_freeList->Push(dstChunk); + } } diff --git a/src/coreclr/src/gc/satori/SatoriRecycler.h b/src/coreclr/src/gc/satori/SatoriRecycler.h index 3dc671b4cb2..99543ca2a48 100644 --- a/src/coreclr/src/gc/satori/SatoriRecycler.h +++ b/src/coreclr/src/gc/satori/SatoriRecycler.h @@ -18,16 +18,34 @@ class SatoriRegion; class SatoriRecycler { + friend class MarkContext; + public: void Initialize(SatoriHeap* heap); void AddRegion(SatoriRegion* region); + int GetScanCount(); + private: SatoriHeap* m_heap; - SatoriRegionQueue* m_regions; - SatoriMarkChunkQueue* m_work_list; - SatoriMarkChunkQueue* m_free_list; + // used to ensure each thread is scanned once per scan round. 
+ int m_scanCount; + + SatoriRegionQueue* m_allRegions; + SatoriRegionQueue* m_stayingRegions; + SatoriRegionQueue* m_relocatingRegions; + + SatoriMarkChunkQueue* m_workList; + SatoriMarkChunkQueue* m_freeList; + + void Collect(); + static void MarkFn(PTR_PTR_Object ppObject, ScanContext* sc, uint32_t flags); + void PushToMarkQueuesSlow(SatoriMarkChunk*& currentMarkChunk, SatoriObject* o); + void MarkOwnStack(); + void MarkOtherStacks(); + void IncrementScanCount(); + void DrainMarkQueues(); }; #endif diff --git a/src/coreclr/src/gc/satori/SatoriRegion.cpp b/src/coreclr/src/gc/satori/SatoriRegion.cpp index 0bf3f5492b6..c1f9b88774c 100644 --- a/src/coreclr/src/gc/satori/SatoriRegion.cpp +++ b/src/coreclr/src/gc/satori/SatoriRegion.cpp @@ -908,6 +908,11 @@ bool SatoriRegion::ThreadLocalCompact(size_t desiredFreeSpace) return (foundFree >= desiredFreeSpace + Satori::MIN_FREE_SIZE || foundFree == desiredFreeSpace); } +void SatoriRegion::CleanMarks() +{ + ZeroMemory(&m_bitmap[BITMAP_START], (BITMAP_SIZE - BITMAP_START) * sizeof(size_t)); +} + void SatoriRegion::Verify() { #ifdef _DEBUG diff --git a/src/coreclr/src/gc/satori/SatoriRegion.h b/src/coreclr/src/gc/satori/SatoriRegion.h index 1877879590e..0ae97c6f6c1 100644 --- a/src/coreclr/src/gc/satori/SatoriRegion.h +++ b/src/coreclr/src/gc/satori/SatoriRegion.h @@ -64,6 +64,8 @@ public: SatoriObject* NextMarked(SatoriObject* after); bool ThreadLocalCompact(size_t desiredFreeSpace); + void CleanMarks(); + void Verify(); private: diff --git a/src/coreclr/src/gc/satori/SatoriRegionQueue.cpp b/src/coreclr/src/gc/satori/SatoriRegionQueue.cpp index 884b6d2d068..2101bb3edbb 100644 --- a/src/coreclr/src/gc/satori/SatoriRegionQueue.cpp +++ b/src/coreclr/src/gc/satori/SatoriRegionQueue.cpp @@ -15,7 +15,7 @@ #include "SatoriRegion.h" #include "SatoriRegion.inl" -SatoriRegion* SatoriRegionQueue::TryPop(size_t regionSize, SatoriRegion*& putBack) +SatoriRegion* SatoriRegionQueue::TryPopWithSize(size_t regionSize, SatoriRegion*& 
putBack) { m_lock.Enter(); @@ -78,7 +78,7 @@ SatoriRegion* SatoriRegionQueue::TryPop(size_t regionSize, SatoriRegion*& putBac return result; } -SatoriRegion* SatoriRegionQueue::TryRemove(size_t regionSize, SatoriRegion*& putBack) +SatoriRegion* SatoriRegionQueue::TryRemoveWithSize(size_t regionSize, SatoriRegion*& putBack) { m_lock.Enter(); diff --git a/src/coreclr/src/gc/satori/SatoriRegionQueue.h b/src/coreclr/src/gc/satori/SatoriRegionQueue.h index 105db1bb68e..8297f3a151b 100644 --- a/src/coreclr/src/gc/satori/SatoriRegionQueue.h +++ b/src/coreclr/src/gc/satori/SatoriRegionQueue.h @@ -17,8 +17,8 @@ class SatoriRegion; class SatoriRegionQueue : public SatoriQueue<SatoriRegion> { public: - SatoriRegion* TryPop(size_t regionSize, SatoriRegion* &putBack); - SatoriRegion* TryRemove(size_t regionSize, SatoriRegion*& putBack); + SatoriRegion* TryPopWithSize(size_t regionSize, SatoriRegion* &putBack); + SatoriRegion* TryRemoveWithSize(size_t regionSize, SatoriRegion*& putBack); }; #endif diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index 9372e79dafc..833a3088437 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1349,7 +1349,7 @@ void SystemDomain::LazyInitGlobalStringLiteralMap() _ASSERTE(GCHeapUtilities::IsGCInProgress() && GCHeapUtilities::IsServerHeap() && - IsGCSpecialThread()); + (IsGCSpecialThread() || !GetThread()->PreemptiveGCDisabled())); SystemDomain* sysDomain = SystemDomain::System(); if (sysDomain) @@ -5064,7 +5064,7 @@ void AppDomain::EnumStaticGCRefs(promote_func* fn, ScanContext* sc) _ASSERTE(GCHeapUtilities::IsGCInProgress() && GCHeapUtilities::IsServerHeap() && - IsGCSpecialThread()); + (IsGCSpecialThread() || !GetThread()->PreemptiveGCDisabled())); #ifndef CROSSGEN_COMPILE if (m_pLargeHeapHandleTable != nullptr)