diff --git a/src/coreclr/gc/CMakeLists.txt b/src/coreclr/gc/CMakeLists.txt index 594bc0619f3..b76c41271c6 100644 --- a/src/coreclr/gc/CMakeLists.txt +++ b/src/coreclr/gc/CMakeLists.txt @@ -26,6 +26,7 @@ set( GC_SOURCES gcload.cpp handletablecache.cpp satori/SatoriGC.cpp + satori/SatoriHandlePartitioner.cpp satori/SatoriHeap.cpp satori/SatoriPage.cpp satori/SatoriRegion.cpp @@ -105,6 +106,7 @@ if (CLR_CMAKE_TARGET_WIN32) softwarewritewatch.h satori/SatoriGC.h vxsort/do_vxsort.h + satori/SatoriHandlePartitioner.h satori/SatoriHeap.h satori/SatoriPage.h satori/SatoriRegion.h diff --git a/src/coreclr/gc/env/gcenv.interlocked.inl b/src/coreclr/gc/env/gcenv.interlocked.inl index 780f1c0c8ca..6b032d427d4 100644 --- a/src/coreclr/gc/env/gcenv.interlocked.inl +++ b/src/coreclr/gc/env/gcenv.interlocked.inl @@ -120,6 +120,18 @@ __forceinline int8_t Interlocked::CompareExchange(int8_t volatile* desti #endif } +template <> +__forceinline uint8_t Interlocked::CompareExchange(uint8_t volatile* destination, uint8_t exchange, uint8_t comparand) +{ +#ifdef _MSC_VER + return (uint8_t)_InterlockedCompareExchange8((char*)destination, exchange, comparand); +#else + uint8_t result = __sync_val_compare_and_swap(destination, comparand, exchange); + ArmInterlockedOperationBarrier(); + return result; +#endif +} + // Perform an atomic addition of two 32-bit values and return the original value of the addend. 
// Parameters: // addend - variable to be added to diff --git a/src/coreclr/gc/gc.h b/src/coreclr/gc/gc.h index ee4d1886985..0a153879044 100644 --- a/src/coreclr/gc/gc.h +++ b/src/coreclr/gc/gc.h @@ -311,17 +311,7 @@ inline bool IsServerHeap() { #ifdef FEATURE_SVR_GC assert(g_gc_heap_type != GC_HEAP_INVALID); - return g_gc_heap_type == GC_HEAP_SVR; -#else // FEATURE_SVR_GC - return false; -#endif // FEATURE_SVR_GC -} - -inline bool IsSatoriHeap() -{ -#ifdef FEATURE_SATORI_GC - assert(g_gc_heap_type != GC_HEAP_INVALID); - return g_gc_heap_type == GC_HEAP_SATORI; + return g_gc_heap_type >= GC_HEAP_SVR; #else // FEATURE_SVR_GC return false; #endif // FEATURE_SVR_GC diff --git a/src/coreclr/gc/satori/SatoriGC.cpp b/src/coreclr/gc/satori/SatoriGC.cpp index 239dc24e328..1fd575ae198 100644 --- a/src/coreclr/gc/satori/SatoriGC.cpp +++ b/src/coreclr/gc/satori/SatoriGC.cpp @@ -10,6 +10,7 @@ #include "gcenv.h" #include "../env/gcenv.os.h" +#include "SatoriHandlePartitioner.h" #include "SatoriObject.h" #include "SatoriObject.inl" #include "SatoriGC.h" @@ -222,6 +223,7 @@ HRESULT SatoriGC::Initialize() { m_perfCounterFrequency = GCToOSInterface::QueryPerformanceFrequency(); SatoriObject::Initialize(); + SatoriHandlePartitioner::Initialize(); m_heap = SatoriHeap::Create(); if (m_heap == nullptr) { @@ -507,18 +509,14 @@ void SatoriGC::ControlPrivateEvents(GCEventKeyword keyword, GCEventLevel level) GCEventStatus::Set(GCEventProvider_Private, keyword, level); } -// This is not used much. Where it is used, the assumption is that it returns #procs -// see: SyncBlockCacheWeakPtrScan and getNumberOfSlots int SatoriGC::GetNumberOfHeaps() { - return GCToOSInterface::GetTotalProcessorCount();; + return SatoriHandlePartitioner::PartitionCount(); } int SatoriGC::GetHomeHeapNumber() { - // TODO: Satori this is a number in [0, procNum) associated with thread - // it is implementable, do 0 for now. 
- return 0; + return SatoriHandlePartitioner::CurrentThreadPartition(); } size_t SatoriGC::GetPromotedBytes(int heap_index) diff --git a/src/coreclr/gc/satori/SatoriHandlePartitioner.cpp b/src/coreclr/gc/satori/SatoriHandlePartitioner.cpp new file mode 100644 index 00000000000..a13e290152d --- /dev/null +++ b/src/coreclr/gc/satori/SatoriHandlePartitioner.cpp @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// +// SatoriHandlePartitioner.cpp +// + +#include "common.h" + +#include "gcenv.h" +#include "../env/gcenv.os.h" + +#include "SatoriHandlePartitioner.h" + +// static +int SatoriHandlePartitioner::s_partitionCount; +// static +uint8_t* SatoriHandlePartitioner::s_scanTickets; +//static +uint8_t SatoriHandlePartitioner::s_currentTicket; diff --git a/src/coreclr/gc/satori/SatoriHandlePartitioner.h b/src/coreclr/gc/satori/SatoriHandlePartitioner.h new file mode 100644 index 00000000000..7e5a7fd3082 --- /dev/null +++ b/src/coreclr/gc/satori/SatoriHandlePartitioner.h @@ -0,0 +1,100 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// +// SatoriHandlePartitioner.h +// + +#ifndef __SATORI_HANDLE_PARTITIONER_H__ +#define __SATORI_HANDLE_PARTITIONER_H__ + +#include "common.h" +#include "gcenv.h" +#include "../env/gcenv.os.h" + +#include "SatoriUtil.h" + +class SatoriHandlePartitioner +{ +public: + static void Initialize() + { + s_partitionCount = (int)GCToOSInterface::GetTotalProcessorCount(); + s_scanTickets = new uint8_t[s_partitionCount]{}; // zero-initialize: ticket 0 means "not yet scanned in any round" + } + + static void StartNextScan() + { + s_currentTicket++; + } + + static int PartitionCount() + { + return s_partitionCount; + } + + // TODO: VS optimize % by making count pwr 2 + + static int CurrentThreadPartition() + { + if (!s_partitionCount) + { + return 0; + } + + uint32_t value; + if (GCToOSInterface::CanGetCurrentProcessorNumber()) + { + value = GCToOSInterface::GetCurrentProcessorNumber(); + } + else + { + size_t thread = SatoriUtil::GetCurrentThreadTag(); + // TODO: VS hash this? + value = (uint32_t)thread ^ (thread >> (sizeof(uint32_t) * 8)); + } + + return (int)(value % (uint32_t)s_partitionCount); + } + + static int TryGetOwnPartitionToScan() + { + int partition = CurrentThreadPartition(); + uint8_t partitionTicket = VolatileLoadWithoutBarrier(&s_scanTickets[partition]); + if (partitionTicket != s_currentTicket) + { + if (Interlocked::CompareExchange(&s_scanTickets[partition], s_currentTicket, partitionTicket) == partitionTicket) + { + return partition; + } + } + + return -1; + } + + template <typename F> + static void ForEachUnscannedPartition(F& lambda) + { + int startPartition = CurrentThreadPartition(); + //TODO: VS partition walk should be NUMA-aware, if possible + for (int i = 0; i < s_partitionCount; i++) + { + int partition = (i + startPartition) % s_partitionCount; + uint8_t partitionTicket = VolatileLoadWithoutBarrier(&s_scanTickets[partition]); + if (partitionTicket != s_currentTicket) + { + if (Interlocked::CompareExchange(&s_scanTickets[partition], s_currentTicket, partitionTicket) == partitionTicket) + { + lambda(partition); + } + } + } + } 
+ +private: + static int s_partitionCount; + static uint8_t* s_scanTickets; + static uint8_t s_currentTicket; +}; + +#endif diff --git a/src/coreclr/gc/satori/SatoriRecycler.cpp b/src/coreclr/gc/satori/SatoriRecycler.cpp index 542487fcfb2..9842c3ad299 100644 --- a/src/coreclr/gc/satori/SatoriRecycler.cpp +++ b/src/coreclr/gc/satori/SatoriRecycler.cpp @@ -10,6 +10,7 @@ #include "gcenv.h" #include "../env/gcenv.os.h" +#include "SatoriHandlePartitioner.h" #include "SatoriHeap.h" #include "SatoriPage.h" #include "SatoriPage.inl" @@ -27,6 +28,9 @@ #undef memcpy #endif //memcpy +#define CONCURRENT true +//#define CONCURRENT false + void SatoriRecycler::Initialize(SatoriHeap* heap) { m_heap = heap; @@ -58,24 +62,31 @@ void SatoriRecycler::Initialize(SatoriHeap* heap) m_gen1Threshold = 5; m_gen1Budget = 0; - m_isConcurrent = true; + m_isConcurrent = CONCURRENT; } // not interlocked. this is not done concurrently. void SatoriRecycler::IncrementScanCount() { m_scanCount++; + + // make sure the ticket is not 0 + if (!GetScanTicket()) + { + m_scanCount++; + } } -// CONSISTENCY: no synchronization needed -// there is only one writer (thread that initiates GC) -// and treads reading this are guarantee to see it -// since they need to know that there is GC in progress in the first place int SatoriRecycler::GetScanCount() { return m_scanCount; } +uint8_t SatoriRecycler::GetScanTicket() +{ + return (uint8_t)m_scanCount; +} + int64_t SatoriRecycler::GetCollectionCount(int gen) { switch (gen) @@ -145,6 +156,7 @@ void SatoriRecycler::Help() // to help in blocking GC. if (m_isConcurrent) { + MarkHandles(); MarkOwnStack(); } } @@ -162,10 +174,12 @@ void SatoriRecycler::TryStartGC(int generation) // but it is a relatively fast part. // draining concurrently needs IU barrier. IncrementScanCount(); + SatoriHandlePartitioner::StartNextScan(); } Help(); + // TODO: VS this should happen when help did not make progress. 
if (m_condemnedGeneration == 1) { Collect1(); @@ -223,7 +237,7 @@ void SatoriRecycler::Collect1() BlockingCollect(); // this is just to prevent tailcalls - m_isConcurrent = true; + m_isConcurrent = CONCURRENT; } NOINLINE @@ -232,7 +246,7 @@ void SatoriRecycler::Collect2() BlockingCollect(); // this is just to prevent tailcalls - m_isConcurrent = true; + m_isConcurrent = CONCURRENT; } void SatoriRecycler::BlockingCollect() @@ -261,7 +275,7 @@ void SatoriRecycler::BlockingCollect() Mark(); Sweep(); Compact(); - UpdatePointers(); + Finish(); m_gen1Count++; @@ -270,7 +284,7 @@ void SatoriRecycler::BlockingCollect() m_gen2Count++; } - // TODO: update stats and heuristics + // TODO: VS update stats and heuristics if (m_condemnedGeneration == 2) { m_gen1Budget = Gen2RegionCount(); @@ -284,7 +298,7 @@ void SatoriRecycler::BlockingCollect() m_condemnedGeneration = 0; m_gcInProgress = false; - m_isConcurrent = true; + m_isConcurrent = CONCURRENT; // restart VM GCToEEInterface::RestartEE(true); @@ -293,19 +307,20 @@ void SatoriRecycler::BlockingCollect() void SatoriRecycler::Mark() { IncrementScanCount(); + SatoriHandlePartitioner::StartNextScan(); MarkOwnStack(); //TODO: VS MarkOwnHandles? //TODO: VS should reuse a context for the following? MarkOtherStacks(); - MarkFinalizableQueue(); MarkHandles(); + MarkFinalizationQueue(); // mark through all cards that have interesting refs (remembered set). bool revisitCards = m_condemnedGeneration == 1 ? MarkThroughCards(/* minState */ Satori::CARD_INTERESTING) : - false; + CONCURRENT; while (m_workList->Count() > 0 || revisitCards) { @@ -610,9 +625,6 @@ void SatoriRecycler::MarkOtherStacks() MarkContext c = MarkContext(this); sc._unused1 = &c; - //TODO: VS there should be only one thread with "thread_number == 0" - //TODO: VS implement two-pass scheme with preferred vs. 
any stacks - //generations are meaningless here, so we pass -1 GCToEEInterface::GcScanRoots(MarkFn, -1, -1, &sc); @@ -622,7 +634,7 @@ void SatoriRecycler::MarkOtherStacks() } } -void SatoriRecycler::MarkFinalizableQueue() +void SatoriRecycler::MarkFinalizationQueue() { if (!m_heap->FinalizationQueue()->HasItems()) { @@ -886,14 +898,25 @@ void SatoriRecycler::MarkHandles() { ScanContext sc; sc.promotion = TRUE; - sc.thread_number = 0; - + sc.concurrent = m_isConcurrent; MarkContext c = MarkContext(this); sc._unused1 = &c; - // concurrent, per thread/heap - // relies on thread_number to select handle buckets and specialcases #0 - GCScan::GcScanHandles(MarkFn, m_condemnedGeneration, 2, &sc); + int partition = SatoriHandlePartitioner::TryGetOwnPartitionToScan(); + if (partition != -1) + { + sc.thread_number = partition; + GCScan::GcScanHandles(m_isConcurrent ? MarkFnConcurrent : MarkFn, m_condemnedGeneration, 2, &sc); + //TODO: VS drain own + } + + SatoriHandlePartitioner::ForEachUnscannedPartition( + [&](int p) + { + sc.thread_number = p; + GCScan::GcScanHandles(m_isConcurrent ? MarkFnConcurrent : MarkFn, m_condemnedGeneration, 2, &sc); + } + ); if (c.m_markChunk != nullptr) { @@ -905,19 +928,22 @@ void SatoriRecycler::WeakPtrScan(bool isShort) { ScanContext sc; sc.promotion = TRUE; - sc.thread_number = 0; - // concurrent, per thread/heap - // relies on thread_number to select handle buckets and specialcases #0 - // null out the target of short weakref that were not promoted. 
- if (isShort) - { - GCScan::GcShortWeakPtrScan(nullptr, m_condemnedGeneration, 2, &sc); - } - else - { - GCScan::GcWeakPtrScan(nullptr, m_condemnedGeneration, 2, &sc); - } + SatoriHandlePartitioner::StartNextScan(); + SatoriHandlePartitioner::ForEachUnscannedPartition( + [&](int p) + { + sc.thread_number = p; + if (isShort) + { + GCScan::GcShortWeakPtrScan(nullptr, m_condemnedGeneration, 2, &sc); + } + else + { + GCScan::GcWeakPtrScan(nullptr, m_condemnedGeneration, 2, &sc); + } + } + ); } void SatoriRecycler::WeakPtrScanBySingleThread() @@ -1057,14 +1083,17 @@ void SatoriRecycler::DependentHandlesInitialScan() { ScanContext sc; sc.promotion = TRUE; - sc.thread_number = 0; - MarkContext c = MarkContext(this); sc._unused1 = &c; - // concurrent, per thread/heap - // relies on thread_number to select handle buckets and specialcases #0 - GCScan::GcDhInitialScan(MarkFn, m_condemnedGeneration, 2, &sc); + SatoriHandlePartitioner::StartNextScan(); + SatoriHandlePartitioner::ForEachUnscannedPartition( + [&](int p) + { + sc.thread_number = p; + GCScan::GcDhInitialScan(MarkFn, m_condemnedGeneration, 2, &sc); + } + ); if (c.m_markChunk != nullptr) { @@ -1076,17 +1105,20 @@ void SatoriRecycler::DependentHandlesRescan() { ScanContext sc; sc.promotion = TRUE; - sc.thread_number = 0; - MarkContext c = MarkContext(this); sc._unused1 = &c; - // concurrent, per thread/heap - // relies on thread_number to select handle buckets and specialcases #0 - if (GCScan::GcDhUnpromotedHandlesExist(&sc)) - { - GCScan::GcDhReScan(&sc); - } + SatoriHandlePartitioner::StartNextScan(); + SatoriHandlePartitioner::ForEachUnscannedPartition( + [&](int p) + { + sc.thread_number = p; + if (GCScan::GcDhUnpromotedHandlesExist(&sc)) + { + GCScan::GcDhReScan(&sc); + } + } + ); if (c.m_markChunk != nullptr) { @@ -1098,6 +1130,8 @@ void SatoriRecycler::PromoteSurvivedHandles() { ScanContext sc; sc.promotion = TRUE; + + // only thread #0 does the work. this is not concurrent. 
sc.thread_number = 0; // no need for context. we do not create more work here. @@ -1395,58 +1429,46 @@ void SatoriRecycler::UpdateFn(PTR_PTR_Object ppObject, ScanContext* sc, uint32_t } }; -void SatoriRecycler::UpdatePointers() +void SatoriRecycler::Finish() { if (m_isCompacting) { + IncrementScanCount(); + SatoriHandlePartitioner::StartNextScan(); + ScanContext sc; sc.promotion = FALSE; MarkContext c = MarkContext(this); sc._unused1 = &c; - //TODO: VS there should be only one thread with "thread_number == 0" - //TODO: VS implement two-pass scheme with preferred vs. any stacks - IncrementScanCount(); - //generations are meaningless here, so we pass -1 GCToEEInterface::GcScanRoots(UpdateFn, -1, -1, &sc); - // concurrent, per thread/heap - // relies on thread_number to select handle buckets and specialcases #0 - GCScan::GcScanHandles(UpdateFn, m_condemnedGeneration, 2, &sc); + SatoriHandlePartitioner::ForEachUnscannedPartition( + [&](int p) + { + sc.thread_number = p; + GCScan::GcScanHandles(UpdateFn, m_condemnedGeneration, 2, &sc); + } + ); + _ASSERTE(c.m_markChunk == nullptr); - // update refs in finalization queue - if (m_heap->FinalizationQueue()->HasItems()) - { - // add finalization queue to mark list - m_heap->FinalizationQueue()->ForEachObjectRef( - [&](SatoriObject** ppObject) - { - SatoriObject* o = *ppObject; - ptrdiff_t ptr = ((ptrdiff_t*)o)[-1]; - if (ptr < 0) - { - *ppObject = (SatoriObject*)-ptr; - } - } - ); - } - + UpdateFinalizationQueue(); if (m_condemnedGeneration != 2) { UpdatePointersThroughCards(); } } - // return target regions + // finish and return target regions for (int i = 0; i < Satori::FREELIST_COUNT; i++) { - UpdatePointersInRegions(m_relocationTargets[i]); + FinishRegions(m_relocationTargets[i]); } - // return staying regions - UpdatePointersInRegions(m_stayingRegions); + // finish and return staying regions + FinishRegions(m_stayingRegions); // recycle relocated regions. 
SatoriRegion* curRegion; @@ -1458,7 +1480,27 @@ } } -void SatoriRecycler::UpdatePointersInRegions(SatoriRegionQueue* queue) +void SatoriRecycler::UpdateFinalizationQueue() +{ + // update refs in finalization queue + if (m_heap->FinalizationQueue()->HasItems()) + { + // add finalization queue to mark list + m_heap->FinalizationQueue()->ForEachObjectRef( + [&](SatoriObject** ppObject) + { + SatoriObject* o = *ppObject; + ptrdiff_t ptr = ((ptrdiff_t*)o)[-1]; + if (ptr < 0) + { + *ppObject = (SatoriObject*)-ptr; + } + } + ); + } +} + +void SatoriRecycler::FinishRegions(SatoriRegionQueue* queue) { SatoriRegion* curRegion; while (curRegion = queue->TryPop()) diff --git a/src/coreclr/gc/satori/SatoriRecycler.h index fd572eda595..acdc4538d4f 100644 --- a/src/coreclr/gc/satori/SatoriRecycler.h +++ b/src/coreclr/gc/satori/SatoriRecycler.h @@ -33,6 +33,7 @@ public: void Collect2(); int GetScanCount(); + // NOTE(review): removed stale GetNextScanTicket() declaration; it had no definition -- the ticket accessor implemented in SatoriRecycler.cpp is the private GetScanTicket() below. int64_t GetCollectionCount(int gen); int CondemnedGeneration(); @@ -44,7 +45,7 @@ public: private: SatoriHeap* m_heap; - // used to ensure each thread is scanned once per scan round. + // used to ensure each thread or handle partition is scanned once per scan round. 
int m_scanCount; int m_condemnedGeneration; bool m_isCompacting; @@ -87,8 +88,9 @@ private: void PushToMarkQueuesSlow(SatoriMarkChunk*& currentMarkChunk, SatoriObject* o); void MarkOwnStack(); void MarkOtherStacks(); - void MarkFinalizableQueue(); + void MarkFinalizationQueue(); void IncrementScanCount(); + uint8_t GetScanTicket(); void DrainMarkQueues(SatoriMarkChunk* srcChunk = nullptr); void DrainMarkQueuesConcurrent(SatoriMarkChunk* srcChunk = nullptr); bool MarkThroughCards(int8_t minState); @@ -107,9 +109,11 @@ private: void Sweep(); void Compact(); void RelocateRegion(SatoriRegion* region); - void UpdatePointers(); + void Finish(); - void UpdatePointersInRegions(SatoriRegionQueue* queue); + void UpdateFinalizationQueue(); + + void FinishRegions(SatoriRegionQueue* queue); void UpdatePointersThroughCards(); void SweepRegions(SatoriRegionQueue* regions); void AddRelocationTarget(SatoriRegion* region); diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 4e7e3a7f0e8..44bbabf37b2 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -506,6 +506,7 @@ set(GC_SOURCES_WKS ../gc/softwarewritewatch.cpp ../gc/handletablecache.cpp ../gc/satori/SatoriGC.cpp + ../gc/satori/SatoriHandlePartitioner.cpp ../gc/satori/SatoriHeap.cpp ../gc/satori/SatoriPage.cpp ../gc/satori/SatoriObject.cpp diff --git a/src/coreclr/vm/gchelpers.cpp b/src/coreclr/vm/gchelpers.cpp index 8048a80c6cf..97296432a24 100644 --- a/src/coreclr/vm/gchelpers.cpp +++ b/src/coreclr/vm/gchelpers.cpp @@ -1378,15 +1378,16 @@ void CheckEscapeSatoriRange(size_t dst, size_t src, size_t len) return; } - // very rare case where we are copying refs out of non-heap area like stack or native heap. - // we do not have a containing type and that would be somewhat inconvenient. - // one way to handle this is by concervatively escaping any value that matches an unescaped pointer in curRegion. 
+ // This is a very rare case where we are copying refs out of non-heap area like stack or native heap. + // We do not have a containing type and that is somewhat inconvenient. // - // in practice, while theoretically possible, I do not know a code path that could lead here. - // as a particular concern, boxing copy typically uses a newly allocated and not yet escaped target. - // - // in case if this is reachable we will simply stop tracking if this ever occurs. - _ASSERTE(!"escaping by copying from outside of heap, we can handle this, but it is unexpected"); + // There are not many scenarios that lead here. In particular, boxing uses a newly + // allocated and not yet escaped target, so it does not. + // One possible way to get here is a copy-back after a reflection call with a boxed nullable + // argument that happens to escape. + // + // We could handle this by conservatively escaping any value that matches an unescaped pointer in curRegion. + // However, considering how uncommon this is, we will just give up tracking. curRegion->StopEscapeTracking(); } #endif