Mirror of https://github.com/VSadov/Satori.git
Synced 2025-06-08 03:27:04 +09:00

Low Latency Mode TODOs (#35)

* Gate and new Lock, also GC thread spin/rate
* barrier tweaks
* GetGCSafeMethodTable
* Suspend SpinWait tweak
* GetMemoryInfo throttling
* collection heuristics
* avail ram
* recycler and env changes
* barriers
* bulk copy
* gc spin tweak
* TryPopWithTryEnter
* small commit is back
* m_helperWoken
* AskForHelp
* tweaks
* do stacks last
* only lock changes
* AllocAligned
* misalign TLABs
* prefer concurrent in LowLat mode
* worklist
* spin in TryEnter
* rationalizing mark chunk size
* shorter card groups
* a few todos
* Gen0 Gen1 switches
* gcTHP switch
* tweak conc block time
* more conservative trimming
* fixes for OSX
* fix for arm64
* tweak helper counts
* round up
* helper --> worker

This commit is contained in:
parent 7f52e2c11d
commit 9755128719

53 changed files with 3411 additions and 1505 deletions
@@ -213,6 +213,11 @@ if(CLR_CMAKE_HOST_UNIX)
add_compile_options($<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wno-conversion-null>)
add_compile_options($<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wno-pointer-arith>)

if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64)
  # Allow 16 byte compare-exchange
  add_compile_options(-mcx16)
endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64)

set (NATIVE_RESOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/nativeresources)
include_directories(${NATIVE_RESOURCE_DIR})
set (PROCESS_RC_SCRIPT ${NATIVE_RESOURCE_DIR}/processrc.sh)
@@ -133,6 +133,7 @@ if(CLR_CMAKE_TARGET_WIN32)
bcrypt.lib
RuntimeObject.lib
delayimp.lib
Synchronization.lib
)
else()
list(APPEND CORECLR_LIBRARIES
@@ -39,6 +39,8 @@ set(GC_SOURCES
satori/SatoriAllocationContext.cpp
satori/SatoriUtil.cpp
satori/SatoriLock.cpp
satori/SatoriWorkList.cpp
satori/SatoriGate.cpp
)

if(CLR_CMAKE_HOST_UNIX)
@@ -110,6 +112,7 @@ if (CLR_CMAKE_TARGET_WIN32)
satori/SatoriAllocationContext.h
satori/SatoriUtil.h
satori/SatoriLock.h
satori/SatoriGate.h
)
endif(CLR_CMAKE_TARGET_WIN32)
@@ -118,6 +121,7 @@ if(CLR_CMAKE_HOST_WIN32)
${STATIC_MT_CRT_LIB}
${STATIC_MT_VCRT_LIB}
kernel32.lib
Synchronization.lib
advapi32.lib)
endif(CLR_CMAKE_HOST_WIN32)
5  src/coreclr/gc/env/gcenv.object.h  vendored

@@ -173,11 +173,16 @@ public:

MethodTable * GetGCSafeMethodTable() const
{
#if !defined(FEATURE_SATORI_GC)
#ifdef HOST_64BIT
    return (MethodTable *)((uintptr_t)m_pMethTab & ~7);
#else
    return (MethodTable *)((uintptr_t)m_pMethTab & ~3);
#endif //HOST_64BIT
#else
    // Satori does not mess up MT pointers.
    return RawGetMethodTable();
#endif
}

void RawSetMethodTable(MethodTable * pMT)
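For context on why the non-Satori path masks low bits: during marking/relocation the stock GC can temporarily stash flag bits in the low bits of the MethodTable pointer, which are otherwise always zero because MethodTables are at least 8-byte aligned on 64-bit hosts. A minimal self-contained sketch of that tag-and-mask pattern (illustrative only, not runtime code):

#include <cassert>
#include <cstdint>

int main()
{
    alignas(8) static int fakeMethodTable;            // at least 8-byte aligned, so bits 0..2 are free
    uintptr_t raw    = (uintptr_t)&fakeMethodTable;
    uintptr_t tagged = raw | 0x1;                     // a GC could temporarily set a low flag bit
    void* safe = (void*)(tagged & ~(uintptr_t)7);     // the "& ~7" in GetGCSafeMethodTable() strips it
    assert(safe == (void*)&fakeMethodTable);
    return 0;
}

Satori never tags the MethodTable pointer, so RawGetMethodTable() is already safe.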
2  src/coreclr/gc/env/gcenv.os.h  vendored

@@ -252,7 +252,7 @@ public:
// granularity.
static void* VirtualReserve(size_t size, size_t alignment, uint32_t flags, uint16_t node = NUMA_NODE_UNDEFINED);

static void* VirtualReserve(void* location, size_t size);
static void* VirtualReserve(void* location, size_t size, bool useTHP = false);

// Release virtual memory range previously reserved using VirtualReserve
// Parameters:
@@ -149,8 +149,12 @@ public:
BOOL_CONFIG (RelocatingInGen1, "gcRelocatingGen1", NULL, true, "Specifies whether GC can relocate objects in Gen1 GC") \
BOOL_CONFIG (RelocatingInGen2, "gcRelocatingGen2", NULL, true, "Specifies whether GC can relocate objects in Gen2 GC") \
INT_CONFIG (ParallelGC, "gcParallel", NULL, -1, "Specifies max number of addtional GC threads. 0 - no helpers, -1 - default") \
BOOL_CONFIG (ThreadLocalGC, "gcThreadLocal", NULL, true, "Specifies whether thread-local GC can be performed") \
BOOL_CONFIG (Gen0GC, "gcGen0", NULL, true, "Specifies whether Gen0 GC can be performed") \
BOOL_CONFIG (Gen1GC, "gcGen1", NULL, true, "Specifies whether Gen1 GC can be performed") \
BOOL_CONFIG (UseTHP, "gcTHP", NULL, true, "Specifies whether Transparent Huge Pages can be used. (Linux only)") \
BOOL_CONFIG (TrimmigGC, "gcTrim", NULL, true, "Specifies whether background trimming is enabled") \
INT_CONFIG (GCRate, "gcRate", NULL, -1, "Specifies soft min limit for time between GCs in milliseconds. -1 - default") \
INT_CONFIG (GCSpin, "gcSpin", NULL, -1, "Spin") \

// This class is responsible for retreiving configuration information
// for how the GC should operate.
@@ -48,22 +48,22 @@ void SatoriAllocator::Initialize(SatoriHeap* heap)

for (int i = 0; i < Satori::ALLOCATOR_BUCKET_COUNT; i++)
{
    m_queues[i] = new (nothrow) SatoriRegionQueue(QueueKind::Allocator);
    m_queues[i] = SatoriRegionQueue::AllocAligned(QueueKind::Allocator);
}

m_workChunks = new (nothrow) SatoriWorkList();
m_workChunks = SatoriWorkList::AllocAligned();

m_immortalRegion = nullptr;
m_immortalAlocLock.Initialize();
m_immortalAllocLock.Initialize();

m_pinnedRegion = nullptr;
m_pinnedAlocLock.Initialize();
m_pinnedAllocLock.Initialize();

m_largeRegion = nullptr;
m_largeAlocLock.Initialize();
m_largeAllocLock.Initialize();

m_regularRegion = nullptr;
m_regularAlocLock.Initialize();
m_regularAllocLock.Initialize();

m_singePageAdders = 0;
}
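Initialize now obtains the allocator queues and the shared work list from AllocAligned factories instead of plain new (nothrow). The factories themselves are not shown in this hunk; the likely intent (an assumption here) is to give each contended structure storage aligned to a cache-line boundary. A generic C++17 sketch of over-aligned, non-throwing allocation:

#include <new>

struct alignas(64) PaddedCounter   // 64 bytes is a typical cache line size (assumption)
{
    volatile long value;
};

int main()
{
    // C++17 aligned operator new honors the over-aligned type, even with nothrow.
    PaddedCounter* c = new (std::nothrow) PaddedCounter{};
    if (!c) return 1;
    c->value = 0;
    delete c;
    return 0;
}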
@ -127,7 +127,7 @@ tryAgain:
|
|||
{
|
||||
// Reserving a regular-sized Page.
|
||||
// We will often come here on multiple threads and we do not want all threads to reserve a page.
|
||||
// If someone alse is reserving, we will allow 1 msec of retrying before reserving a page eagerly.
|
||||
// If someone else is reserving, we will allow 1 msec of retrying before reserving a page eagerly.
|
||||
if (newPageDeadline == 0)
|
||||
{
|
||||
newPageDeadline = GCToOSInterface::QueryPerformanceCounter() + GCToOSInterface::QueryPerformanceFrequency() / 1000;
|
||||
|
@ -216,8 +216,21 @@ void SatoriAllocator::ReturnRegion(SatoriRegion* region)
|
|||
m_queues[SizeToBucket(region->Size())]->Push(region);
|
||||
}
|
||||
|
||||
void SatoriAllocator::ReturnRegionNoLock(SatoriRegion* region)
|
||||
{
|
||||
_ASSERTE(region->IsAttachedToAllocatingOwner() == false);
|
||||
_ASSERTE(region->Generation() == -1);
|
||||
_ASSERTE(m_heap->Recycler()->IsBlockingPhase());
|
||||
|
||||
m_queues[SizeToBucket(region->Size())]->PushNoLock(region);
|
||||
}
|
||||
|
||||
void SatoriAllocator::AllocationTickIncrement(AllocationTickKind allocationTickKind, size_t totalAdded, SatoriObject* obj, size_t objSize)
|
||||
{
|
||||
if (!EVENT_ENABLED(GCAllocationTick_V4))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
size_t& tickAmout = allocationTickKind == AllocationTickKind::Small ?
|
||||
m_smallAllocTickAmount :
|
||||
|
@ -242,6 +255,11 @@ void SatoriAllocator::AllocationTickIncrement(AllocationTickKind allocationTickK
|
|||
|
||||
void SatoriAllocator::AllocationTickDecrement(size_t totalUnused)
|
||||
{
|
||||
if (!EVENT_ENABLED(GCAllocationTick_V4))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Interlocked::ExchangeAdd64(&m_smallAllocTickAmount, (size_t)(-(int64_t)totalUnused));
|
||||
}
|
||||
|
||||
|
@ -292,6 +310,15 @@ Object* SatoriAllocator::Alloc(SatoriAllocationContext* context, size_t size, ui
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
thread_local
|
||||
size_t lastSharedRegularAllocUsec;
|
||||
|
||||
#ifdef _DEBUG
|
||||
const size_t minSharedAllocDelay = 1024;
|
||||
#else
|
||||
const size_t minSharedAllocDelay = 128;
|
||||
#endif
|
||||
|
||||
SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, size_t size, uint32_t flags)
|
||||
{
|
||||
|
||||
|
@ -302,17 +329,23 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
|
|||
|
||||
SatoriObject* freeObj = context->alloc_ptr != 0 ? context->FinishAllocFromShared() : nullptr;
|
||||
|
||||
//m_regularAlocLock.Enter();
|
||||
if (m_regularAlocLock.TryEnter())
|
||||
size_t usecNow = m_heap->Recycler()->GetNowUsecs();
|
||||
if (usecNow - lastSharedRegularAllocUsec > minSharedAllocDelay)
|
||||
{
|
||||
if (freeObj && freeObj->ContainingRegion() == m_regularRegion)
|
||||
{
|
||||
size_t size = freeObj->Size();
|
||||
m_regularRegion->SetOccupancy(m_regularRegion->Occupancy() - size);
|
||||
m_regularRegion->AddFreeSpace(freeObj, size);
|
||||
}
|
||||
lastSharedRegularAllocUsec = usecNow;
|
||||
|
||||
return AllocRegularShared(context, size, flags);
|
||||
//m_regularAllocLock.Enter();
|
||||
if (m_regularAllocLock.TryEnter())
|
||||
{
|
||||
if (freeObj && freeObj->ContainingRegion() == m_regularRegion)
|
||||
{
|
||||
size_t size = freeObj->FreeObjSize();
|
||||
m_regularRegion->SetOccupancy(m_regularRegion->Occupancy() - size);
|
||||
m_regularRegion->ReturnFreeSpace(freeObj, size);
|
||||
}
|
||||
|
||||
return AllocRegularShared(context, size, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@@ -343,9 +376,22 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
if (moreSpace <= allocRemaining)
{
    bool zeroInitialize = !(flags & GC_ALLOC_ZEROING_OPTIONAL);
    if (zeroInitialize && moreSpace < SatoriUtil::MinZeroInitSize())
    if (zeroInitialize)
    {
        moreSpace = min(allocRemaining, SatoriUtil::MinZeroInitSize());
        if (moreSpace < SatoriUtil::MinZeroInitSize())
        {
            moreSpace = min(allocRemaining, SatoriUtil::MinZeroInitSize());
        }

        // " +/- sizeof(size_t)" here is to intentionally misalign alloc_limit on the index granularity
        // to improve chances that the object that is allocated here will be indexed
        size_t misAlignedOnIndexEnd = ALIGN_UP(region->GetAllocStart() + moreSpace + sizeof(size_t), Satori::INDEX_GRANULARITY) - sizeof(size_t);
        size_t misAlignedMoreSpace = misAlignedOnIndexEnd - region->GetAllocStart();

        if (misAlignedMoreSpace <= allocRemaining)
        {
            moreSpace = misAlignedMoreSpace;
        }
    }

    if (region->Allocate(moreSpace, zeroInitialize))
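The "+/- sizeof(size_t)" trick above is easier to see with concrete numbers. A small self-checking sketch; the INDEX_GRANULARITY value is made up for illustration, and ALIGN_UP is assumed to be the usual power-of-two round-up:

#include <cassert>
#include <cstddef>

// Same rounding as the runtime's ALIGN_UP (power-of-two alignment assumed).
constexpr size_t AlignUp(size_t v, size_t a) { return (v + a - 1) & ~(a - 1); }

int main()
{
    const size_t INDEX_GRANULARITY = 0x800;   // illustrative value, not taken from the commit
    size_t allocStart = 0x10010;
    size_t moreSpace  = 0x700;

    size_t misAlignedOnIndexEnd = AlignUp(allocStart + moreSpace + sizeof(size_t), INDEX_GRANULARITY) - sizeof(size_t);
    size_t misAlignedMoreSpace  = misAlignedOnIndexEnd - allocStart;

    // The resulting alloc_limit lands exactly one word below an index-granularity boundary,
    // so the last object carved out of this chunk tends to reach the boundary and get indexed.
    assert((misAlignedOnIndexEnd + sizeof(size_t)) % INDEX_GRANULARITY == 0);
    assert(misAlignedMoreSpace >= moreSpace);
    return 0;
}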
@ -414,9 +460,10 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
|
|||
region->DetachFromAlocatingOwnerRelease();
|
||||
m_heap->Recycler()->AddEphemeralRegion(region);
|
||||
|
||||
// if we got this far with region not detached, get another one
|
||||
// TUNING: we could force trying to allocate from shared based on some heuristic
|
||||
// goto tryAgain;
|
||||
|
||||
// if we got this far with region not detached, get another one
|
||||
}
|
||||
|
||||
TryGetRegularRegion(region);
|
||||
|
@ -435,7 +482,7 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
|
|||
// 4) (optional: clear escape tag) Detach
|
||||
|
||||
region->AttachToAllocatingOwner(&context->RegularRegion());
|
||||
if (SatoriUtil::IsThreadLocalGCEnabled())
|
||||
if (SatoriUtil::IsGen0Enabled())
|
||||
{
|
||||
switch (region->ReusableFor())
|
||||
{
|
||||
|
@ -477,9 +524,22 @@ SatoriObject* SatoriAllocator::AllocRegularShared(SatoriAllocationContext* conte
|
|||
{
|
||||
// we have enough free space in the region to continue
|
||||
bool zeroInitialize = !(flags & GC_ALLOC_ZEROING_OPTIONAL);
|
||||
if (zeroInitialize && moreSpace < SatoriUtil::MinZeroInitSize())
|
||||
if (zeroInitialize)
|
||||
{
|
||||
moreSpace = min(allocRemaining, SatoriUtil::MinZeroInitSize());
|
||||
if (moreSpace < SatoriUtil::MinZeroInitSize())
|
||||
{
|
||||
moreSpace = min(allocRemaining, SatoriUtil::MinZeroInitSize());
|
||||
}
|
||||
|
||||
// " +/- sizeof(size_t)" here is to intentionally misalign alloc_limit on the index granularity
|
||||
// to improve chances that the object that is allocated here will be indexed
|
||||
size_t misAlignedOnIndexEnd = ALIGN_UP(region->GetAllocStart() + moreSpace + sizeof(size_t), Satori::INDEX_GRANULARITY) - sizeof(size_t);
|
||||
size_t misAlignedMoreSpace = misAlignedOnIndexEnd - region->GetAllocStart();
|
||||
|
||||
if (misAlignedMoreSpace <= allocRemaining)
|
||||
{
|
||||
moreSpace = misAlignedMoreSpace;
|
||||
}
|
||||
}
|
||||
|
||||
// do not zero-initialize just yet, we will do that after leaving the lock.
|
||||
|
@ -487,7 +547,7 @@ SatoriObject* SatoriAllocator::AllocRegularShared(SatoriAllocationContext* conte
|
|||
if (!result)
|
||||
{
|
||||
//OOM, nothing to undo.
|
||||
m_regularAlocLock.Leave();
|
||||
m_regularAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -497,7 +557,7 @@ SatoriObject* SatoriAllocator::AllocRegularShared(SatoriAllocationContext* conte
|
|||
{
|
||||
// OOM, undo the allocation
|
||||
region->StopAllocating(result->Start());
|
||||
m_regularAlocLock.Leave();
|
||||
m_regularAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
@ -512,7 +572,7 @@ SatoriObject* SatoriAllocator::AllocRegularShared(SatoriAllocationContext* conte
|
|||
region->IncrementUnfinishedAlloc();
|
||||
|
||||
// done with region modifications.
|
||||
m_regularAlocLock.Leave();
|
||||
m_regularAllocLock.Leave();
|
||||
|
||||
context->alloc_ptr = (uint8_t*)result + size;
|
||||
context->alloc_bytes += moreSpace;
|
||||
|
@ -521,6 +581,7 @@ SatoriObject* SatoriAllocator::AllocRegularShared(SatoriAllocationContext* conte
|
|||
context->alloc_limit = (uint8_t*)result + moreSpace;
|
||||
|
||||
result->CleanSyncBlock();
|
||||
region->SetIndicesForObject(result, result->Start() + size);
|
||||
if (zeroInitialize)
|
||||
{
|
||||
memset((uint8_t*)result + sizeof(size_t), 0, moreSpace - 2 * sizeof(size_t));
|
||||
|
@ -623,8 +684,8 @@ tryAgain:
|
|||
{
|
||||
m_heap->Recycler()->MaybeTriggerGC(gc_reason::reason_alloc_loh);
|
||||
|
||||
//m_largeAlocLock.Enter();
|
||||
if (m_largeAlocLock.TryEnter())
|
||||
//m_largeAllocLock.Enter();
|
||||
if (m_largeAllocLock.TryEnter())
|
||||
{
|
||||
return AllocLargeShared(context, size, flags);
|
||||
}
|
||||
|
@ -688,7 +749,7 @@ tryAgain:
|
|||
}
|
||||
|
||||
// try get from the free list
|
||||
if (region->StartAllocating(size))
|
||||
if (region->StartAllocatingBestFit(size))
|
||||
{
|
||||
// we have enough free space in the region to continue
|
||||
continue;
|
||||
|
@ -746,7 +807,7 @@ SatoriObject* SatoriAllocator::AllocLargeShared(SatoriAllocationContext* context
|
|||
if (!result)
|
||||
{
|
||||
//OOM, nothing to undo
|
||||
m_largeAlocLock.Leave();
|
||||
m_largeAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -756,7 +817,7 @@ SatoriObject* SatoriAllocator::AllocLargeShared(SatoriAllocationContext* context
|
|||
{
|
||||
// OOM, undo the allocation
|
||||
region->StopAllocating(result->Start());
|
||||
m_largeAlocLock.Leave();
|
||||
m_largeAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -766,10 +827,11 @@ SatoriObject* SatoriAllocator::AllocLargeShared(SatoriAllocationContext* context
|
|||
// region stays unparsable until allocation is complete.
|
||||
region->IncrementUnfinishedAlloc();
|
||||
// done with region modifications.
|
||||
m_largeAlocLock.Leave();
|
||||
m_largeAllocLock.Leave();
|
||||
|
||||
context->alloc_bytes_uoh += size;
|
||||
result->CleanSyncBlockAndSetUnfinished();
|
||||
region->SetIndicesForObject(result, result->Start() + size);
|
||||
if (!(flags & GC_ALLOC_ZEROING_OPTIONAL))
|
||||
{
|
||||
memset((uint8_t*)result + sizeof(size_t), 0, size - 2 * sizeof(size_t));
|
||||
|
@ -785,7 +847,7 @@ SatoriObject* SatoriAllocator::AllocLargeShared(SatoriAllocationContext* context
|
|||
}
|
||||
|
||||
// try get from the free list
|
||||
if (region->StartAllocating(size))
|
||||
if (region->StartAllocatingBestFit(size))
|
||||
{
|
||||
// we have enough free space in the region to continue
|
||||
continue;
|
||||
|
@ -811,7 +873,7 @@ SatoriObject* SatoriAllocator::AllocLargeShared(SatoriAllocationContext* context
|
|||
if (!region)
|
||||
{
|
||||
//OOM
|
||||
m_largeAlocLock.Leave();
|
||||
m_largeAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -850,6 +912,7 @@ SatoriObject* SatoriAllocator::AllocHuge(SatoriAllocationContext* context, size_
|
|||
}
|
||||
|
||||
result->CleanSyncBlock();
|
||||
hugeRegion->SetIndicesForObject(result, hugeRegion->Start() + Satori::REGION_SIZE_GRANULARITY);
|
||||
|
||||
// huge regions are not attached to contexts and in gen0+ would appear parseable,
|
||||
// but this one is not parseable yet since the new object has no MethodTable
|
||||
|
@ -881,7 +944,7 @@ SatoriObject* SatoriAllocator::AllocPinned(SatoriAllocationContext* context, siz
|
|||
m_heap->Recycler()->MaybeTriggerGC(gc_reason::reason_alloc_soh);
|
||||
|
||||
// if can't get a lock, let AllocLarge handle this.
|
||||
if (!m_pinnedAlocLock.TryEnter())
|
||||
if (!m_pinnedAllocLock.TryEnter())
|
||||
{
|
||||
return AllocLarge(context, size, flags);
|
||||
}
|
||||
|
@ -898,7 +961,7 @@ SatoriObject* SatoriAllocator::AllocPinned(SatoriAllocationContext* context, siz
|
|||
if (!result)
|
||||
{
|
||||
//OOM, nothing to undo
|
||||
m_pinnedAlocLock.Leave();
|
||||
m_pinnedAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -908,7 +971,7 @@ SatoriObject* SatoriAllocator::AllocPinned(SatoriAllocationContext* context, siz
|
|||
{
|
||||
// OOM, undo the allocation
|
||||
region->StopAllocating(result->Start());
|
||||
m_pinnedAlocLock.Leave();
|
||||
m_pinnedAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -918,10 +981,11 @@ SatoriObject* SatoriAllocator::AllocPinned(SatoriAllocationContext* context, siz
|
|||
// region stays unparsable until allocation is complete.
|
||||
region->IncrementUnfinishedAlloc();
|
||||
// done with region modifications.
|
||||
m_pinnedAlocLock.Leave();
|
||||
m_pinnedAllocLock.Leave();
|
||||
|
||||
context->alloc_bytes_uoh += size;
|
||||
result->CleanSyncBlockAndSetUnfinished();
|
||||
region->SetIndicesForObject(result, result->Start() + size);
|
||||
if (!(flags & GC_ALLOC_ZEROING_OPTIONAL))
|
||||
{
|
||||
memset((uint8_t*)result + sizeof(size_t), 0, size - 2 * sizeof(size_t));
|
||||
|
@ -963,7 +1027,7 @@ SatoriObject* SatoriAllocator::AllocPinned(SatoriAllocationContext* context, siz
|
|||
if (!region)
|
||||
{
|
||||
//OOM
|
||||
m_pinnedAlocLock.Leave();
|
||||
m_pinnedAllocLock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -983,7 +1047,7 @@ SatoriObject* SatoriAllocator::AllocImmortal(SatoriAllocationContext* context, s
|
|||
// immortal allocs should be way less than region size.
|
||||
_ASSERTE(size < Satori::REGION_SIZE_GRANULARITY / 2);
|
||||
|
||||
SatoriLockHolder<SatoriLock> holder(&m_immortalAlocLock);
|
||||
SatoriLockHolder holder(&m_immortalAllocLock);
|
||||
SatoriRegion* region = m_immortalRegion;
|
||||
|
||||
while (true)
|
||||
|
|
|
@@ -56,6 +56,7 @@ public:
SatoriRegion* GetRegion(size_t minSize);
void AddRegion(SatoriRegion* region);
void ReturnRegion(SatoriRegion* region);
void ReturnRegionNoLock(SatoriRegion * region);

void AllocationTickIncrement(AllocationTickKind isSmall, size_t totalAdded, SatoriObject* obj, size_t obj_size);
void AllocationTickDecrement(size_t totalUnused);
@@ -72,25 +73,37 @@ private:
SatoriRegionQueue* m_queues[Satori::ALLOCATOR_BUCKET_COUNT];
SatoriWorkList* m_workChunks;

SatoriRegion* m_immortalRegion;
SatoriLock m_immortalAlocLock;

SatoriRegion* m_pinnedRegion;
SatoriSpinLock m_pinnedAlocLock;

SatoriRegion* m_largeRegion;
SatoriSpinLock m_largeAlocLock;

SatoriRegion* m_regularRegion;
SatoriSpinLock m_regularAlocLock;
SatoriRegion* m_largeRegion;
SatoriRegion* m_pinnedRegion;
SatoriRegion* m_immortalRegion;

volatile int32_t m_singePageAdders;
DECLSPEC_ALIGN(64)
SatoriLock m_regularAllocLock;

DECLSPEC_ALIGN(64)
SatoriLock m_immortalAllocLock;

DECLSPEC_ALIGN(64)
SatoriLock m_largeAllocLock;

DECLSPEC_ALIGN(64)
SatoriLock m_pinnedAllocLock;

// for event trace
size_t m_smallAllocTickAmount;
DECLSPEC_ALIGN(64)
size_t m_largeAllocTickAmount;

DECLSPEC_ALIGN(64)
size_t m_pinnedAllocTickAmount;

DECLSPEC_ALIGN(64)
size_t m_smallAllocTickAmount;

private:
DECLSPEC_ALIGN(64)
volatile int32_t m_singePageAdders;

SatoriObject* AllocRegular(SatoriAllocationContext* context, size_t size, uint32_t flags);
SatoriObject* AllocRegularShared(SatoriAllocationContext* context, size_t size, uint32_t flags);
SatoriObject* AllocLarge(SatoriAllocationContext* context, size_t size, uint32_t flags);
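The reshuffled layout above puts each shared allocation lock and each event-trace counter on its own DECLSPEC_ALIGN(64) boundary, the usual way to keep independently updated hot fields from sharing a cache line. A minimal portable sketch of the same idea (the 64-byte line size is an assumption):

#include <atomic>
#include <cstdint>

// Each hot, independently contended field sits on its own (assumed) 64-byte cache line,
// so threads hammering one lock do not keep invalidating the line that holds another.
struct AllocatorHotState
{
    alignas(64) std::atomic<uint32_t> regularAllocLock{0};
    alignas(64) std::atomic<uint32_t> largeAllocLock{0};
    alignas(64) std::atomic<uint64_t> smallAllocTickAmount{0};
};

static_assert(sizeof(AllocatorHotState) >= 3 * 64, "each member occupies at least one full line");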
@@ -653,17 +653,20 @@ size_t SatoriGC::GetPromotedBytes(int heap_index)
    return 0;
}

static uint64_t g_totalLimit;

void SatoriGC::GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, uint64_t* totalAvailableMemoryBytes, uint64_t* lastRecordedMemLoadBytes, uint64_t* lastRecordedHeapSizeBytes, uint64_t* lastRecordedFragmentationBytes, uint64_t* totalCommittedBytes, uint64_t* promotedBytes, uint64_t* pinnedObjectCount, uint64_t* finalizationPendingCount, uint64_t* index, uint32_t* generation, uint32_t* pauseTimePct, bool* isCompaction, bool* isConcurrent, uint64_t* genInfoRaw, uint64_t* pauseInfoRaw, int kind)
{
    LastRecordedGcInfo* lastGcInfo = m_heap->Recycler()->GetLastGcInfo((gc_kind)kind);

    uint64_t totalLimit = GCToOSInterface::GetPhysicalMemoryLimit();
    if (g_totalLimit == 0)
        g_totalLimit = GCToOSInterface::GetPhysicalMemoryLimit();

    uint64_t totalLimit = g_totalLimit;
    *highMemLoadThresholdBytes = totalLimit * 99 / 100; // just say 99% for now
    *totalAvailableMemoryBytes = totalLimit;

    uint32_t memLoad;
    uint64_t availPhysical, availPage;
    GCToOSInterface::GetMemoryStatus(totalLimit, &memLoad, &availPhysical, &availPage);
    uint32_t memLoad = GetMemoryLoad();
    *lastRecordedMemLoadBytes = memLoad * totalLimit / 100;

    *lastRecordedHeapSizeBytes = GetTotalBytesInUse();

@@ -686,13 +689,22 @@ void SatoriGC::GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, uint64_t* tota
    }
}

static uint32_t g_memLoad;
static size_t g_memLoadMsec;

uint32_t SatoriGC::GetMemoryLoad()
{
    uint32_t memLoad;
    uint64_t availPhysical, availPage;
    GCToOSInterface::GetMemoryStatus(0, &memLoad, &availPhysical, &availPage);
    size_t time = GetNow();

    return memLoad;
    // limit querying frequency to once per 16 msec.
    if ((time >> 4) != (g_memLoadMsec >> 4))
    {
        uint64_t availPhysical, availPage;
        GCToOSInterface::GetMemoryStatus(0, &g_memLoad, &availPhysical, &availPage);
        g_memLoadMsec = time;
    }

    return g_memLoad;
}

void SatoriGC::DiagGetGCSettings(EtwGCSettingsInfo* etw_settings)
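GetMemoryLoad now caches the OS query and refreshes it at most about once per 16 ms by comparing millisecond timestamps shifted right by 4: two timestamps in the same 16 ms bucket compare equal. A tiny sketch of that bucketing, with a hypothetical ExpensiveQuery standing in for GCToOSInterface::GetMemoryStatus:

#include <cstdint>

static uint32_t g_cachedLoad;
static uint64_t g_cachedMsec;

static uint32_t ExpensiveQuery() { return 42; }   // hypothetical stand-in for the real OS call

uint32_t ThrottledLoad(uint64_t nowMsec)
{
    // (now >> 4) changes only every 16 ms, so at most one real query per 16 ms bucket.
    if ((nowMsec >> 4) != (g_cachedMsec >> 4))
    {
        g_cachedLoad = ExpensiveQuery();
        g_cachedMsec = nowMsec;
    }
    return g_cachedLoad;
}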
@@ -847,7 +859,8 @@ void SatoriGC::BulkMoveWithWriteBarrier(void* dst, const void* src, size_t byteC
    memmove(dst, src, byteCount);

    if (byteCount >= sizeof(size_t) &&
        (!localAssignment || m_heap->Recycler()->IsBarrierConcurrent()))
        (!(localAssignment || m_heap->Recycler()->IsNextGcFullGc()) ||
            m_heap->Recycler()->IsBarrierConcurrent()))
    {
        SetCardsAfterBulkCopy((size_t)dst, (size_t)src, byteCount);
    }
87  src/coreclr/gc/satori/SatoriGate.cpp  Normal file

@@ -0,0 +1,87 @@
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// SatoriGate.cpp
//

#ifdef TARGET_WINDOWS

#include "common.h"
#include "windows.h"
#include "synchapi.h"
#include "SatoriGate.h"

SatoriGate::SatoriGate()
{
    m_state = s_blocking;
}

// If this gate is in blocking state, the thread will block
// until woken up, possibly spuriously.
void SatoriGate::Wait()
{
    uint32_t blocking = s_blocking;
    BOOL result = WaitOnAddress(&m_state, &blocking, sizeof(uint32_t), INFINITE);
    _ASSERTE(result == TRUE);
    m_state = s_blocking;
}

// If this gate is in blocking state, the thread will block
// until woken up, possibly spuriously.
// or until the wait times out. (in a case of timeout returns false)
bool SatoriGate::TimedWait(int timeout)
{
    uint32_t blocking = s_blocking;
    BOOL result = WaitOnAddress(&m_state, &blocking, sizeof(uint32_t), timeout);
    _ASSERTE(result == TRUE || GetLastError() == ERROR_TIMEOUT);

    bool woken = result == TRUE;
    if (woken)
    {
        // consume the wake
        m_state = s_blocking;
    }

    return woken;
}

// After this call at least one thread will go through the gate, either by waking up,
// or by going through Wait without blocking.
// If there are several racing wakes, one or more may take effect,
// but all wakes will see at least one thread going through the gate.
void SatoriGate::WakeOne()
{
    m_state = s_open;
    WakeByAddressSingle((PVOID)&m_state);
}

// Same as WakeOne, but if there are multiple waiters sleeping,
// all will be woken up and go through the gate.
void SatoriGate::WakeAll()
{
    m_state = s_open;
    WakeByAddressAll((PVOID)&m_state);
}

#endif
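The implementation above is Windows-only and relies on WaitOnAddress/WakeByAddress. The header below reserves pthread_mutex_t*/pthread_cond_t* members for non-Windows builds, so the Unix counterpart (not included in this diff) presumably uses a mutex plus condition variable. A rough sketch of how an equivalent gate could look on POSIX, under those assumptions:

#include <pthread.h>
#include <cstdint>

class PosixGateSketch
{
    static const uint32_t s_blocking = 0;
    static const uint32_t s_open     = 1;

    uint32_t        m_state;
    pthread_mutex_t m_cs;
    pthread_cond_t  m_cv;

public:
    PosixGateSketch() : m_state(s_blocking)
    {
        pthread_mutex_init(&m_cs, nullptr);
        pthread_cond_init(&m_cv, nullptr);
    }

    // Blocks only if the gate is not already open; may return spuriously, matching the contract above.
    void Wait()
    {
        pthread_mutex_lock(&m_cs);
        if (m_state == s_blocking)
            pthread_cond_wait(&m_cv, &m_cs);
        m_state = s_blocking;              // consume the wake, like the Windows version
        pthread_mutex_unlock(&m_cs);
    }

    void WakeOne()
    {
        pthread_mutex_lock(&m_cs);
        m_state = s_open;                  // publish the wake under the mutex so it cannot be lost
        pthread_cond_signal(&m_cv);
        pthread_mutex_unlock(&m_cs);
    }

    void WakeAll()
    {
        pthread_mutex_lock(&m_cs);
        m_state = s_open;
        pthread_cond_broadcast(&m_cv);
        pthread_mutex_unlock(&m_cs);
    }
};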
60  src/coreclr/gc/satori/SatoriGate.h  Normal file

@@ -0,0 +1,60 @@
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// SatoriGate.h
//

#ifndef __SATORI_GATE_H__
#define __SATORI_GATE_H__

#include <stdint.h>

class SatoriGate
{
private:
    static const uint32_t s_open = 1;
    static const uint32_t s_blocking = 0;

    volatile uint32_t m_state;

#if defined(_INC_PTHREADS)
    pthread_mutex_t* m_cs;
    pthread_cond_t* m_cv;
#else
    size_t* dummy1;
    size_t* dummy2;
#endif

public:
    SatoriGate();

    void Wait();

    bool TimedWait(int timeout);

    void WakeOne();

    void WakeAll();
};

#endif
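A short usage sketch of the gate contract: because wakes can be spurious and a wake can arrive before the wait, callers are expected to re-check their own condition around Wait(). The worker-loop shape below is hypothetical, not taken from the commit:

// Hypothetical consumer loop built on the SatoriGate contract (wakes may be spurious).
void WorkerLoop(SatoriGate* gate, bool (*tryTakeWork)())
{
    while (true)
    {
        if (tryTakeWork())
            continue;       // got work, keep going

        // No work observed; block until someone calls WakeOne/WakeAll,
        // then loop back and re-check the queue, since the wake may be spurious.
        gate->Wait();
    }
}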
@@ -116,7 +116,7 @@ bool SatoriHeap::CommitMoreMap(size_t currentCommittedMapSize)
    void* commitFrom = (void*)((size_t)&m_pageMap + currentCommittedMapSize);
    size_t commitSize = SatoriUtil::CommitGranularity();

    SatoriLockHolder<SatoriSpinLock> holder(&m_mapLock);
    SatoriLockHolder holder(&m_mapLock);
    if (currentCommittedMapSize <= m_committedMapSize)
    {
        if (GCToOSInterface::VirtualCommit(commitFrom, commitSize))
@@ -132,7 +132,7 @@ private:
    size_t m_committedMapSize;
    size_t m_usedMapLength;
    size_t m_nextPageIndex;
    SatoriSpinLock m_mapLock;
    SatoriLock m_mapLock;
    SatoriPage* m_pageMap[1];

    static int8_t* s_pageByteMap;
|
@ -30,3 +30,189 @@
|
|||
#include "../env/gcenv.os.h"
|
||||
#include "SatoriLock.h"
|
||||
|
||||
NOINLINE
|
||||
bool SatoriLock::EnterSlow(bool noBlock)
|
||||
{
|
||||
bool hasWaited = false;
|
||||
// we will retry after waking up
|
||||
while (true)
|
||||
{
|
||||
int iteration = 1;
|
||||
|
||||
// We will count when we failed to change the state of the lock and increase pauses
|
||||
// so that bursts of activity are better tolerated. This should not happen often.
|
||||
int collisions = 0;
|
||||
|
||||
// We will track the changes of ownership while we are trying to acquire the lock.
|
||||
size_t oldOwner = _owningThreadId;
|
||||
uint32_t ownerChanged = 0;
|
||||
|
||||
int iterationLimit = _spinCount >> SpinCountScaleShift;
|
||||
// inner loop where we try acquiring the lock or registering as a waiter
|
||||
while (true)
|
||||
{
|
||||
//
|
||||
// Try to grab the lock. We may take the lock here even if there are existing waiters. This creates the possibility
|
||||
// of starvation of waiters, but it also prevents lock convoys and preempted waiters from destroying perf.
|
||||
// However, if we do not see _wakeWatchDog cleared for long enough, we go into YieldToWaiters mode to ensure some
|
||||
// waiter progress.
|
||||
//
|
||||
uint32_t oldState = _state;
|
||||
bool canAcquire = ((oldState & Locked) == Unlocked) &&
|
||||
(hasWaited || ((oldState & YieldToWaiters) == 0));
|
||||
|
||||
if (canAcquire)
|
||||
{
|
||||
uint32_t newState = oldState | Locked;
|
||||
if (hasWaited)
|
||||
newState = (newState - WaiterCountIncrement) & ~(WaiterWoken | YieldToWaiters);
|
||||
|
||||
if (CompareExchangeAcq(&_state, newState, oldState))
|
||||
{
|
||||
// GOT THE LOCK!!
|
||||
_ASSERTE((_state | Locked) != 0);
|
||||
_ASSERTE(_owningThreadId == 0);
|
||||
_owningThreadId = SatoriUtil::GetCurrentThreadTag();
|
||||
|
||||
if (hasWaited)
|
||||
_wakeWatchDog = 0;
|
||||
|
||||
// now we can estimate how busy the lock is and adjust spinning accordingly
|
||||
uint16_t spinLimit = _spinCount;
|
||||
if (ownerChanged != 0)
|
||||
{
|
||||
// The lock has changed ownership while we were trying to acquire it.
|
||||
// It is a signal that we might want to spin less next time.
|
||||
// Pursuing a lock that is being "stolen" by other threads is inefficient
|
||||
// due to cache misses and unnecessary sharing of state that keeps invalidating.
|
||||
if (spinLimit > DefaultMinSpinCount)
|
||||
{
|
||||
_spinCount = (uint16_t)(spinLimit - 1);
|
||||
}
|
||||
}
|
||||
else if (spinLimit < DefaultMaxSpinCount &&
|
||||
iteration >= (spinLimit >> SpinCountScaleShift))
|
||||
{
|
||||
// we used all of allowed iterations, but the lock does not look very contested,
|
||||
// we can allow a bit more spinning.
|
||||
//
|
||||
// NB: if we acquired the lock while registering a waiter, and owner did not change it still counts.
|
||||
// (however iteration does not grow beyond the iterationLimit)
|
||||
_spinCount = (uint16_t)(spinLimit + 1);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
size_t newOwner = _owningThreadId;
|
||||
if (newOwner != 0 && newOwner != oldOwner)
|
||||
{
|
||||
if (oldOwner != 0)
|
||||
ownerChanged++;
|
||||
|
||||
oldOwner = newOwner;
|
||||
}
|
||||
|
||||
if (iteration < iterationLimit)
|
||||
{
|
||||
// We failed to acquire the lock and want to retry after a pause.
|
||||
// Ideally we will retry right when the lock becomes free, but we cannot know when that will happen.
|
||||
// We will use a pause that doubles up on every iteration. It will not be more than 2x worse
|
||||
// than the ideal guess, while minimizing the number of retries.
|
||||
// We will allow pauses up to 64~128 spinwaits.
|
||||
IterationBackoff(min(iteration, 6));
|
||||
iteration++;
|
||||
continue;
|
||||
}
|
||||
else if (!canAcquire)
|
||||
{
|
||||
// We reached our spin limit, and need to wait.
|
||||
|
||||
if (noBlock)
|
||||
return false;
|
||||
|
||||
// If waiter was awaken spuriously, it may acquire the lock before wake watchdog is set.
|
||||
// If there are no more waiters for a long time, the watchdog could hang around for a while too.
|
||||
// When a new waiter enters the system, it may look like we had no waiter progress for all that time.
|
||||
// To avoid this, if it looks like we have no waiters and will be the first new one,
|
||||
// clear the watchdog.
|
||||
// It is ok to clear even if we will not end up the first one.
|
||||
// We will self-correct on the next wake and reestablish a new watchdog.
|
||||
if (oldState < WaiterCountIncrement && _wakeWatchDog !=0)
|
||||
_wakeWatchDog = 0;
|
||||
|
||||
// Increment the waiter count.
|
||||
// Note that we do not do any overflow checking on this increment. In order to overflow,
|
||||
// we'd need to have about 1 billion waiting threads, which is inconceivable anytime in the
|
||||
// forseeable future.
|
||||
uint32_t newState = oldState + WaiterCountIncrement;
|
||||
if (hasWaited)
|
||||
newState = (newState - WaiterCountIncrement) & ~WaiterWoken;
|
||||
|
||||
if (Interlocked::CompareExchange(&_state, newState, oldState) == oldState)
|
||||
break;
|
||||
}
|
||||
|
||||
CollisionBackoff(++collisions);
|
||||
}
|
||||
|
||||
//
|
||||
// Now we wait.
|
||||
//
|
||||
_ASSERTE(_state >= WaiterCountIncrement);
|
||||
_gate->Wait();
|
||||
_ASSERTE(_state >= WaiterCountIncrement);
|
||||
|
||||
// this was either real or spurious wake.
|
||||
// either way try acquire again.
|
||||
hasWaited = true;
|
||||
}
|
||||
}
|
||||
|
||||
NOINLINE
|
||||
void SatoriLock::AwakeWaiterIfNeeded()
|
||||
{
|
||||
int collisions = 0;
|
||||
while (true)
|
||||
{
|
||||
uint32_t oldState = _state;
|
||||
if ((int32_t)oldState >= (int32_t)WaiterCountIncrement) // false if WaiterWoken is set
|
||||
{
|
||||
// there are waiters, and nobody has woken one.
|
||||
uint32_t newState = oldState | WaiterWoken;
|
||||
|
||||
uint16_t lastWakeTicks = _wakeWatchDog;
|
||||
if (lastWakeTicks != 0)
|
||||
{
|
||||
uint16_t currentTicks = GetTickCount();
|
||||
if ((int16_t)currentTicks - (int16_t)lastWakeTicks > (int16_t)WaiterWatchdogTicks)
|
||||
{
|
||||
//printf("Last: %i ", (int)lastWakeTicks);
|
||||
//printf("Current: %i \n", (int)currentTicks);
|
||||
newState |= YieldToWaiters;
|
||||
}
|
||||
}
|
||||
|
||||
if (Interlocked::CompareExchange(&_state, newState, oldState) == oldState)
|
||||
{
|
||||
if (lastWakeTicks == 0)
|
||||
{
|
||||
// Sometimes timestamp will be 0.
|
||||
// It is harmless. We will try again on the next wake
|
||||
_wakeWatchDog = GetTickCount();
|
||||
}
|
||||
|
||||
_gate->WakeOne();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// no need to wake a waiter.
|
||||
return;
|
||||
}
|
||||
|
||||
CollisionBackoff(++collisions);
|
||||
}
|
||||
}
|
||||
|
|
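For orientation, the lock implemented above exposes Enter/TryEnter/Leave (plus a RAII SatoriLockHolder, declared in SatoriLock.h below). A hypothetical call-site sketch, mirroring how the allocator uses TryEnter to fall back rather than block:

// Hypothetical call-site shape (not from the commit): take the shared path only
// when the lock is immediately available, otherwise let the caller fall back.
bool TryDoShared(SatoriLock* lock)
{
    if (!lock->TryEnter())
        return false;       // contended: caller takes the uncontended fallback path

    // ... touch the shared state ...

    lock->Leave();          // Leave wakes a registered waiter if one is sleeping
    return true;
}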
|
@ -30,125 +30,251 @@
|
|||
#include "common.h"
|
||||
#include "../gc.h"
|
||||
#include "SatoriUtil.h"
|
||||
#include "SatoriGate.h"
|
||||
|
||||
#if defined(TARGET_OSX)
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
class SatoriLock
|
||||
{
|
||||
private:
|
||||
CLRCriticalSection m_cs;
|
||||
// m_state layout:
|
||||
//
|
||||
// bit 0: True if the lock is held, false otherwise.
|
||||
//
|
||||
// bit 1: True if nonwaiters must not get ahead of waiters when acquiring a contended lock.
|
||||
//
|
||||
// sign bit: True if we've set the event to wake a waiting thread. The waiter resets this to false when it
|
||||
// wakes up. This avoids the overhead of setting the event multiple times.
|
||||
//
|
||||
// everything else: A count of the number of threads waiting on the event.
|
||||
static const uint32_t Unlocked = 0;
|
||||
static const uint32_t Locked = 1;
|
||||
static const uint32_t YieldToWaiters = 2;
|
||||
static const uint32_t WaiterCountIncrement = 4;
|
||||
static const uint32_t WaiterWoken = 1u << 31;
|
||||
|
||||
public:
|
||||
void Initialize()
|
||||
{
|
||||
m_cs.Initialize();
|
||||
}
|
||||
volatile uint32_t _state;
|
||||
volatile uint16_t _spinCount;
|
||||
volatile uint16_t _wakeWatchDog;
|
||||
volatile size_t _owningThreadId;
|
||||
|
||||
void Destroy()
|
||||
{
|
||||
m_cs.Destroy();
|
||||
}
|
||||
|
||||
void Enter()
|
||||
{
|
||||
m_cs.Enter();
|
||||
}
|
||||
|
||||
void Leave()
|
||||
{
|
||||
m_cs.Leave();
|
||||
}
|
||||
};
|
||||
|
||||
class SatoriSpinLock
|
||||
{
|
||||
private:
|
||||
int m_backoff;
|
||||
|
||||
public:
|
||||
void Initialize()
|
||||
{
|
||||
m_backoff = 0;
|
||||
}
|
||||
|
||||
void Enter()
|
||||
{
|
||||
if (!CompareExchangeAcq(&m_backoff, 1, 0))
|
||||
{
|
||||
EnterSpin();
|
||||
}
|
||||
}
|
||||
|
||||
bool TryEnter()
|
||||
{
|
||||
return CompareExchangeAcq(&m_backoff, 1, 0);
|
||||
}
|
||||
|
||||
void Leave()
|
||||
{
|
||||
_ASSERTE(m_backoff);
|
||||
VolatileStore(&m_backoff, 0);
|
||||
}
|
||||
SatoriGate* _gate;
|
||||
|
||||
private:
|
||||
|
||||
NOINLINE
|
||||
void EnterSpin()
|
||||
{
|
||||
int localBackoff = 0;
|
||||
while (VolatileLoadWithoutBarrier(&m_backoff) ||
|
||||
!CompareExchangeAcq(&m_backoff, 1, 0))
|
||||
{
|
||||
localBackoff = Backoff(localBackoff);
|
||||
}
|
||||
}
|
||||
|
||||
int Backoff(int backoff)
|
||||
{
|
||||
// TUNING: do we care about 1-proc machines?
|
||||
|
||||
for (int i = 0; i < backoff; i++)
|
||||
{
|
||||
YieldProcessor();
|
||||
|
||||
if ((i & 0x3FF) == 0x3FF)
|
||||
{
|
||||
GCToOSInterface::YieldThread(0);
|
||||
}
|
||||
}
|
||||
|
||||
return (backoff * 2 + 1) & 0x3FFF;
|
||||
}
|
||||
|
||||
static bool CompareExchangeAcq(int volatile* destination, int exchange, int comparand)
|
||||
FORCEINLINE
|
||||
static bool CompareExchangeAcq(uint32_t volatile* destination, uint32_t exchange, uint32_t comparand)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#if defined(TARGET_AMD64)
|
||||
return _InterlockedCompareExchange((long*)destination, exchange, comparand) == comparand;
|
||||
return _InterlockedCompareExchange((long*)destination, exchange, comparand) == (long)comparand;
|
||||
#else
|
||||
return _InterlockedCompareExchange_acq((long*)destination, exchange, comparand) == comparand;
|
||||
return _InterlockedCompareExchange_acq((long*)destination, exchange, comparand) == (long)comparand;
|
||||
#endif
|
||||
#else
|
||||
return __atomic_compare_exchange_n(destination, &comparand, exchange, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
static uint32_t InterlockedDecRel(volatile uint32_t* arg)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#if defined(TARGET_AMD64)
|
||||
return (uint32_t)_InterlockedDecrement((long*)arg);
|
||||
#else
|
||||
return (uint32_t)_InterlockedDecrement_rel((long*)arg);
|
||||
#endif
|
||||
#else
|
||||
return __atomic_sub_fetch(arg, 1, __ATOMIC_RELEASE);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
static int64_t GetCheapTimeStamp()
|
||||
{
|
||||
#if defined(TARGET_AMD64)
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
ptrdiff_t cycles;
|
||||
ptrdiff_t cyclesHi;
|
||||
__asm__ __volatile__
|
||||
("rdtsc":"=a" (cycles), "=d" (cyclesHi));
|
||||
return (cyclesHi << 32) | cycles;
|
||||
#endif
|
||||
#elif defined(TARGET_ARM64)
|
||||
// On arm64 just read timer register instead
|
||||
#ifdef _MSC_VER
|
||||
#define ARM64_CNTVCT_EL0 ARM64_SYSREG(3,3,14,0,2)
|
||||
return _ReadStatusReg(ARM64_CNTVCT_EL0);
|
||||
#elif defined(TARGET_LINUX) || defined(TARGET_OSX)
|
||||
int64_t timerTicks;
|
||||
asm volatile("mrs %0, cntvct_el0" : "=r"(timerTicks));
|
||||
return timerTicks;
|
||||
#else
|
||||
Unsupported platform?
|
||||
#endif
|
||||
#else
|
||||
Unsupported architecture?
|
||||
#endif
|
||||
}
|
||||
|
||||
static const uint16_t SpinCountNotInitialized = INT16_MIN;
|
||||
|
||||
// While spinning is parameterized in terms of iterations,
|
||||
// the internal tuning operates with spin count at a finer scale.
|
||||
// One iteration is mapped to 64 spin count units.
|
||||
static const int SpinCountScaleShift = 6;
|
||||
|
||||
static const uint16_t DefaultMaxSpinCount = 22 << SpinCountScaleShift;
|
||||
static const uint16_t DefaultMinSpinCount = 1 << SpinCountScaleShift;
|
||||
|
||||
// We will use exponential backoff in rare cases when we need to change state atomically and cannot
|
||||
// make progress due to concurrent state changes by other threads.
|
||||
// While we cannot know the ideal amount of wait needed before making a successful attempt,
|
||||
// the exponential backoff will generally be not more than 2X worse than the perfect guess and
|
||||
// will do a lot less attempts than an simple retry. On multiprocessor machine fruitless attempts
|
||||
// will cause unnecessary sharing of the contended state which may make modifying the state more expensive.
|
||||
// To protect against degenerate cases we will cap the per-iteration wait to 1024 spinwaits.
|
||||
static const uint32_t MaxExponentialBackoffBits = 10;
|
||||
|
||||
// This lock is unfair and permits acquiring a contended lock by a nonwaiter in the presence of waiters.
|
||||
// It is possible for one thread to keep holding the lock long enough that waiters go to sleep and
|
||||
// then release and reacquire fast enough that waiters have no chance to get the lock.
|
||||
// In extreme cases one thread could keep retaking the lock starving everybody else.
|
||||
// If we see woken waiters not able to take the lock for too long we will ask nonwaiters to wait.
|
||||
static const uint32_t WaiterWatchdogTicks = 60;
|
||||
|
||||
public:
|
||||
void Initialize()
|
||||
{
|
||||
_state = 0;
|
||||
_spinCount = DefaultMinSpinCount;
|
||||
_wakeWatchDog = 0;
|
||||
_owningThreadId = 0;
|
||||
_gate = new (nothrow) SatoriGate();
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
bool TryEnterOneShot()
|
||||
{
|
||||
uint32_t origState = _state;
|
||||
if ((origState & (YieldToWaiters | Locked)) == 0)
|
||||
{
|
||||
uint32_t newState = origState + Locked;
|
||||
if (CompareExchangeAcq(&_state, newState, origState))
|
||||
{
|
||||
_ASSERTE(_owningThreadId == 0);
|
||||
_owningThreadId = SatoriUtil::GetCurrentThreadTag();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
bool TryEnter()
|
||||
{
|
||||
return TryEnterOneShot() ||
|
||||
EnterSlow(/*noBlock*/true);
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
void Enter()
|
||||
{
|
||||
if (!TryEnterOneShot())
|
||||
{
|
||||
bool entered = EnterSlow();
|
||||
_ASSERTE(entered);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsLocked()
|
||||
{
|
||||
return (_state & Locked) != 0;
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
void Leave()
|
||||
{
|
||||
_ASSERTE(IsLocked());
|
||||
_ASSERTE(_owningThreadId == SatoriUtil::GetCurrentThreadTag());
|
||||
|
||||
_owningThreadId = 0;
|
||||
uint32_t state = InterlockedDecRel(&_state);
|
||||
if ((int32_t)state < (int32_t)WaiterCountIncrement) // true if have no waiters or WaiterWoken is set
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
//
|
||||
// We have waiters; take the slow path.
|
||||
//
|
||||
AwakeWaiterIfNeeded();
|
||||
}
|
||||
|
||||
static void CollisionBackoff(uint32_t collisions)
|
||||
{
|
||||
_ASSERTE(collisions > 0);
|
||||
|
||||
// no need for much randomness here, we will just hash the stack location and a timestamp.
|
||||
uint32_t rand = ((uint32_t)(size_t)&collisions + (uint32_t)GetCheapTimeStamp()) * 2654435769u;
|
||||
uint32_t spins = rand >> (uint8_t)((uint32_t)32 - min(collisions, MaxExponentialBackoffBits));
|
||||
for (int i = 0; i < (int)spins; i++)
|
||||
{
|
||||
YieldProcessor();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
static uint16_t GetTickCount()
|
||||
{
|
||||
return (uint16_t)GCToOSInterface::GetLowPrecisionTimeStamp();
|
||||
}
|
||||
|
||||
// same idea as in CollisionBackoff, but with guaranteed minimum wait
|
||||
static void IterationBackoff(int iteration)
|
||||
{
|
||||
_ASSERTE(iteration > 0 && iteration < MaxExponentialBackoffBits);
|
||||
|
||||
uint32_t rand = ((uint32_t)(size_t)&iteration + (uint32_t)GetCheapTimeStamp()) * 2654435769u;
|
||||
// set the highmost bit to ensure minimum number of spins is exponentialy increasing
|
||||
// it basically guarantees that we spin at least 1, 2, 4, 8, 16, times, and so on
|
||||
rand |= (1u << 31);
|
||||
uint32_t spins = rand >> (uint8_t)(32 - iteration);
|
||||
for (int i = 0; i < (int)spins; i++)
|
||||
{
|
||||
YieldProcessor();
|
||||
}
|
||||
}
|
||||
|
||||
NOINLINE
|
||||
bool EnterSlow(bool noBlock = false);
|
||||
|
||||
NOINLINE
|
||||
void AwakeWaiterIfNeeded();
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class SatoriLockHolder : public Satori::StackOnly {
|
||||
private:
|
||||
T* const m_lock;
|
||||
SatoriLock* const m_lock;
|
||||
|
||||
public:
|
||||
// Disallow copying
|
||||
SatoriLockHolder& operator=(const SatoriLockHolder&) = delete;
|
||||
SatoriLockHolder(const SatoriLockHolder&) = delete;
|
||||
|
||||
SatoriLockHolder(T* lock)
|
||||
SatoriLockHolder(SatoriLock* lock)
|
||||
: m_lock(lock)
|
||||
{
|
||||
m_lock->Enter();
|
||||
}
|
||||
|
||||
SatoriLockHolder(T* lock, bool isLocked)
|
||||
SatoriLockHolder(SatoriLock* lock, bool isLocked)
|
||||
: m_lock(lock)
|
||||
{
|
||||
if (!isLocked)
|
||||
|
|
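The backoff helpers above derive a pseudo-random spin count by hashing a stack address with a cheap timestamp and keeping only the top min(collisions, 10) bits, so the expected wait roughly doubles per collision and is capped at 1023 spin-waits. A small self-contained sketch of that shift arithmetic (the 2654435769 multiplier is the same Fibonacci-hash constant used above):

#include <algorithm>
#include <cstdint>

uint32_t SpinsForCollision(uint32_t collisions, uint32_t entropy)
{
    const uint32_t MaxExponentialBackoffBits = 10;   // cap: at most 1023 spin-waits per attempt
    collisions = std::max(collisions, 1u);           // callers pass ++collisions, so at least 1
    uint32_t rand = entropy * 2654435769u;           // Fibonacci hashing of some cheap entropy
    // Keep only the top N bits: the result is uniform in [0, 2^N), so it doubles per collision.
    return rand >> (uint8_t)(32 - std::min(collisions, MaxExponentialBackoffBits));
}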
|
@ -54,6 +54,7 @@ public:
|
|||
SatoriObject* Next();
|
||||
|
||||
size_t Size();
|
||||
size_t FreeObjSize();
|
||||
bool SameRegion(SatoriRegion* otherRegion);
|
||||
bool IsFree();
|
||||
bool IsExternal();
|
||||
|
@ -95,6 +96,9 @@ public:
|
|||
template <typename F>
|
||||
void ForEachObjectRef(F lambda, bool includeCollectibleAllocator = false);
|
||||
|
||||
template <typename F>
|
||||
void ForEachObjectRef(F lambda, size_t size, bool includeCollectibleAllocator = false);
|
||||
|
||||
template <typename F>
|
||||
void ForEachObjectRef(F lambda, size_t start, size_t end);
|
||||
|
||||
|
|
|
@ -48,6 +48,15 @@ FORCEINLINE size_t SatoriObject::Size()
|
|||
return size;
|
||||
}
|
||||
|
||||
FORCEINLINE size_t SatoriObject::FreeObjSize()
|
||||
{
|
||||
_ASSERTE(IsFree());
|
||||
size_t size = Satori::MIN_FREE_SIZE;
|
||||
size += (size_t)((ArrayBase*)this)->GetNumComponents();
|
||||
size = ALIGN_UP(size, Satori::OBJECT_ALIGNMENT);
|
||||
return size;
|
||||
}
|
||||
|
||||
inline size_t SatoriObject::Start()
|
||||
{
|
||||
return (size_t)this;
|
||||
|
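FreeObjSize above recovers a free object's length from its array header: free space is formatted as a fake array-like object, so its total size is the minimum free-object size plus the stored component count, rounded up to object alignment. Illustrative arithmetic with assumed constants (the actual values are not quoted in this diff):

#include <cassert>
#include <cstddef>

int main()
{
    const size_t MIN_FREE_SIZE    = 24;   // assumed value, for illustration only
    const size_t OBJECT_ALIGNMENT = 8;    // assumed value, for illustration only
    size_t numComponents = 100;           // what the free object's array header would report

    size_t size = (MIN_FREE_SIZE + numComponents + OBJECT_ALIGNMENT - 1) & ~(OBJECT_ALIGNMENT - 1);
    assert(size == 128);                  // 24 + 100 = 124, rounded up to 128
    return 0;
}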
@ -303,6 +312,72 @@ inline void SatoriObject::ForEachObjectRef(F lambda, bool includeCollectibleAllo
|
|||
}
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline void SatoriObject::ForEachObjectRef(F lambda, size_t size, bool includeCollectibleAllocator)
|
||||
{
|
||||
MethodTable* mt = RawGetMethodTable();
|
||||
|
||||
if (includeCollectibleAllocator && mt->Collectible())
|
||||
{
|
||||
uint8_t* loaderAllocator = GCToEEInterface::GetLoaderAllocatorObjectForGC(this);
|
||||
// NB: Allocator ref location is fake. The actual location is a handle).
|
||||
// For that same reason relocation callers should not care about the location.
|
||||
lambda((SatoriObject**)&loaderAllocator);
|
||||
}
|
||||
|
||||
if (!mt->ContainsPointers())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
CGCDesc* map = CGCDesc::GetCGCDescFromMT(mt);
|
||||
CGCDescSeries* cur = map->GetHighestSeries();
|
||||
|
||||
// GetNumSeries is actually signed.
|
||||
// Negative value means the pattern repeats componentNum times (struct arrays)
|
||||
ptrdiff_t numSeries = (ptrdiff_t)map->GetNumSeries();
|
||||
if (numSeries >= 0)
|
||||
{
|
||||
CGCDescSeries* last = map->GetLowestSeries();
|
||||
|
||||
do
|
||||
{
|
||||
size_t refPtr = (size_t)this + cur->GetSeriesOffset();
|
||||
// series size is offset by the object size, so need to compensate for that.
|
||||
size_t refPtrStop = refPtr + size + cur->GetSeriesSize();
|
||||
|
||||
// top check loop. this could be a zero-element array
|
||||
while (refPtr < refPtrStop)
|
||||
{
|
||||
lambda((SatoriObject**)refPtr);
|
||||
refPtr += sizeof(size_t);
|
||||
}
|
||||
cur--;
|
||||
} while (cur >= last);
|
||||
}
|
||||
else
|
||||
{
|
||||
// repeating patern - an array
|
||||
size_t refPtr = (size_t)this + cur->GetSeriesOffset();
|
||||
uint32_t componentNum = ((ArrayBase*)this)->GetNumComponents();
|
||||
while (componentNum-- > 0)
|
||||
{
|
||||
for (ptrdiff_t i = 0; i > numSeries; i--)
|
||||
{
|
||||
val_serie_item item = *(cur->val_serie + i);
|
||||
size_t refPtrStop = refPtr + item.nptrs * sizeof(size_t);
|
||||
do
|
||||
{
|
||||
lambda((SatoriObject**)refPtr);
|
||||
refPtr += sizeof(size_t);
|
||||
} while (refPtr < refPtrStop);
|
||||
|
||||
refPtr += item.skip;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline void SatoriObject::ForEachObjectRef(F lambda, size_t start, size_t end)
|
||||
{
|
||||
|
|
|
@ -39,7 +39,7 @@ SatoriPage* SatoriPage::InitializeAt(size_t address, size_t pageSize, SatoriHeap
|
|||
{
|
||||
_ASSERTE(pageSize % Satori::PAGE_SIZE_GRANULARITY == 0);
|
||||
|
||||
SatoriPage* result = (SatoriPage*)GCToOSInterface::VirtualReserve((void*)address, pageSize);
|
||||
SatoriPage* result = (SatoriPage*)GCToOSInterface::VirtualReserve((void*)address, pageSize, SatoriUtil::UseTHP());
|
||||
if (result == nullptr)
|
||||
{
|
||||
return result;
|
||||
|
@ -70,16 +70,23 @@ SatoriPage* SatoriPage::InitializeAt(size_t address, size_t pageSize, SatoriHeap
|
|||
// conservatively assume the first useful card word to cover the start of the first region.
|
||||
size_t cardTableStart = (result->m_firstRegion - address) / Satori::BYTES_PER_CARD_BYTE;
|
||||
// this is also region map size
|
||||
size_t regionNumber = pageSize >> Satori::REGION_BITS;
|
||||
size_t cardGroupSize = regionNumber * 2;
|
||||
size_t regionCount = pageSize >> Satori::REGION_BITS;
|
||||
size_t cardGroupSize = pageSize / Satori::BYTES_PER_CARD_GROUP * 2;
|
||||
|
||||
// initializing to EPHEMERAL is optional. 0 is ok too.
|
||||
// initializing cards to EPHEMERAL is optional. 0 is ok too.
|
||||
// for huge pages it is not as useful and may get expensive.
|
||||
// also if the huge obj contains references, its region will go to gen2 anyways
|
||||
if (pageSize == Satori::PAGE_SIZE_GRANULARITY)
|
||||
{
|
||||
#if _DEBUG
|
||||
// in debug we initialize only half the cards, since it is optional...
|
||||
memset(&result->m_cardTable[cardTableStart], Satori::CardState::EPHEMERAL, (cardTableSize - cardTableStart) / 2);
|
||||
#else
|
||||
memset(&result->m_cardTable[cardTableStart], Satori::CardState::EPHEMERAL, cardTableSize - cardTableStart);
|
||||
memset(&result->m_cardGroups, Satori::CardState::EPHEMERAL, cardGroupSize);
|
||||
#endif
|
||||
// We leave card groups blank as we do not want to look at them when they may not even be covered by regions.
|
||||
// We maintain the invariant that groups are not set for allocator regions.
|
||||
// We do not use EPHEMERAL for groups.
|
||||
}
|
||||
|
||||
result->m_cardTableStart = cardTableStart;
|
||||
|
@ -90,7 +97,7 @@ SatoriPage* SatoriPage::InitializeAt(size_t address, size_t pageSize, SatoriHeap
|
|||
result->m_regionMap = (uint8_t*)(address + 128 + cardGroupSize);
|
||||
|
||||
// make sure the first useful card word is beyond the header.
|
||||
_ASSERTE(result->Start() + cardTableStart > (size_t)(result->m_regionMap) + regionNumber);
|
||||
_ASSERTE(result->Start() + cardTableStart > (size_t)(result->m_regionMap) + regionCount);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -157,7 +164,7 @@ SatoriRegion* SatoriPage::NextInPage(SatoriRegion* region)
|
|||
|
||||
SatoriRegion* SatoriPage::RegionForCardGroup(size_t group)
|
||||
{
|
||||
size_t mapIndex = group;
|
||||
size_t mapIndex = group * Satori::BYTES_PER_CARD_GROUP / Satori::REGION_SIZE_GRANULARITY;
|
||||
while (RegionMap()[mapIndex] > 1)
|
||||
{
|
||||
mapIndex -= ((size_t)1 << (RegionMap()[mapIndex] - 2));
|
||||
|
@ -205,8 +212,8 @@ void SatoriPage::SetCardsForRange(size_t start, size_t end)
|
|||
|
||||
memset((void*)(m_cardTable + firstCard), Satori::CardState::REMEMBERED, lastCard - firstCard + 1);
|
||||
|
||||
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t firstGroup = firstByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
size_t lastGroup = lastByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
for (size_t i = firstGroup; i <= lastGroup; i++)
|
||||
{
|
||||
if (!m_cardGroups[i * 2])
|
||||
|
@ -235,7 +242,7 @@ void SatoriPage::DirtyCardForAddress(size_t address)
|
|||
// so the card dirtying write can be unordered.
|
||||
m_cardTable[cardByteOffset] = Satori::CardState::DIRTY;
|
||||
|
||||
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t cardGroup = offset / Satori::BYTES_PER_CARD_GROUP;
|
||||
VolatileStore(&this->m_cardGroups[cardGroup * 2], Satori::CardState::DIRTY);
|
||||
VolatileStore(&this->m_cardState, Satori::CardState::DIRTY);
|
||||
}
|
||||
|
@ -262,8 +269,8 @@ void SatoriPage::DirtyCardsForRange(size_t start, size_t end)
|
|||
// cleaning will read in the opposite order
|
||||
VolatileStoreBarrier();
|
||||
|
||||
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t firstGroup = firstByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
size_t lastGroup = lastByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
for (size_t i = firstGroup; i <= lastGroup; i++)
|
||||
{
|
||||
this->m_cardGroups[i * 2] = Satori::CardState::DIRTY;
|
||||
|
@ -294,8 +301,8 @@ void SatoriPage::DirtyCardsForRangeConcurrent(size_t start, size_t end)
|
|||
|
||||
// we do not clean groups concurrently, so these can be conditional and unordered
|
||||
// only the eventual final state matters
|
||||
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t firstGroup = firstByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
size_t lastGroup = lastByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
for (size_t i = firstGroup; i <= lastGroup; i++)
|
||||
{
|
||||
if (m_cardGroups[i * 2] != Satori::CardState::DIRTY)
|
||||
|
@ -310,7 +317,17 @@ void SatoriPage::DirtyCardsForRangeConcurrent(size_t start, size_t end)
|
|||
}
|
||||
}
|
||||
|
||||
void SatoriPage::WipeCardsForRange(size_t start, size_t end, bool isTenured)
|
||||
void SatoriPage::WipeGroupsForRange(size_t start, size_t end)
|
||||
{
|
||||
size_t firstByteOffset = start - Start();
|
||||
size_t lastByteOffset = end - Start() - 1;
|
||||
|
||||
size_t firstGroup = firstByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
size_t lastGroup = lastByteOffset / Satori::BYTES_PER_CARD_GROUP;
|
||||
memset((void*)&m_cardGroups[firstGroup * 2], Satori::CardState::BLANK, (lastGroup - firstGroup + 1) * 2);
|
||||
}
|
||||
|
||||
void SatoriPage::ResetCardsForRange(size_t start, size_t end, bool isTenured)
|
||||
{
|
||||
size_t firstByteOffset = start - Start();
|
||||
size_t lastByteOffset = end - Start() - 1;
|
||||
|
@ -325,7 +342,5 @@ void SatoriPage::WipeCardsForRange(size_t start, size_t end, bool isTenured)
|
|||
int8_t resetValue = isTenured ? Satori::CardState::BLANK : Satori::CardState::EPHEMERAL;
|
||||
memset((void*)(m_cardTable + firstCard), resetValue, lastCard - firstCard + 1);
|
||||
|
||||
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
|
||||
memset((void*)&m_cardGroups[firstGroup * 2], resetValue, (lastGroup - firstGroup + 1) * 2);
|
||||
WipeGroupsForRange(start, end);
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ public:
|
|||
{
|
||||
m_cardTable[cardByteOffset] = Satori::CardState::REMEMBERED;
|
||||
|
||||
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t cardGroup = offset / Satori::BYTES_PER_CARD_GROUP;
|
||||
if (!m_cardGroups[cardGroup * 2])
|
||||
{
|
||||
m_cardGroups[cardGroup * 2] = Satori::CardState::REMEMBERED;
|
||||
|
@ -100,7 +100,7 @@ public:
|
|||
|
||||
// we do not clean groups concurrently, so these can be conditional and unordered
|
||||
// only the eventual final state matters
|
||||
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
|
||||
size_t cardGroup = offset / Satori::BYTES_PER_CARD_GROUP;
|
||||
if (m_cardGroups[cardGroup * 2] != Satori::CardState::DIRTY)
|
||||
{
|
||||
m_cardGroups[cardGroup * 2] = Satori::CardState::DIRTY;
|
||||
|
@ -117,7 +117,8 @@ public:
|
|||
void DirtyCardsForRange(size_t start, size_t length);
|
||||
void DirtyCardsForRangeConcurrent(size_t start, size_t end);
|
||||
|
||||
void WipeCardsForRange(size_t start, size_t end, bool isTenured);
|
||||
void WipeGroupsForRange(size_t start, size_t end);
|
||||
void ResetCardsForRange(size_t start, size_t end, bool isTenured);
|
||||
|
||||
volatile int8_t& CardState();
|
||||
volatile int8_t& ScanTicket();
|
||||
|
@ -162,14 +163,14 @@ private:
|
|||
// ----- we can have a few more fields above as long as m_cardGroups starts at offset 128.
|
||||
// that can be adjusted if needed
|
||||
|
||||
// computed size,
|
||||
// computed size, located after card groups
|
||||
// 1byte per region
|
||||
// 512 bytes per 1Gb
|
||||
uint8_t* m_regionMap;
|
||||
|
||||
// computed size,
|
||||
// 2byte per region
|
||||
// 1024 bytes per 1Gb
|
||||
// 2 byte per card group (4 per region granule)
|
||||
// 2048 bytes per 1Gb
|
||||
DECLSPEC_ALIGN(128)
|
||||
int8_t m_cardGroups[1];
|
||||
};
|
||||
|
|
|
@ -82,7 +82,7 @@ inline volatile int8_t& SatoriPage::CardGroupScanTicket(size_t i)
|
|||
|
||||
inline size_t SatoriPage::CardGroupCount()
|
||||
{
|
||||
return (End() - Start()) >> Satori::REGION_BITS;
|
||||
return (End() - Start()) / Satori::BYTES_PER_CARD_GROUP;
|
||||
}
|
||||
|
||||
inline int8_t* SatoriPage::CardsForGroup(size_t i)
|
||||
|
|
|
@ -75,7 +75,7 @@ public:
|
|||
_ASSERTE(item->m_prev == nullptr);
|
||||
_ASSERTE(item->m_containingQueue == nullptr);
|
||||
|
||||
SatoriLockHolder<SatoriSpinLock> holder(&m_lock);
|
||||
SatoriLockHolder holder(&m_lock);
|
||||
m_count++;
|
||||
item->m_containingQueue = this;
|
||||
if (m_head == nullptr)
|
||||
|
@ -92,6 +92,31 @@ public:
|
|||
m_head = item;
|
||||
}
|
||||
|
||||
void PushNoLock(T* item)
|
||||
{
|
||||
_ASSERTE(item->m_next == nullptr);
|
||||
_ASSERTE(item->m_prev == nullptr);
|
||||
_ASSERTE(item->m_containingQueue == nullptr);
|
||||
|
||||
size_t oldCount = m_count;
|
||||
Interlocked::Increment(&m_count);
|
||||
|
||||
T* head = Interlocked::ExchangePointer(&m_head, item);
|
||||
if (head == nullptr)
|
||||
{
|
||||
_ASSERTE(m_tail == nullptr);
|
||||
m_tail = item;
|
||||
}
|
||||
else
|
||||
{
|
||||
item->m_next = head;
|
||||
head->m_prev = item;
|
||||
}
|
||||
|
||||
item->m_containingQueue = this;
|
||||
_ASSERTE(m_count > oldCount);
|
||||
}
|
||||
|
||||
T* TryPop()
|
||||
{
|
||||
if (IsEmpty())
|
||||
|
@ -101,23 +126,24 @@ public:
|
|||
|
||||
T* result;
|
||||
{
|
||||
SatoriLockHolder<SatoriSpinLock> holder(&m_lock);
|
||||
SatoriLockHolder holder(&m_lock);
|
||||
result = m_head;
|
||||
if (result == nullptr)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
T* next = result->m_next;
|
||||
m_count--;
|
||||
m_head = next;
|
||||
result->m_containingQueue = nullptr;
|
||||
m_head = result->m_next;
|
||||
if (m_head == nullptr)
|
||||
if (next == nullptr)
|
||||
{
|
||||
m_tail = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_head->m_prev = nullptr;
|
||||
next->m_prev = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,13 +153,56 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
T* TryPopWithTryEnter()
|
||||
{
|
||||
if (IsEmpty())
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
T* result;
|
||||
{
|
||||
if (!m_lock.TryEnter())
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
result = m_head;
|
||||
if (result == nullptr)
|
||||
{
|
||||
m_lock.Leave();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
T* next = result->m_next;
|
||||
m_count--;
|
||||
m_head = next;
|
||||
result->m_containingQueue = nullptr;
|
||||
if (next == nullptr)
|
||||
{
|
||||
m_tail = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
next->m_prev = nullptr;
|
||||
}
|
||||
|
||||
m_lock.Leave();
|
||||
}
|
||||
|
||||
_ASSERTE(result->m_prev == nullptr);
|
||||
result->m_next = nullptr;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void Enqueue(T* item)
|
||||
{
|
||||
_ASSERTE(item->m_next == nullptr);
|
||||
_ASSERTE(item->m_prev == nullptr);
|
||||
_ASSERTE(item->m_containingQueue == nullptr);
|
||||
|
||||
SatoriLockHolder<SatoriSpinLock> holder(&m_lock);
|
||||
SatoriLockHolder holder(&m_lock);
|
||||
m_count++;
|
||||
item->m_containingQueue = this;
|
||||
if (m_tail == nullptr)
|
||||
|
@ -181,7 +250,7 @@ public:
|
|||
bool TryRemove(T* item)
|
||||
{
|
||||
{
|
||||
SatoriLockHolder<SatoriSpinLock> holder(&m_lock);
|
||||
SatoriLockHolder holder(&m_lock);
|
||||
if (!Contains(item))
|
||||
{
|
||||
return false;
|
||||
|
@ -246,7 +315,7 @@ public:
|
|||
|
||||
protected:
|
||||
QueueKind m_kind;
|
||||
SatoriSpinLock m_lock;
|
||||
SatoriLock m_lock;
|
||||
T* m_head;
|
||||
T* m_tail;
|
||||
size_t m_count;
|
||||
|
|
File diff suppressed because it is too large
|
@ -31,6 +31,7 @@
|
|||
#include "../gc.h"
|
||||
#include "SatoriRegionQueue.h"
|
||||
#include "SatoriWorkList.h"
|
||||
#include "SatoriGate.h"
|
||||
|
||||
class SatoriHeap;
|
||||
class SatoriTrimmer;
|
||||
|
@ -56,7 +57,9 @@ public:
|
|||
void AddEphemeralRegion(SatoriRegion* region);
|
||||
void AddTenuredRegion(SatoriRegion* region);
|
||||
|
||||
// TODO: VS should be moved to Heap?
|
||||
size_t GetNowMillis();
|
||||
size_t GetNowUsecs();
|
||||
|
||||
bool& IsLowLatencyMode();
|
||||
|
||||
|
@ -69,11 +72,13 @@ public:
|
|||
void TryStartGC(int generation, gc_reason reason);
|
||||
void HelpOnce();
|
||||
void MaybeTriggerGC(gc_reason reason);
|
||||
bool IsBlockingPhase();
|
||||
|
||||
void ConcurrentHelp();
|
||||
bool ShouldDoConcurrent(int generation);
|
||||
void ConcurrentWorkerFn();
|
||||
void ShutDown();
|
||||
|
||||
void BlockingMarkForConcurrentHelper();
|
||||
void BlockingMarkForConcurrentImpl();
|
||||
void BlockingMarkForConcurrent();
|
||||
void MaybeAskForHelp();
|
||||
|
||||
|
@ -99,6 +104,11 @@ public:
|
|||
return m_isBarrierConcurrent;
|
||||
}
|
||||
|
||||
inline bool IsNextGcFullGc()
|
||||
{
|
||||
return m_nextGcIsFullGc;
|
||||
}
|
||||
|
||||
bool IsReuseCandidate(SatoriRegion* region);
|
||||
bool IsRelocationCandidate(SatoriRegion* region);
|
||||
bool IsPromotionCandidate(SatoriRegion* region);
|
||||
|
@ -109,7 +119,7 @@ public:
|
|||
return &m_lastEphemeralGcInfo;
|
||||
|
||||
if (kind == gc_kind_full_blocking)
|
||||
return GetLastGcInfo(gc_kind_any); // no concept of blocking GC, every GC has blocking part.
|
||||
return &m_lastTenuredGcInfo; // no concept of background GC, every GC has blocking part.
|
||||
|
||||
if (kind == gc_kind_background)
|
||||
return GetLastGcInfo(gc_kind_any); // no concept of background GC, cant have 2 GCs at a time.
|
||||
|
@ -124,7 +134,7 @@ private:
|
|||
SatoriHeap* m_heap;
|
||||
|
||||
int m_rootScanTicket;
|
||||
uint8_t m_cardScanTicket;
|
||||
int8_t m_cardScanTicket;
|
||||
|
||||
SatoriWorkList* m_workList;
|
||||
SatoriTrimmer* m_trimmer;
|
||||
|
@ -166,6 +176,11 @@ private:
|
|||
static const int CC_MARK_STATE_MARKING = 2;
|
||||
static const int CC_MARK_STATE_DONE = 3;
|
||||
|
||||
static const int CC_CLEAN_STATE_NOT_READY = 0;
|
||||
static const int CC_CLEAN_STATE_SETTING_UP = 1;
|
||||
static const int CC_CLEAN_STATE_CLEANING = 2;
|
||||
static const int CC_CLEAN_STATE_DONE = 3;
|
||||
|
||||
volatile int m_ccStackMarkState;
|
||||
volatile int m_ccStackMarkingThreadsNum;
|
||||
|
||||
|
@ -175,6 +190,7 @@ private:
|
|||
|
||||
bool m_concurrentCardsDone;
|
||||
bool m_concurrentHandlesDone;
|
||||
volatile int m_concurrentCleaningState;
|
||||
|
||||
bool m_isRelocating;
|
||||
bool m_isLowLatencyMode;
|
||||
|
@ -188,13 +204,15 @@ private:
|
|||
int64_t m_gcDurationMillis[3];
|
||||
|
||||
size_t m_gen1Budget;
|
||||
size_t m_totalBudget;
|
||||
size_t m_totalLimit;
|
||||
size_t m_nextGcIsFullGc;
|
||||
|
||||
size_t m_condemnedRegionsCount;
|
||||
size_t m_deferredSweepCount;
|
||||
size_t m_gen1AddedSinceLastCollection;
|
||||
size_t m_gen2AddedSinceLastCollection;
|
||||
size_t m_gen1CountAtLastGen2;
|
||||
size_t m_gcNextTimeTarget;
|
||||
|
||||
size_t m_occupancy[3];
|
||||
size_t m_occupancyAcc[3];
|
||||
|
@ -210,12 +228,14 @@ private:
|
|||
int64_t m_perfCounterTicksPerMilli;
|
||||
int64_t m_perfCounterTicksPerMicro;
|
||||
|
||||
GCEvent* m_helpersGate;
|
||||
volatile int m_gateSignaled;
|
||||
volatile int m_activeHelpers;
|
||||
volatile int m_totalHelpers;
|
||||
SatoriGate* m_workerGate;
|
||||
|
||||
void(SatoriRecycler::* volatile m_activeHelperFn)();
|
||||
volatile int m_gateSignaled;
|
||||
volatile int m_workerWoken;
|
||||
volatile int m_activeWorkers;
|
||||
volatile int m_totalWorkers;
|
||||
|
||||
void(SatoriRecycler::* volatile m_activeWorkerFn)();
|
||||
|
||||
int64_t m_noWorkSince;
|
||||
|
||||
|
@ -224,9 +244,6 @@ private:
|
|||
LastRecordedGcInfo* m_CurrentGcInfo;
|
||||
|
||||
private:
|
||||
|
||||
bool IsBlockingPhase();
|
||||
|
||||
size_t Gen1RegionCount();
|
||||
size_t Gen2RegionCount();
|
||||
size_t RegionCount();
|
||||
|
@ -243,12 +260,12 @@ private:
|
|||
template <bool isConservative>
|
||||
static void MarkFnConcurrent(PTR_PTR_Object ppObject, ScanContext* sc, uint32_t flags);
|
||||
|
||||
static void HelperThreadFn(void* param);
|
||||
int MaxHelpers();
|
||||
static void WorkerThreadMainLoop(void* param);
|
||||
int MaxWorkers();
|
||||
int64_t HelpQuantum();
|
||||
void AskForHelp();
|
||||
void RunWithHelp(void(SatoriRecycler::* method)());
|
||||
bool HelpOnceCore();
|
||||
bool HelpOnceCore(bool minQuantum);
|
||||
|
||||
void PushToEphemeralQueues(SatoriRegion* region);
|
||||
void PushToTenuredQueues(SatoriRegion* region);
|
||||
|
@ -258,7 +275,7 @@ private:
|
|||
|
||||
void IncrementRootScanTicket();
|
||||
void IncrementCardScanTicket();
|
||||
uint8_t GetCardScanTicket();
|
||||
int8_t GetCardScanTicket();
|
||||
|
||||
void MarkOwnStack(gc_alloc_context* aContext, MarkContext* markContext);
|
||||
void MarkThroughCards();
|
||||
|
@ -271,10 +288,11 @@ private:
|
|||
void MarkOwnStackAndDrainQueues();
|
||||
void MarkOwnStackOrDrainQueuesConcurrent(int64_t deadline);
|
||||
bool MarkDemotedAndDrainQueuesConcurrent(int64_t deadline);
|
||||
void PushOrReturnWorkChunk(SatoriWorkChunk * srcChunk);
|
||||
bool DrainMarkQueuesConcurrent(SatoriWorkChunk* srcChunk = nullptr, int64_t deadline = 0);
|
||||
|
||||
bool HasDirtyCards();
|
||||
bool ScanDirtyCardsConcurrent(int64_t deadline);
|
||||
bool CleanCardsConcurrent(int64_t deadline);
|
||||
void CleanCards();
|
||||
bool MarkHandles(int64_t deadline = 0);
|
||||
void ShortWeakPtrScan();
|
||||
|
@ -316,7 +334,7 @@ private:
|
|||
void Relocate();
|
||||
void RelocateWorker();
|
||||
void RelocateRegion(SatoriRegion* region);
|
||||
void FreeRelocatedRegion(SatoriRegion* curRegion);
|
||||
void FreeRelocatedRegion(SatoriRegion* curRegion, bool noLock);
|
||||
void FreeRelocatedRegionsWorker();
|
||||
|
||||
void PromoteHandlesAndFreeRelocatedRegions();
|
||||
|
@ -333,7 +351,7 @@ private:
|
|||
void KeepRegion(SatoriRegion* curRegion);
|
||||
void DrainDeferredSweepQueue();
|
||||
bool DrainDeferredSweepQueueConcurrent(int64_t deadline = 0);
|
||||
void DrainDeferredSweepQueueHelp();
|
||||
void DrainDeferredSweepQueueWorkerFn();
|
||||
void SweepAndReturnRegion(SatoriRegion* curRegion);
|
||||
|
||||
void ASSERT_NO_WORK();
|
||||
|
|
|
@ -113,7 +113,7 @@ SatoriRecycler* SatoriRegion::Recycler()
|
|||
void SatoriRegion::RearmCardsForTenured()
|
||||
{
|
||||
_ASSERTE(Generation() == 2);
|
||||
m_containingPage->WipeCardsForRange(Start(), End(), /* tenured */ true);
|
||||
m_containingPage->ResetCardsForRange(Start(), End(), /* tenured */ true);
|
||||
HasUnmarkedDemotedObjects() = false;
|
||||
|
||||
FreeDemotedTrackers();
|
||||
|
@ -135,42 +135,72 @@ void SatoriRegion::FreeDemotedTrackers()
|
|||
void SatoriRegion::ResetCardsForEphemeral()
|
||||
{
|
||||
_ASSERTE(Generation() == 2);
|
||||
m_containingPage->WipeCardsForRange(Start(), End(), /* tenured */ false);
|
||||
m_containingPage->ResetCardsForRange(Start(), End(), /* tenured */ false);
|
||||
}
|
||||
|
||||
void SatoriRegion::MakeBlank()
|
||||
{
|
||||
_ASSERTE(!m_hasPendingFinalizables);
|
||||
_ASSERTE(!m_finalizableTrackers);
|
||||
_ASSERTE(!m_acceptedPromotedObjects);
|
||||
_ASSERTE(!m_gen2Objects);
|
||||
_ASSERTE(NothingMarked());
|
||||
|
||||
if (m_generation == 2)
|
||||
{
|
||||
this->ResetCardsForEphemeral();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_containingPage->WipeGroupsForRange(Start(), End());
|
||||
}
|
||||
|
||||
m_generation = -1;
|
||||
m_ownerThreadTag = 0;
|
||||
m_escapeFunc = EscapeFn;
|
||||
m_generation = -1;
|
||||
m_occupancyAtReuse = 0;
|
||||
|
||||
// m_end stays the same
|
||||
// m_containingPage stays the same
|
||||
|
||||
m_reusableFor = ReuseLevel::None;
|
||||
_ASSERTE(!m_allocatingOwnerAttachmentPoint);
|
||||
_ASSERTE(!m_gen2Objects);
|
||||
|
||||
m_allocStart = (size_t)&m_firstObject;
|
||||
m_allocEnd = End();
|
||||
m_occupancy = m_allocEnd - m_allocStart;
|
||||
m_occupancyAtReuse = 0;
|
||||
m_sweepsSinceLastAllocation = 0;
|
||||
m_unfinishedAllocationCount = 0;
|
||||
m_markStack = 0;
|
||||
|
||||
// m_used stays the same
|
||||
// m_committed stays the same
|
||||
|
||||
m_escapedSize = 0;
|
||||
m_objCount = 0;
|
||||
_ASSERTE(!m_markStack);
|
||||
|
||||
m_allocBytesAtCollect = 0;
|
||||
|
||||
m_hasFinalizables = false;
|
||||
_ASSERTE(!m_finalizableTrackers);
|
||||
_ASSERTE(!m_finalizableTrackersLock);
|
||||
|
||||
m_sweepsSinceLastAllocation = 0;
|
||||
|
||||
// m_prev
|
||||
// m_next
|
||||
// m_containingQueue all stay the same
|
||||
|
||||
// assume all space reserved to allocations will be used
|
||||
// (we will revert what will be unused)
|
||||
m_occupancy = m_allocEnd - m_allocStart;
|
||||
m_objCount = 0;
|
||||
|
||||
m_unfinishedAllocationCount = 0;
|
||||
|
||||
m_hasPinnedObjects = false;
|
||||
m_hasMarksSet = false;
|
||||
m_hasFinalizables = false;
|
||||
_ASSERTE(!m_hasPendingFinalizables);
|
||||
m_doNotSweep = false;
|
||||
m_reusableFor = ReuseLevel::None;
|
||||
m_hasUnmarkedDemotedObjects = false;
|
||||
_ASSERTE(!m_acceptedPromotedObjects);
|
||||
_ASSERTE(!m_individuallyPromoted);
|
||||
_ASSERTE(!m_hasUnmarkedDemotedObjects);
|
||||
|
||||
#if _DEBUG
|
||||
m_hasMarksSet = false;
|
||||
#endif
|
||||
|
||||
//clear index and free list
|
||||
ClearFreeLists();
|
||||
|
@ -249,38 +279,94 @@ bool SatoriRegion::ValidateIndexEmpty()
|
|||
|
||||
static const int FREE_LIST_NEXT_OFFSET = sizeof(ArrayBase);
|
||||
|
||||
// prefers leftmost bucket that fits to improve locality, possibly at cost to fragmentation
|
||||
size_t SatoriRegion::StartAllocating(size_t minAllocSize)
|
||||
{
|
||||
_ASSERTE(!IsAllocating());
|
||||
|
||||
// skip buckets that certainly will not fit.
|
||||
DWORD bucket;
|
||||
BitScanReverse64(&bucket, minAllocSize);
|
||||
|
||||
// when minAllocSize is not a power of two we could search through the current bucket,
|
||||
// which may have a large enough obj,
|
||||
// but we will just use the next bucket, which guarantees it fits
|
||||
if (minAllocSize & (minAllocSize - 1))
|
||||
{
|
||||
bucket++;
|
||||
}
|
||||
|
||||
bucket = bucket > Satori::MIN_FREELIST_SIZE_BITS ?
|
||||
bucket - Satori::MIN_FREELIST_SIZE_BITS :
|
||||
0;
|
||||
|
||||
// we will check the first free obj in the bucket, but will not dig through the rest.
|
||||
// if the first obj does not fit, we will switch to the next bucket where everything will fit.
|
||||
size_t minFreeObjSize = minAllocSize + Satori::MIN_FREE_SIZE;
|
||||
|
||||
DWORD selectedBucket = Satori::FREELIST_COUNT;
|
||||
SatoriObject* freeObj = m_freeLists[bucket];
|
||||
if (freeObj)
|
||||
{
|
||||
if (freeObj->FreeObjSize() >= minFreeObjSize)
|
||||
{
|
||||
selectedBucket = bucket;
|
||||
}
|
||||
}
|
||||
|
||||
// in higher buckets everything will fit
|
||||
// prefer free objects that start earlier
|
||||
bucket++;
|
||||
for (; bucket < Satori::FREELIST_COUNT; bucket++)
|
||||
{
|
||||
SatoriObject* freeObjCandidate = m_freeLists[bucket];
|
||||
if (freeObjCandidate &&
|
||||
(selectedBucket == Satori::FREELIST_COUNT || freeObjCandidate->Start() < freeObj->Start()))
|
||||
{
|
||||
selectedBucket = bucket;
|
||||
freeObj = freeObjCandidate;
|
||||
}
|
||||
}
|
||||
|
||||
if (selectedBucket < Satori::FREELIST_COUNT)
|
||||
{
|
||||
m_freeLists[selectedBucket] = *(SatoriObject**)(freeObj->Start() + FREE_LIST_NEXT_OFFSET);
|
||||
m_allocStart = freeObj->Start();
|
||||
m_allocEnd = m_allocStart + freeObj->FreeObjSize();
|
||||
SetOccupancy(m_occupancy + m_allocEnd - m_allocStart);
|
||||
ClearIndicesForAllocRange();
|
||||
_ASSERTE(GetAllocRemaining() >= minAllocSize);
|
||||
m_sweepsSinceLastAllocation = 0;
|
||||
return m_allocStart;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
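A hedged restatement of the bucket selection used in StartAllocating above (the helper name is illustrative, and the concrete value of MIN_FREELIST_SIZE_BITS is an assumption): the bucket is floor(log2(minAllocSize)), rounded up for non-powers-of-two so that anything in the chosen bucket is guaranteed to fit, then rebased to the smallest tracked size.

// Illustrative helper; mirrors the bucket math above.
static DWORD BucketForAllocSize(size_t minAllocSize, DWORD minFreelistSizeBits)
{
    DWORD bucket;
    BitScanReverse64(&bucket, minAllocSize);

    // not a power of two: round up so the first object in the bucket is big enough
    if (minAllocSize & (minAllocSize - 1))
    {
        bucket++;
    }

    return bucket > minFreelistSizeBits ? bucket - minFreelistSizeBits : 0;
}
// Example: if MIN_FREELIST_SIZE_BITS were 9 (512-byte minimum), a 24KB request
// has floor(log2) == 14, rounds up to 15, and lands in bucket 6.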
|
||||
// prefers smallest bucket that fits to reduce fragmentation, possibly at cost to locality
|
||||
size_t SatoriRegion::StartAllocatingBestFit(size_t minAllocSize)
|
||||
{
|
||||
_ASSERTE(!IsAllocating());
|
||||
|
||||
// skip buckets that certainly will not fit.
|
||||
DWORD bucket;
|
||||
BitScanReverse64(&bucket, minAllocSize);
|
||||
bucket = bucket > Satori::MIN_FREELIST_SIZE_BITS ?
|
||||
bucket - Satori::MIN_FREELIST_SIZE_BITS :
|
||||
0;
|
||||
|
||||
// we will check the first free obj in the bucket, but will not dig through the rest.
|
||||
// if the first obj does not fit, we will switch to the next bucket where everything will fit.
|
||||
size_t minFreeObjSize = minAllocSize + Satori::MIN_FREE_SIZE;
|
||||
|
||||
for (; bucket < Satori::FREELIST_COUNT; bucket++)
|
||||
{
|
||||
SatoriObject* freeObj = m_freeLists[bucket];
|
||||
if (freeObj)
|
||||
{
|
||||
m_freeLists[bucket] = *(SatoriObject**)(freeObj->Start() + FREE_LIST_NEXT_OFFSET);
|
||||
m_allocStart = freeObj->Start();
|
||||
m_allocEnd = freeObj->End();
|
||||
SetOccupancy(m_occupancy + m_allocEnd - m_allocStart);
|
||||
ClearIndicesForAllocRange();
|
||||
_ASSERTE(GetAllocRemaining() >= minAllocSize);
|
||||
m_sweepsSinceLastAllocation = 0;
|
||||
return m_allocStart;
|
||||
size_t size = freeObj->FreeObjSize();
|
||||
if (size >= minFreeObjSize)
|
||||
{
|
||||
m_freeLists[bucket] = *(SatoriObject**)(freeObj->Start() + FREE_LIST_NEXT_OFFSET);
|
||||
m_allocStart = freeObj->Start();
|
||||
m_allocEnd = m_allocStart + size;
|
||||
SetOccupancy(m_occupancy + m_allocEnd - m_allocStart);
|
||||
ClearIndicesForAllocRange();
|
||||
_ASSERTE(GetAllocRemaining() >= minAllocSize);
|
||||
m_sweepsSinceLastAllocation = 0;
|
||||
return m_allocStart;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -300,7 +386,7 @@ void SatoriRegion::StopAllocating(size_t allocPtr)
|
|||
_ASSERTE(m_occupancy >= unused);
|
||||
SetOccupancy(m_occupancy - unused);
|
||||
SatoriObject* freeObj = SatoriObject::FormatAsFree(allocPtr, unused);
|
||||
AddFreeSpace(freeObj, unused);
|
||||
ReturnFreeSpace(freeObj, unused);
|
||||
}
|
||||
|
||||
m_allocStart = m_allocEnd = 0;
|
||||
|
@ -328,10 +414,47 @@ void SatoriRegion::AddFreeSpace(SatoriObject* freeObj, size_t size)
|
|||
_ASSERTE(bucket >= 0);
|
||||
_ASSERTE(bucket < Satori::FREELIST_COUNT);
|
||||
|
||||
*(SatoriObject**)(freeObj->Start() + FREE_LIST_NEXT_OFFSET) = m_freeLists[bucket];
|
||||
m_freeLists[bucket] = freeObj;
|
||||
// insert at the tail
|
||||
*(SatoriObject**)(freeObj->Start() + FREE_LIST_NEXT_OFFSET) = nullptr;
|
||||
if (m_freeLists[bucket] == nullptr)
|
||||
{
|
||||
m_freeLists[bucket] = m_freeListTails[bucket] = freeObj;
|
||||
return;
|
||||
}
|
||||
|
||||
SatoriObject* tailObj = m_freeListTails[bucket];
|
||||
_ASSERTE(tailObj);
|
||||
*(SatoriObject**)(tailObj->Start() + FREE_LIST_NEXT_OFFSET) = freeObj;
|
||||
m_freeListTails[bucket] = freeObj;
|
||||
}
|
||||
|
||||
void SatoriRegion::ReturnFreeSpace(SatoriObject* freeObj, size_t size)
|
||||
{
|
||||
_ASSERTE(freeObj->Size() == size);
|
||||
// allocSize is smaller than size to make sure the span can always be made parseable
|
||||
// after allocating objects in it.
|
||||
ptrdiff_t allocSize = size - Satori::MIN_FREE_SIZE;
|
||||
if (allocSize < Satori::MIN_FREELIST_SIZE)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
DWORD bucket;
|
||||
BitScanReverse64(&bucket, allocSize);
|
||||
bucket -= (Satori::MIN_FREELIST_SIZE_BITS);
|
||||
_ASSERTE(bucket >= 0);
|
||||
_ASSERTE(bucket < Satori::FREELIST_COUNT);
|
||||
|
||||
// insert at the head, since we are returning what we recently took.
|
||||
*(SatoriObject**)(freeObj->Start() + FREE_LIST_NEXT_OFFSET) = m_freeLists[bucket];
|
||||
|
||||
if (m_freeLists[bucket] == nullptr)
|
||||
{
|
||||
m_freeListTails[bucket] = freeObj;
|
||||
}
|
||||
|
||||
m_freeLists[bucket] = freeObj;
|
||||
}
|
||||
|
||||
bool SatoriRegion::HasFreeSpaceInTopBucket()
|
||||
{
|
||||
|
@ -689,7 +812,9 @@ size_t SatoriRegion::AllocateHuge(size_t size, bool zeroInitialize)
|
|||
// can give refs pointing to Free. (because of card granularity)
|
||||
SatoriObject* SatoriRegion::FindObject(size_t location)
|
||||
{
|
||||
_ASSERTE(m_generation >= 0 && location >= Start() && location < End());
|
||||
_ASSERTE(m_generation >= 0);
|
||||
_ASSERTE(location >= Start());
|
||||
_ASSERTE(location < End());
|
||||
_ASSERTE(m_unfinishedAllocationCount == 0);
|
||||
|
||||
location = min(location, Start() + Satori::REGION_SIZE_GRANULARITY);
|
||||
|
@ -843,7 +968,7 @@ void SatoriRegion::EscapeRecursively(SatoriObject* o)
|
|||
}
|
||||
|
||||
SetEscaped(o);
|
||||
m_escapedSize += o->Size();
|
||||
m_escapedSize += (int32_t)o->Size();
|
||||
|
||||
// now recursively mark all the objects reachable from escaped object.
|
||||
do
|
||||
|
@ -864,7 +989,7 @@ void SatoriRegion::EscapeRecursively(SatoriObject* o)
|
|||
if (child->SameRegion(this) && !IsEscaped(child))
|
||||
{
|
||||
SetEscaped(child);
|
||||
m_escapedSize += child->Size();
|
||||
m_escapedSize += (int32_t)child->Size();
|
||||
PushToMarkStackIfHasPointers(child);
|
||||
}
|
||||
}
|
||||
|
@ -877,18 +1002,21 @@ void SatoriRegion::EscapeRecursively(SatoriObject* o)
|
|||
void SatoriRegion::EscsapeAll()
|
||||
{
|
||||
size_t objLimit = Start() + Satori::REGION_SIZE_GRANULARITY;
|
||||
for (SatoriObject* o = FirstObject(); o->Start() < objLimit; o = o->Next())
|
||||
for (SatoriObject* o = FirstObject(); o->Start() < objLimit;)
|
||||
{
|
||||
size_t size = o->Size();
|
||||
if (!o->IsFree())
|
||||
{
|
||||
EscapeShallow(o);
|
||||
EscapeShallow(o, size);
|
||||
}
|
||||
|
||||
o = (SatoriObject*)(o->Start() + size);
|
||||
}
|
||||
}
|
||||
|
||||
// do not recurse into children
|
||||
// used when escaping all objects in the region anyways
|
||||
void SatoriRegion::EscapeShallow(SatoriObject* o)
|
||||
void SatoriRegion::EscapeShallow(SatoriObject* o, size_t size)
|
||||
{
|
||||
_ASSERTE(o->SameRegion(this));
|
||||
_ASSERTE(!IsEscaped(o));
|
||||
|
@ -899,7 +1027,7 @@ void SatoriRegion::EscapeShallow(SatoriObject* o)
|
|||
// typically objects have died and we have fewer escapes than before the GC,
|
||||
// so we do not bother to check
|
||||
SetEscaped(o);
|
||||
m_escapedSize += o->Size();
|
||||
m_escapedSize += (int32_t)size;
|
||||
|
||||
o->ForEachObjectRef(
|
||||
[&](SatoriObject** ref)
|
||||
|
@ -910,11 +1038,12 @@ void SatoriRegion::EscapeShallow(SatoriObject* o)
|
|||
|
||||
// mark ref location as exposed
|
||||
SetExposed(ref);
|
||||
}
|
||||
},
|
||||
size
|
||||
);
|
||||
}
|
||||
|
||||
void SatoriRegion::SetOccupancy(size_t occupancy, size_t objCount)
|
||||
void SatoriRegion::SetOccupancy(size_t occupancy, int32_t objCount)
|
||||
{
|
||||
_ASSERTE(objCount == 0 || occupancy != 0);
|
||||
_ASSERTE(occupancy <= (Size() - offsetof(SatoriRegion, m_firstObject)));
|
||||
|
@ -996,9 +1125,8 @@ void SatoriRegion::ThreadLocalMark()
|
|||
m_bitmap[bitmapIndex + (markBitOffset >> 6)] |= ((size_t)1 << (markBitOffset & 63));
|
||||
|
||||
SatoriObject* o = ObjectForMarkBit(bitmapIndex, markBitOffset);
|
||||
o->Validate();
|
||||
|
||||
#ifdef _DEBUG
|
||||
o->Validate();
|
||||
escaped += o->Size();
|
||||
#endif
|
||||
|
||||
|
@ -1130,7 +1258,7 @@ void SatoriRegion::ThreadLocalPlan()
|
|||
|
||||
// stats
|
||||
size_t occupancy = 0;
|
||||
size_t objCount = 0;
|
||||
int32_t objCount = 0;
|
||||
|
||||
// moveable: starts at first movable and reachable, as long as there is any free space to slide in
|
||||
size_t lastMarkedEnd = FirstObject()->Start();
|
||||
|
@ -1410,6 +1538,7 @@ void SatoriRegion::ThreadLocalCompact()
|
|||
{
|
||||
size_t freeSpace = d2->Start() - d1->Start();
|
||||
SatoriObject* freeObj = SatoriObject::FormatAsFree(d1->Start(), freeSpace);
|
||||
SetIndicesForObject(freeObj, d2->Start());
|
||||
AddFreeSpace(freeObj, freeSpace);
|
||||
foundFree += freeSpace;
|
||||
|
||||
|
@ -1777,7 +1906,7 @@ void SatoriRegion::UpdateFinalizableTrackers()
|
|||
}
|
||||
}
|
||||
|
||||
void SatoriRegion::UpdatePointersInObject(SatoriObject* o)
|
||||
void SatoriRegion::UpdatePointersInObject(SatoriObject* o, size_t size)
|
||||
{
|
||||
// if the containing region is large, do not engage with the entire object,
|
||||
// schedule update of separate ranges.
|
||||
|
@ -1797,7 +1926,8 @@ void SatoriRegion::UpdatePointersInObject(SatoriObject* o)
|
|||
*ppObject = (SatoriObject*)-ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
size
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -1810,8 +1940,9 @@ void SatoriRegion::UpdatePointers()
|
|||
SatoriObject* o = FirstObject();
|
||||
do
|
||||
{
|
||||
UpdatePointersInObject(o);
|
||||
o = o->Next();
|
||||
size_t size = o->Size();
|
||||
UpdatePointersInObject(o, size);
|
||||
o = (SatoriObject*)(o->Start() + size);
|
||||
} while (o->Start() < objLimit);
|
||||
}
|
||||
|
||||
|
@ -1929,7 +2060,7 @@ bool SatoriRegion::NothingMarked()
|
|||
void SatoriRegion::ClearMarks()
|
||||
{
|
||||
_ASSERTE(this->HasUnmarkedDemotedObjects() == false);
|
||||
memset(&m_bitmap[BITMAP_START], 0, (BITMAP_LENGTH - BITMAP_START) * sizeof(size_t));
|
||||
memset((void*)&m_bitmap[BITMAP_START], 0, (BITMAP_LENGTH - BITMAP_START) * sizeof(size_t));
|
||||
}
|
||||
|
||||
void SatoriRegion::ClearIndex()
|
||||
|
@ -1939,7 +2070,8 @@ void SatoriRegion::ClearIndex()
|
|||
|
||||
void SatoriRegion::ClearFreeLists()
|
||||
{
|
||||
memset(m_freeLists, 0, sizeof(m_freeLists));
|
||||
// clear free lists and free list tails
|
||||
memset(m_freeLists, 0, sizeof(m_freeLists) * 2);
|
||||
}
|
||||
|
||||
void SatoriRegion::Verify(bool allowMarked)
|
||||
|
|
|
@ -76,11 +76,13 @@ public:
|
|||
size_t AllocateHuge(size_t size, bool zeroInitialize);
|
||||
|
||||
size_t StartAllocating(size_t minSize);
|
||||
size_t StartAllocatingBestFit(size_t minAllocSize);
|
||||
void StopAllocating(size_t allocPtr);
|
||||
void StopAllocating();
|
||||
bool IsAllocating();
|
||||
|
||||
void AddFreeSpace(SatoriObject* freeObj, size_t size);
|
||||
void ReturnFreeSpace(SatoriObject * freeObj, size_t size);
|
||||
|
||||
bool HasFreeSpaceInTopBucket();
|
||||
bool HasFreeSpaceInTopNBuckets(int n);
|
||||
|
@ -132,8 +134,8 @@ public:
|
|||
void IndividuallyPromote();
|
||||
void UpdateFinalizableTrackers();
|
||||
void UpdatePointers();
|
||||
void UpdatePointersInObject(SatoriObject* o);
|
||||
void SetCardsForObject(SatoriObject* o);
|
||||
void UpdatePointersInObject(SatoriObject* o, size_t size);
|
||||
void SetCardsForObject(SatoriObject* o, size_t size);
|
||||
|
||||
template <bool promotingAllRegions>
|
||||
void UpdatePointersInPromotedObjects();
|
||||
|
@ -145,7 +147,7 @@ public:
|
|||
bool AnyExposed(size_t from, size_t length);
|
||||
void EscapeRecursively(SatoriObject* obj);
|
||||
void EscsapeAll();
|
||||
void EscapeShallow(SatoriObject* o);
|
||||
void EscapeShallow(SatoriObject* o, size_t size);
|
||||
|
||||
template <typename F>
|
||||
void ForEachFinalizable(F lambda);
|
||||
|
@ -161,18 +163,18 @@ public:
|
|||
bool HasFinalizables();
|
||||
bool& HasPendingFinalizables();
|
||||
|
||||
void SetOccupancy(size_t occupancy, size_t objCount);
|
||||
void SetOccupancy(size_t occupancy, int32_t objCount);
|
||||
void SetOccupancy(size_t occupancy);
|
||||
size_t Occupancy();
|
||||
size_t& OccupancyAtReuse();
|
||||
size_t ObjCount();
|
||||
int32_t& OccupancyAtReuse();
|
||||
int32_t ObjCount();
|
||||
|
||||
bool& HasPinnedObjects();
|
||||
bool& DoNotSweep();
|
||||
bool& AcceptedPromotedObjects();
|
||||
bool& IndividuallyPromoted();
|
||||
|
||||
size_t SweepsSinceLastAllocation();
|
||||
uint32_t SweepsSinceLastAllocation();
|
||||
|
||||
enum class ReuseLevel : uint8_t
|
||||
{
|
||||
|
@ -221,8 +223,8 @@ private:
|
|||
//
|
||||
// we will overlap the map and the header for simplicity of map operations.
|
||||
// it is ok because the first BITMAP_START elements of the map cover the header/map itself and thus will not be used.
|
||||
// +1 to include End(), it will always be 0, but it is conveninet to make it legal map index.
|
||||
size_t m_bitmap[BITMAP_LENGTH + 1];
|
||||
// +1 to include End(), it will always be 0, but it is convenient to make it legal map index.
|
||||
volatile size_t m_bitmap[BITMAP_LENGTH + 1];
|
||||
|
||||
// Header.(can be up to 72 size_t)
|
||||
struct
|
||||
|
@ -232,54 +234,66 @@ private:
|
|||
size_t m_ownerThreadTag;
|
||||
void (*m_escapeFunc)(SatoriObject**, SatoriObject*, SatoriRegion*);
|
||||
int m_generation;
|
||||
ReuseLevel m_reusableFor;
|
||||
SatoriRegion** m_allocatingOwnerAttachmentPoint;
|
||||
// above fields are accessed from asm helpers
|
||||
|
||||
// the following 5 fields change rarely or not at all.
|
||||
size_t m_end;
|
||||
size_t m_committed;
|
||||
size_t m_used;
|
||||
SatoriPage* m_containingPage;
|
||||
|
||||
SatoriRegion* m_prev;
|
||||
SatoriRegion* m_next;
|
||||
SatoriQueue<SatoriRegion>* m_containingQueue;
|
||||
ReuseLevel m_reusableFor;
|
||||
int32_t m_occupancyAtReuse;
|
||||
|
||||
SatoriRegion** m_allocatingOwnerAttachmentPoint;
|
||||
SatoriWorkChunk* m_gen2Objects;
|
||||
|
||||
// ===== 64 bytes boundary
|
||||
|
||||
// Active allocation may happen in the following range.
|
||||
// The range may not be parseable as sequence of objects
|
||||
// The range is in terms of objects, there is embedded off-by-one error for syncblocks.
|
||||
size_t m_allocStart;
|
||||
size_t m_allocEnd;
|
||||
|
||||
// dirty and committed watermarks
|
||||
size_t m_used;
|
||||
size_t m_committed;
|
||||
|
||||
// counting escaped objects
|
||||
// when size goes too high, we stop escaping and do not do local GC.
|
||||
int32_t m_escapedSize;
|
||||
// misc uses in thread-local regions
|
||||
int32_t m_markStack;
|
||||
// alloc bytes at last threadlocal collect
|
||||
size_t m_allocBytesAtCollect;
|
||||
|
||||
SatoriWorkChunk* m_finalizableTrackers;
|
||||
int m_finalizableTrackersLock;
|
||||
|
||||
// active allocation may happen in the following range.
|
||||
// the range may not be parseable as sequence of objects
|
||||
// NB: the range is in terms of objects,
|
||||
// there is embedded off-by-one error for syncblocks
|
||||
size_t m_allocStart;
|
||||
size_t m_allocEnd;
|
||||
uint32_t m_sweepsSinceLastAllocation;
|
||||
|
||||
int32_t m_markStack;
|
||||
// ===== 128 bytes boundary
|
||||
SatoriRegion* m_prev;
|
||||
SatoriRegion* m_next;
|
||||
SatoriQueue<SatoriRegion>* m_containingQueue;
|
||||
|
||||
// counting escaped objects
|
||||
// when size goes too high, we stop escaping and do not do local GC.
|
||||
size_t m_escapedSize;
|
||||
size_t m_allocBytesAtCollect;
|
||||
size_t m_objCount;
|
||||
size_t m_occupancy;
|
||||
size_t m_occupancyAtReuse;
|
||||
size_t m_sweepsSinceLastAllocation;
|
||||
int32_t m_objCount;
|
||||
|
||||
size_t m_unfinishedAllocationCount;
|
||||
int32_t m_unfinishedAllocationCount;
|
||||
|
||||
bool m_hasPinnedObjects;
|
||||
bool m_hasMarksSet;
|
||||
bool m_doNotSweep;
|
||||
bool m_hasFinalizables;
|
||||
bool m_hasPendingFinalizables;
|
||||
bool m_doNotSweep;
|
||||
|
||||
bool m_acceptedPromotedObjects;
|
||||
bool m_individuallyPromoted;
|
||||
bool m_hasUnmarkedDemotedObjects;
|
||||
|
||||
// when demoted, we remember our gen2 objects here
|
||||
SatoriWorkChunk* m_gen2Objects;
|
||||
|
||||
#if _DEBUG
|
||||
bool m_hasMarksSet;
|
||||
#endif
|
||||
SatoriObject* m_freeLists[Satori::FREELIST_COUNT];
|
||||
SatoriObject* m_freeListTails[Satori::FREELIST_COUNT];
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -162,7 +162,7 @@ inline void SatoriRegion::StopEscapeTracking()
|
|||
}
|
||||
|
||||
// Used to simulate writes when containing region is individually promoted.
|
||||
inline void SatoriRegion::SetCardsForObject(SatoriObject* o)
|
||||
inline void SatoriRegion::SetCardsForObject(SatoriObject* o, size_t size)
|
||||
{
|
||||
_ASSERTE(this->Size() == Satori::REGION_SIZE_GRANULARITY);
|
||||
|
||||
|
@ -180,7 +180,8 @@ inline void SatoriRegion::SetCardsForObject(SatoriObject* o)
|
|||
// for simplicity and call a concurrent helper.
|
||||
ContainingPage()->DirtyCardForAddressConcurrent((size_t)ppObject);
|
||||
}
|
||||
}
|
||||
},
|
||||
size
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -279,7 +280,7 @@ bool SatoriRegion::Sweep()
|
|||
m_escapedSize = 0;
|
||||
bool cannotRecycle = this->IsAttachedToAllocatingOwner();
|
||||
size_t occupancy = 0;
|
||||
size_t objCount = 0;
|
||||
int32_t objCount = 0;
|
||||
bool hasFinalizables = false;
|
||||
SatoriObject* o = FirstObject();
|
||||
do
|
||||
|
@ -303,19 +304,20 @@ bool SatoriRegion::Sweep()
|
|||
_ASSERTE(!o->IsFree());
|
||||
cannotRecycle = true;
|
||||
|
||||
size_t size = o->Size();
|
||||
if (isEscapeTracking)
|
||||
{
|
||||
this->EscapeShallow(o);
|
||||
this->EscapeShallow(o, size);
|
||||
}
|
||||
|
||||
if (updatePointers)
|
||||
{
|
||||
UpdatePointersInObject(o);
|
||||
UpdatePointersInObject(o, size);
|
||||
}
|
||||
|
||||
if (individuallyPromoted)
|
||||
{
|
||||
SetCardsForObject(o);
|
||||
SetCardsForObject(o, size);
|
||||
}
|
||||
|
||||
if (!hasFinalizables && o->RawGetMethodTable()->HasFinalizer())
|
||||
|
@ -323,7 +325,6 @@ bool SatoriRegion::Sweep()
|
|||
hasFinalizables = true;
|
||||
}
|
||||
|
||||
size_t size = o->Size();
|
||||
objCount++;
|
||||
occupancy += size;
|
||||
o = (SatoriObject*)(o->Start() + size);
|
||||
|
@ -373,13 +374,13 @@ inline size_t SatoriRegion::Occupancy()
|
|||
return m_occupancy;
|
||||
}
|
||||
|
||||
inline size_t &SatoriRegion::OccupancyAtReuse()
|
||||
inline int32_t &SatoriRegion::OccupancyAtReuse()
|
||||
{
|
||||
_ASSERTE(!IsAllocating());
|
||||
return m_occupancyAtReuse;
|
||||
}
|
||||
|
||||
inline size_t SatoriRegion::ObjCount()
|
||||
inline int32_t SatoriRegion::ObjCount()
|
||||
{
|
||||
return m_objCount;
|
||||
}
|
||||
|
@ -411,7 +412,7 @@ inline bool& SatoriRegion::IndividuallyPromoted()
|
|||
return m_individuallyPromoted;
|
||||
}
|
||||
|
||||
inline size_t SatoriRegion::SweepsSinceLastAllocation()
|
||||
inline uint32_t SatoriRegion::SweepsSinceLastAllocation()
|
||||
{
|
||||
return m_sweepsSinceLastAllocation;
|
||||
}
|
||||
|
@ -550,7 +551,7 @@ inline bool SatoriRegion::CheckAndClearMarked(SatoriObject* o)
|
|||
size_t bitmapIndex = (word >> 9) & (SatoriRegion::BITMAP_LENGTH - 1);
|
||||
size_t mask = (size_t)1 << ((word >> 3) & 63);
|
||||
|
||||
size_t& bitmapWord = m_bitmap[bitmapIndex];
|
||||
volatile size_t& bitmapWord = m_bitmap[bitmapIndex];
|
||||
bool wasMarked = bitmapWord & mask;
|
||||
bitmapWord &= ~mask;
|
||||
return wasMarked;
|
||||
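The shift arithmetic above maps an address to its mark bit: each bit covers an 8-byte granule (the >> 3), 64 granules share one bitmap word (a further >> 6, hence >> 9 in total), and masking with BITMAP_LENGTH - 1 keeps only the in-region part of the address since regions are power-of-two aligned. A hedged restatement as a standalone helper:

// Illustrative only; 'address' is the object's address, as in the code above.
static void MarkBitFor(size_t address, size_t bitmapLength, size_t* wordIndex, size_t* mask)
{
    *wordIndex = (address >> 9) & (bitmapLength - 1);   // 8-byte granule, 64 granules per word
    *mask = (size_t)1 << ((address >> 3) & 63);         // which granule within that word
}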
|
@ -683,6 +684,7 @@ void SatoriRegion::UpdatePointersInPromotedObjects()
|
|||
_ASSERTE(!relocated->IsFree());
|
||||
|
||||
SatoriPage* page = relocated->ContainingRegion()->ContainingPage();
|
||||
size_t size = relocated->Size();
|
||||
relocated->ForEachObjectRef(
|
||||
[&](SatoriObject** ppObject)
|
||||
{
|
||||
|
@ -707,10 +709,11 @@ void SatoriRegion::UpdatePointersInPromotedObjects()
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
size
|
||||
);
|
||||
|
||||
o = o->Next();
|
||||
o = (SatoriObject*)(o->Start() + size);
|
||||
} while (o->Start() < objLimit);
|
||||
}
|
||||
|
||||
|
|
|
@ -208,3 +208,16 @@ SatoriRegion* SatoriRegionQueue::TryDequeueIfHasFreeSpaceInTopBucket()
|
|||
result->m_prev = nullptr;
|
||||
return result;
|
||||
}
|
||||
|
||||
SatoriRegionQueue* SatoriRegionQueue::AllocAligned(QueueKind kind)
|
||||
{
|
||||
const size_t align = 64;
|
||||
#ifdef _MSC_VER
|
||||
void* buffer = _aligned_malloc(sizeof(SatoriRegionQueue), align);
|
||||
#else
|
||||
void* buffer = malloc(sizeof(SatoriRegionQueue) + align);
|
||||
buffer = (void*)ALIGN_UP((size_t)buffer, align);
|
||||
#endif
|
||||
return new(buffer)SatoriRegionQueue(kind);
|
||||
}
|
||||
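The non-MSVC branch above over-allocates by the alignment and rounds the pointer up, dropping the original pointer. That is fine for process-lifetime singletons like these queues, but the result must never be passed to free(). A hedged sketch of the same pattern as a generic helper (the name and template shape are illustrative, not part of the sources):

// Illustrative helper; assumes <new> and <cstdlib> are available in this TU.
template <typename T>
T* AllocAlignedSingleton()
{
    const size_t align = 64;
#ifdef _MSC_VER
    void* buffer = _aligned_malloc(sizeof(T), align);
#else
    // over-allocate and round up; the unaligned pointer is intentionally leaked
    void* buffer = malloc(sizeof(T) + align);
    buffer = (void*)(((size_t)buffer + align - 1) & ~(align - 1));
#endif
    return new (buffer) T();
}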
|
||||
|
|
|
@ -42,6 +42,8 @@ public:
|
|||
SatoriRegion* TryPopWithSize(size_t regionSize, SatoriRegion* &putBack);
|
||||
SatoriRegion* TryRemoveWithSize(size_t regionSize, SatoriRegion*& putBack);
|
||||
SatoriRegion* TryDequeueIfHasFreeSpaceInTopBucket();
|
||||
|
||||
static SatoriRegionQueue* AllocAligned(QueueKind kind);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -43,8 +43,8 @@ SatoriTrimmer::SatoriTrimmer(SatoriHeap* heap)
|
|||
m_heap = heap;
|
||||
m_state = TRIMMER_STATE_STOPPED;
|
||||
|
||||
m_gate = new (nothrow) GCEvent;
|
||||
m_gate->CreateAutoEventNoThrow(false);
|
||||
m_event = new (nothrow) GCEvent;
|
||||
m_event->CreateAutoEventNoThrow(false);
|
||||
|
||||
if (SatoriUtil::IsTrimmingEnabled())
|
||||
{
|
||||
|
@ -60,18 +60,25 @@ void SatoriTrimmer::LoopFn(void* inst)
|
|||
|
||||
void SatoriTrimmer::Loop()
|
||||
{
|
||||
int64_t lastGen2 = m_heap->Recycler()->GetCollectionCount(2);
|
||||
while (true)
|
||||
{
|
||||
int64_t curGen2 = m_heap->Recycler()->GetCollectionCount(2);
|
||||
|
||||
// limit the trim rate to once per 1 sec + 1 gen2 gc.
|
||||
do
|
||||
// limit the re-trim rate to once per 5 sec.
|
||||
// we would also require that gen2 gc happened since the last round.
|
||||
while (true)
|
||||
{
|
||||
int64_t newGen2 = m_heap->Recycler()->GetCollectionCount(2);
|
||||
if (lastGen2 != newGen2)
|
||||
{
|
||||
lastGen2 = newGen2;
|
||||
break;
|
||||
}
|
||||
|
||||
Interlocked::CompareExchange(&m_state, TRIMMER_STATE_STOPPED, TRIMMER_STATE_RUNNING);
|
||||
// we are not running here, so we can sleep a bit before continuing.
|
||||
GCToOSInterface::Sleep(1000);
|
||||
GCToOSInterface::Sleep(5000);
|
||||
StopAndWait();
|
||||
} while (curGen2 == m_heap->Recycler()->GetCollectionCount(2));
|
||||
}
|
||||
|
||||
m_heap->ForEachPage(
|
||||
[&](SatoriPage* page)
|
||||
|
@ -83,6 +90,8 @@ void SatoriTrimmer::Loop()
|
|||
StopAndWait();
|
||||
}
|
||||
|
||||
int64_t lastGen1 = m_heap->Recycler()->GetCollectionCount(1);
|
||||
|
||||
page->ForEachRegion(
|
||||
[&](SatoriRegion* region)
|
||||
{
|
||||
|
@ -106,13 +115,25 @@ void SatoriTrimmer::Loop()
|
|||
|
||||
if (didSomeWork)
|
||||
{
|
||||
// limit the decommit/coalesce rate to 1 region/msec.
|
||||
GCToOSInterface::Sleep(1);
|
||||
// limit the decommit/coalesce rate to 1 region/10 msec.
|
||||
GCToOSInterface::Sleep(10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// this is a low priority task, if something needs to run, yield
|
||||
GCToOSInterface::YieldThread(0);
|
||||
|
||||
// also we will pause for 1 sec if there was a GC - to further reduce the churn
|
||||
// if the app is allocation-active.
|
||||
int64_t newGen1 = m_heap->Recycler()->GetCollectionCount(1);
|
||||
if (newGen1 != lastGen1)
|
||||
{
|
||||
lastGen1 = newGen1;
|
||||
GCToOSInterface::Sleep(1000);
|
||||
}
|
||||
|
||||
if (m_state != TRIMMER_STATE_RUNNING)
|
||||
{
|
||||
StopAndWait();
|
||||
|
@ -129,6 +150,9 @@ void SatoriTrimmer::StopAndWait()
|
|||
while (true)
|
||||
{
|
||||
tryAgain:
|
||||
|
||||
// this is a low priority task, if something needs to run, yield
|
||||
GCToOSInterface::YieldThread(0);
|
||||
int state = m_state;
|
||||
switch (state)
|
||||
{
|
||||
|
@ -150,7 +174,7 @@ void SatoriTrimmer::StopAndWait()
|
|||
|
||||
if (Interlocked::CompareExchange(&m_state, TRIMMER_STATE_BLOCKED, state) == state)
|
||||
{
|
||||
m_gate->Wait(INFINITE, false);
|
||||
m_event->Wait(INFINITE, false);
|
||||
}
|
||||
continue;
|
||||
case TRIMMER_STATE_RUNNING:
|
||||
|
@ -170,7 +194,7 @@ void SatoriTrimmer::SetOkToRun()
|
|||
case TRIMMER_STATE_BLOCKED:
|
||||
// trimmer can't get out of BLOCKED by itself, ordinary assignment is ok
|
||||
m_state = TRIMMER_STATE_OK_TO_RUN;
|
||||
m_gate->Set();
|
||||
m_event->Set();
|
||||
break;
|
||||
case TRIMMER_STATE_STOPPED:
|
||||
Interlocked::CompareExchange(&m_state, TRIMMER_STATE_OK_TO_RUN, state);
|
||||
|
|
|
@ -51,7 +51,7 @@ private:
|
|||
static const int TRIMMER_STATE_RUNNING = 3;
|
||||
|
||||
SatoriHeap* m_heap;
|
||||
GCEvent* m_gate;
|
||||
GCEvent* m_event;
|
||||
size_t m_lastGen2Count;
|
||||
volatile int m_state;
|
||||
|
||||
|
|
|
@ -64,18 +64,31 @@ namespace Satori
|
|||
// we use a trivial array object to fill holes, thus this is the size of a shortest array object.
|
||||
static const size_t MIN_FREE_SIZE = 3 * sizeof(size_t);
|
||||
|
||||
// ~1024 items for now, we can fiddle with size a bit later
|
||||
const static size_t MARK_CHUNK_SIZE = 1024 * sizeof(size_t);
|
||||
// If a single mark takes very roughly ~50ns (5-20 for the CAS plus some extra), then 1K object marks take ~50us;
|
||||
// we set the chunk to roughly 0.5K so a chunk should mark in a few tens of microseconds
|
||||
const static size_t MARK_CHUNK_COUNT = 512;
|
||||
|
||||
// this includes header, so the number of objects is slightly less (by -2)
|
||||
const static size_t MARK_CHUNK_SIZE = MARK_CHUNK_COUNT * sizeof(size_t);
|
||||
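A back-of-the-envelope check of the sizing above, using the commit's own ~50ns-per-mark ballpark (a rough assumption, not a measurement):

// Illustrative arithmetic only.
constexpr size_t ApproxNsPerMark  = 50;
constexpr size_t ObjectsPerChunk  = 512 - 2;                            // capacity minus the two header slots
constexpr size_t ApproxNsPerChunk = ObjectsPerChunk * ApproxNsPerMark;  // ~25,500 ns, i.e. a few tens of microseconds
static_assert(ApproxNsPerChunk < 50000, "a chunk should mark well under the ~50us a 1K-object chunk would take");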
|
||||
// objects that are bigger are chunked into ranges when marking.
|
||||
// the threshold is slightly less than MARK_CHUNK_SIZE, so that an object in the range
|
||||
// could fit into the same chunk
|
||||
const static size_t MARK_RANGE_THRESHOLD = MARK_CHUNK_SIZE - 2 * sizeof(size_t);
|
||||
|
||||
// if we have more than twice this much and work list is empty we can share half
|
||||
const static int SHARE_WORK_THRESHOLD = 4;
|
||||
|
||||
// address bits set to track finalizable that needs to be scheduled to F-queue
|
||||
const static size_t FINALIZATION_PENDING = 1;
|
||||
|
||||
static const int BYTES_PER_CARD_BYTE = 512;
|
||||
static const int CARD_BYTES_IN_CARD_GROUP = Satori::REGION_SIZE_GRANULARITY / BYTES_PER_CARD_BYTE;
|
||||
static const int BYTES_PER_CARD_GROUP = REGION_SIZE_GRANULARITY / 2;
|
||||
static const int CARD_BYTES_IN_CARD_GROUP = Satori::BYTES_PER_CARD_GROUP / BYTES_PER_CARD_BYTE;
|
||||
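A worked example of the card geometry above, assuming the 2MB region granule implied by the "2048 bytes per 1Gb" note in the card-group layout comments earlier in this diff:

// Illustrative arithmetic only; the 2MB granule is an assumption.
constexpr size_t kRegionGranule     = 2 * 1024 * 1024;
constexpr size_t kBytesPerCardGroup = kRegionGranule / 2;          // 1MB of heap per card group
constexpr size_t kCardBytesPerGroup = kBytesPerCardGroup / 512;    // 2048 card bytes per group
constexpr size_t kGroupBytesPerGB   = (1024 * 1024 * 1024 / kBytesPerCardGroup) * 2; // 2048 bytes of group state per GB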
|
||||
namespace CardState
|
||||
{
|
||||
static const int8_t EPHEMERAL = -128; // 0b10000000
|
||||
static const int8_t EPHEMERAL = -128; // 0b10000000 only used in cards (not groups or higher)
|
||||
static const int8_t BLANK = 0;
|
||||
static const int8_t REMEMBERED = 1;
|
||||
static const int8_t PROCESSING = 2;
|
||||
|
@ -163,23 +176,6 @@ public:
|
|||
#endif
|
||||
}
|
||||
|
||||
static size_t CommitGranularity()
|
||||
{
|
||||
// we can support sizes that are > OS page and binary fractions of REGION_SIZE_GRANULARITY.
|
||||
// we can also support PAGE_SIZE_GRANULARITY
|
||||
size_t result = 1024 * 32;
|
||||
|
||||
// result = Satori::REGION_SIZE_GRANULARITY;
|
||||
|
||||
// result = Satori::PAGE_SIZE_GRANULARITY;
|
||||
|
||||
#if defined(TARGET_LINUX) && defined(TARGET_ARM64)
|
||||
result = max(result, GCToOSInterface::GetPageSize());
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// TUNING: Needs tuning?
|
||||
// When doing regular allocation we clean this much memory
|
||||
// if we do cleaning, and if available.
|
||||
|
@ -189,43 +185,55 @@ public:
|
|||
return 16 * 1024;
|
||||
}
|
||||
|
||||
// COMPlus_gcConservative
|
||||
// DOTNET_gcConservative
|
||||
static bool IsConservativeMode()
|
||||
{
|
||||
return (GCConfig::GetConservativeGC());
|
||||
}
|
||||
|
||||
// COMPlus_gcConcurrent
|
||||
static bool IsConcurrent()
|
||||
// DOTNET_gcConcurrent
|
||||
static bool IsConcurrentEnabled()
|
||||
{
|
||||
return (GCConfig::GetConcurrentGC());
|
||||
}
|
||||
|
||||
// COMPlus_gcRelocatingGen1
|
||||
// DOTNET_gcRelocatingGen1
|
||||
static bool IsRelocatingInGen1()
|
||||
{
|
||||
return (GCConfig::GetRelocatingInGen1());
|
||||
}
|
||||
|
||||
// COMPlus_gcRelocatingGen2
|
||||
// DOTNET_gcRelocatingGen2
|
||||
static bool IsRelocatingInGen2()
|
||||
{
|
||||
return (GCConfig::GetRelocatingInGen2());
|
||||
}
|
||||
|
||||
// COMPlus_gcThreadLocal
|
||||
static bool IsThreadLocalGCEnabled()
|
||||
// DOTNET_gcGen0
|
||||
static bool IsGen0Enabled()
|
||||
{
|
||||
return (GCConfig::GetThreadLocalGC());
|
||||
return (GCConfig::GetGen0GC());
|
||||
}
|
||||
|
||||
// COMPlus_gcTrim
|
||||
// DOTNET_gcGen1
|
||||
static bool IsGen1Enabled()
|
||||
{
|
||||
return (GCConfig::GetGen1GC());
|
||||
}
|
||||
|
||||
// DOTNET_gcTHP
|
||||
static bool UseTHP()
|
||||
{
|
||||
return (GCConfig::GetUseTHP());
|
||||
}
|
||||
|
||||
// DOTNET_gcTrim
|
||||
static bool IsTrimmingEnabled()
|
||||
{
|
||||
return (GCConfig::GetTrimmigGC());
|
||||
}
|
||||
|
||||
// COMPlus_GCLatencyMode
|
||||
// DOTNET_GCLatencyMode
|
||||
static bool IsLowLatencyMode()
|
||||
{
|
||||
return (GCConfig::GetLatencyMode()) >= 2;
|
||||
|
@ -242,11 +250,65 @@ public:
|
|||
return partitionCount;
|
||||
}
|
||||
|
||||
// COMPlus_gcParallel
|
||||
static int MaxHelpersCount()
|
||||
// DOTNET_gcParallel
|
||||
static int MaxWorkersCount()
|
||||
{
|
||||
return (int)GCConfig::GetParallelGC();
|
||||
}
|
||||
|
||||
// DOTNET_gcRate
|
||||
static int GcRate()
|
||||
{
|
||||
int gcRate = (int)GCConfig::GetGCRate();
|
||||
if (gcRate == -1)
|
||||
{
|
||||
#if _DEBUG
|
||||
// minimum rate-limiting in debug
|
||||
return 0;
|
||||
#else
|
||||
return 3;
|
||||
#endif
|
||||
}
|
||||
|
||||
return gcRate;
|
||||
}
|
||||
|
||||
// DOTNET_gcSpin
|
||||
static int GcSpin()
|
||||
{
|
||||
int gcSpin = (int)GCConfig::GetGCSpin();
|
||||
if (gcSpin == -1)
|
||||
{
|
||||
return 10;
|
||||
}
|
||||
|
||||
return gcSpin;
|
||||
}
|
||||
|
||||
static size_t CommitGranularity()
|
||||
{
|
||||
// we can support sizes that are > OS page and binary fractions of REGION_SIZE_GRANULARITY.
|
||||
// we can also support PAGE_SIZE_GRANULARITY
|
||||
size_t result = 1024 * 32;
|
||||
|
||||
#if defined(TARGET_LINUX)
|
||||
|
||||
#if defined(TARGET_ARM64)
|
||||
result = max(result, GCToOSInterface::GetPageSize());
|
||||
#endif
|
||||
|
||||
if (UseTHP())
|
||||
{
|
||||
result = Satori::REGION_SIZE_GRANULARITY;
|
||||
}
|
||||
#endif
|
||||
|
||||
// result = Satori::REGION_SIZE_GRANULARITY;
|
||||
|
||||
// result = Satori::PAGE_SIZE_GRANULARITY;
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "../gc.h"
|
||||
#include "SatoriUtil.h"
|
||||
#include "SatoriQueue.h"
|
||||
#include "SatoriObject.h"
|
||||
|
||||
class SatoriWorkChunk
|
||||
{
|
||||
|
@ -51,11 +52,12 @@ public:
|
|||
|
||||
static size_t Capacity()
|
||||
{
|
||||
return (Satori::MARK_CHUNK_SIZE - sizeof(SatoriWorkChunk)) / sizeof(SatoriObject*);
|
||||
return Satori::MARK_CHUNK_SIZE / sizeof(SatoriObject*) - /* m_top, m_next*/ 2;
|
||||
}
|
||||
|
||||
size_t Count()
|
||||
{
|
||||
_ASSERTE(!IsRange());
|
||||
return m_top;
|
||||
}
|
||||
|
||||
|
@ -102,6 +104,21 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
void TakeFrom(SatoriWorkChunk* other, size_t count)
|
||||
{
|
||||
_ASSERTE(Count() == 0);
|
||||
_ASSERTE(other->Count() >= count);
|
||||
|
||||
m_top = count;
|
||||
other->m_top -= count;
|
||||
|
||||
size_t otherTop = other->m_top;
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
m_data[i] = other->m_data[otherTop + i];
|
||||
}
|
||||
}
|
||||
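A hedged sketch of how TakeFrom above pairs with SHARE_WORK_THRESHOLD: a worker that holds plenty of queued objects while the shared list is empty can split off part of its chunk and publish it. The wiring below is illustrative; the real policy lives in SatoriRecycler, whose diff is suppressed above.

// Illustrative only: the chunk/list types are the ones in this file, but the
// policy and the availability of an empty spare chunk are assumptions.
static void MaybeShareWork(SatoriWorkChunk* current, SatoriWorkChunk* spare, SatoriWorkList* sharedList)
{
    if (current->Count() > 2 * Satori::SHARE_WORK_THRESHOLD && sharedList->IsEmpty())
    {
        spare->TakeFrom(current, current->Count() / 2);  // move half of the items
        sharedList->Push(spare);                         // publish for other workers
    }
}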
|
||||
void SetNext(SatoriWorkChunk* next)
|
||||
{
|
||||
m_next = next;
|
||||
|
|
78
src/coreclr/gc/satori/SatoriWorkList.cpp
Normal file
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2024 Vladimir Sadov
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
// SatoriWorkList.cpp
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include "gcenv.h"
|
||||
#include "../env/gcenv.os.h"
|
||||
#include "SatoriWorkList.h"
|
||||
|
||||
NOINLINE
|
||||
void SatoriWorkList::PushSlow(SatoriWorkChunk* item)
|
||||
{
|
||||
uint32_t collisions = 1;
|
||||
while (true)
|
||||
{
|
||||
SatoriWorkList orig = *this;
|
||||
item->m_next = orig.m_head;
|
||||
if (Cas128((int64_t*)this, orig.m_aba + 1, (int64_t)item, (int64_t*)&orig))
|
||||
break;
|
||||
|
||||
SatoriLock::CollisionBackoff(collisions++);
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
Interlocked::Increment(&m_count);
|
||||
#endif
|
||||
}
|
||||
|
||||
NOINLINE
|
||||
SatoriWorkChunk* SatoriWorkList::TryPopSlow()
|
||||
{
|
||||
uint32_t collisions = 1;
|
||||
SatoriWorkList orig;
|
||||
while (true)
|
||||
{
|
||||
orig = *this;
|
||||
if (orig.m_head == nullptr)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (Cas128((int64_t*)this, orig.m_aba + 1, (int64_t)orig.m_head->m_next, (int64_t*)&orig))
|
||||
break;
|
||||
|
||||
SatoriLock::CollisionBackoff(collisions++);
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
Interlocked::Decrement(&m_count);
|
||||
#endif
|
||||
|
||||
SatoriWorkChunk* result = orig.m_head;
|
||||
result->m_next = nullptr;
|
||||
return result;
|
||||
}
|
|
@ -31,16 +31,43 @@
|
|||
#include "../gc.h"
|
||||
#include "SatoriWorkChunk.h"
|
||||
|
||||
|
||||
#if defined(TARGET_WINDOWS)
|
||||
FORCEINLINE uint8_t Cas128(int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult)
|
||||
{
|
||||
return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult);
|
||||
}
|
||||
#else
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Watomic-alignment"
|
||||
FORCEINLINE uint8_t Cas128(int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult)
|
||||
{
|
||||
__int128_t iValue = ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow;
|
||||
return __atomic_compare_exchange_n ((__int128_t*)pDst, (__int128_t*)pComparandAndResult, iValue, /*weak*/ true, /* success_memorder */ __ATOMIC_SEQ_CST, /* failure_memorder */ __ATOMIC_RELAXED);
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
#endif // TARGET_WINDOWS
|
||||
|
||||
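Push and TryPop below CAS the head pointer together with a monotonically increasing tag as one 16-byte unit, which is what the -mcx16 compile flag and the Cas128 wrappers above exist for: a pop cannot succeed against a head that was popped and re-pushed in the meantime, because the tag would have advanced. A minimal sketch of the layout being compared and swapped (illustrative; the real fields are the anonymous struct inside SatoriWorkList):

// Illustrative only. On little-endian targets the low 64 bits are the head
// pointer and the high 64 bits are the ABA tag, matching the Cas128 call sites
// below, which pass (tag + 1) as the new high word and the new head as the low word.
struct alignas(16) TaggedHead
{
    void*  m_head;   // low word
    size_t m_aba;    // high word, incremented on every successful CAS
};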
class SatoriWorkList
|
||||
{
|
||||
public:
|
||||
SatoriWorkList() :
|
||||
m_lock(), m_head()
|
||||
m_head(), m_aba()
|
||||
#ifdef _DEBUG
|
||||
, m_count()
|
||||
#endif
|
||||
{}
|
||||
|
||||
static SatoriWorkList* AllocAligned()
|
||||
{
|
||||
m_lock.Initialize();
|
||||
const size_t align = 64;
|
||||
#ifdef _MSC_VER
|
||||
void* buffer = _aligned_malloc(sizeof(SatoriWorkList), align);
|
||||
#else
|
||||
void* buffer = malloc(sizeof(SatoriWorkList) + align);
|
||||
buffer = (void*)ALIGN_UP((size_t)buffer, align);
|
||||
#endif
|
||||
return new(buffer)SatoriWorkList();
|
||||
}
|
||||
|
||||
bool IsEmpty()
|
||||
|
@ -48,42 +75,44 @@ public:
|
|||
return m_head == nullptr;
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
void Push(SatoriWorkChunk* item)
|
||||
{
|
||||
_ASSERTE(item->m_next == nullptr);
|
||||
|
||||
SatoriLockHolder<SatoriSpinLock> holder(&m_lock);
|
||||
item->m_next = m_head;
|
||||
m_head = item;
|
||||
SatoriWorkList orig = *this;
|
||||
item->m_next = orig.m_head;
|
||||
if (Cas128((int64_t*)this, orig.m_aba + 1, (int64_t)item, (int64_t*)&orig))
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
m_count++;
|
||||
Interlocked::Increment(&m_count);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
PushSlow(item);
|
||||
}
|
||||
|
||||
FORCEINLINE
|
||||
SatoriWorkChunk* TryPop()
|
||||
{
|
||||
if (IsEmpty())
|
||||
SatoriWorkList orig = *this;
|
||||
if (orig.m_head == nullptr)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
SatoriWorkChunk* result;
|
||||
if (Cas128((int64_t*)this, orig.m_aba + 1, (int64_t)orig.m_head->m_next, (int64_t*)&orig))
|
||||
{
|
||||
SatoriLockHolder<SatoriSpinLock> holder(&m_lock);
|
||||
result = m_head;
|
||||
if (result == nullptr)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
m_head = result->m_next;
|
||||
#ifdef _DEBUG
|
||||
m_count--;
|
||||
#endif
|
||||
#ifdef _DEBUG
|
||||
Interlocked::Decrement(&m_count);
|
||||
#endif
|
||||
SatoriWorkChunk* result = orig.m_head;
|
||||
result->m_next = nullptr;
|
||||
return result;
|
||||
}
|
||||
|
||||
result->m_next = nullptr;
|
||||
return result;
|
||||
return TryPopSlow();
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
@ -94,11 +123,20 @@ public:
|
|||
#endif
|
||||
|
||||
private:
|
||||
SatoriSpinLock m_lock;
|
||||
SatoriWorkChunk* m_head;
|
||||
struct
|
||||
{
|
||||
SatoriWorkChunk* volatile m_head;
|
||||
volatile size_t m_aba;
|
||||
};
|
||||
#ifdef _DEBUG
|
||||
size_t m_count;
|
||||
#endif
|
||||
|
||||
NOINLINE
|
||||
void PushSlow(SatoriWorkChunk* item);
|
||||
|
||||
NOINLINE
|
||||
SatoriWorkChunk* TryPopSlow();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -306,3 +306,151 @@ bool GCEvent::CreateOSManualEventNoThrow(bool initialState)
|
|||
m_impl = event;
|
||||
return true;
|
||||
}
|
||||
|
||||
#define _INC_PTHREADS
|
||||
#include "..\satori\SatoriGate.h"
|
||||
|
||||
#if defined(TARGET_LINUX)
|
||||
|
||||
#include <linux/futex.h> /* Definition of FUTEX_* constants */
|
||||
#include <sys/syscall.h> /* Definition of SYS_* constants */
|
||||
#include <unistd.h>
|
||||
|
||||
#ifndef INT_MAX
|
||||
#define INT_MAX 2147483647
|
||||
#endif
|
||||
|
||||
SatoriGate::SatoriGate()
|
||||
{
|
||||
m_state = s_blocking;
|
||||
}
|
||||
|
||||
// returns true if was woken up. false if timed out
|
||||
bool SatoriGate::TimedWait(int timeout)
|
||||
{
|
||||
timespec t;
|
||||
uint64_t nanoseconds = (uint64_t)timeout * tccMilliSecondsToNanoSeconds;
|
||||
t.tv_sec = nanoseconds / tccSecondsToNanoSeconds;
|
||||
t.tv_nsec = nanoseconds % tccSecondsToNanoSeconds;
|
||||
|
||||
long waitResult = syscall(SYS_futex, &m_state, FUTEX_WAIT_PRIVATE, s_blocking, &t, NULL, 0);
|
||||
|
||||
// woken, not blocking, interrupted, timeout
|
||||
assert(waitResult == 0 || errno == EAGAIN || errno == ETIMEDOUT || errno == EINTR);
|
||||
|
||||
bool woken = waitResult == 0 || errno != ETIMEDOUT;
|
||||
if (woken)
|
||||
{
|
||||
// consume the wake
|
||||
m_state = s_blocking;
|
||||
}
|
||||
|
||||
return woken;
|
||||
}
|
||||
|
||||
void SatoriGate::Wait()
|
||||
{
|
||||
syscall(SYS_futex, &m_state, FUTEX_WAIT_PRIVATE, s_blocking, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
void SatoriGate::WakeAll()
|
||||
{
|
||||
m_state = s_open;
|
||||
syscall(SYS_futex, &m_state, FUTEX_WAKE_PRIVATE, s_blocking, INT_MAX , NULL, 0);
|
||||
}
|
||||
|
||||
void SatoriGate::WakeOne()
|
||||
{
|
||||
m_state = s_open;
|
||||
syscall(SYS_futex, &m_state, FUTEX_WAKE_PRIVATE, s_blocking, 1, NULL, 0);
|
||||
}
|
||||
#else
|
||||
SatoriGate::SatoriGate()
|
||||
{
|
||||
m_cs = new (nothrow) pthread_mutex_t();
|
||||
m_cv = new (nothrow) pthread_cond_t();
|
||||
|
||||
pthread_mutex_init(m_cs, NULL);
|
||||
pthread_condattr_t attrs;
|
||||
pthread_condattr_init(&attrs);
|
||||
#if HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_CLOCK_GETTIME_NSEC_NP
|
||||
// Ensure that the pthread_cond_timedwait will use CLOCK_MONOTONIC
|
||||
pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC);
|
||||
#endif // HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_CLOCK_GETTIME_NSEC_NP
|
||||
pthread_cond_init(m_cv, &attrs);
|
||||
pthread_condattr_destroy(&attrs);
|
||||
}
|
||||
|
||||
// returns true if was woken up
|
||||
bool SatoriGate::TimedWait(int timeout)
|
||||
{
|
||||
timespec endTime;
|
||||
#if HAVE_CLOCK_GETTIME_NSEC_NP
|
||||
uint64_t endNanoseconds;
|
||||
uint64_t nanoseconds = (uint64_t)timeout * tccMilliSecondsToNanoSeconds;
|
||||
NanosecondsToTimeSpec(nanoseconds, &endTime);
|
||||
endNanoseconds = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) + nanoseconds;
|
||||
#elif HAVE_PTHREAD_CONDATTR_SETCLOCK
|
||||
clock_gettime(CLOCK_MONOTONIC, &endTime);
|
||||
TimeSpecAdd(&endTime, timeout);
|
||||
#else
|
||||
#error "Don't know how to perform timed wait on this platform"
|
||||
#endif
|
||||
|
||||
int waitResult = 0;
|
||||
pthread_mutex_lock(m_cs);
|
||||
#if HAVE_CLOCK_GETTIME_NSEC_NP
|
||||
// Since OSX doesn't support CLOCK_MONOTONIC, we use relative variant of the timed wait.
|
||||
waitResult = m_state == s_open ?
|
||||
0 :
|
||||
pthread_cond_timedwait_relative_np(m_cv, m_cs, &endTime);
|
||||
#else // HAVE_CLOCK_GETTIME_NSEC_NP
|
||||
waitResult = m_state == SatoriGate::s_open ?
|
||||
0 :
|
||||
pthread_cond_timedwait(m_cv, m_cs, &endTime);
|
||||
#endif // HAVE_CLOCK_GETTIME_NSEC_NP
|
||||
pthread_mutex_unlock(m_cs);
|
||||
assert(waitResult == 0 || waitResult == ETIMEDOUT);
|
||||
|
||||
bool woken = waitResult == 0;
|
||||
if (woken)
|
||||
{
|
||||
// consume the wake
|
||||
m_state = s_blocking;
|
||||
}
|
||||
|
||||
return woken;
|
||||
}
|
||||
|
||||
void SatoriGate::Wait()
|
||||
{
|
||||
int waitResult;
|
||||
pthread_mutex_lock(m_cs);
|
||||
|
||||
waitResult = m_state == SatoriGate::s_open ?
|
||||
0 :
|
||||
pthread_cond_wait(m_cv, m_cs);
|
||||
|
||||
pthread_mutex_unlock(m_cs);
|
||||
assert(waitResult == 0);
|
||||
|
||||
m_state = s_blocking;
|
||||
}
|
||||
|
||||
void SatoriGate::WakeAll()
|
||||
{
|
||||
m_state = SatoriGate::s_open;
|
||||
pthread_mutex_lock(m_cs);
|
||||
pthread_cond_broadcast(m_cv);
|
||||
pthread_mutex_unlock(m_cs);
|
||||
}
|
||||
|
||||
void SatoriGate::WakeOne()
|
||||
{
|
||||
m_state = SatoriGate::s_open;
|
||||
pthread_mutex_lock(m_cs);
|
||||
pthread_cond_signal(m_cv);
|
||||
pthread_mutex_unlock(m_cs);
|
||||
}
|
||||
#endif
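On Linux the gate above is a thin wrapper over futex: a waiter sleeps while m_state still holds the blocking value, a waker flips the state and issues a FUTEX_WAKE, and the #else branch rebuilds the same shape from a pthread mutex/condvar pair for the other Unix targets. A self-contained sketch of the futex pattern follows (not the runtime's class; the state values, field names and memory orders are assumptions). Note that for FUTEX_WAKE the third syscall argument is the number of waiters to release.

    #include <atomic>
    #include <cstdint>
    #include <climits>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // A one-shot gate: Wait() blocks while the state is BLOCKING,
    // Open() releases the current waiters.
    struct FutexGate
    {
        static constexpr uint32_t BLOCKING = 0;
        static constexpr uint32_t OPEN     = 1;

        std::atomic<uint32_t> state{ BLOCKING };

        void Wait()
        {
            while (state.load(std::memory_order_acquire) == BLOCKING)
            {
                // Sleeps only if the word still equals BLOCKING; EAGAIN/EINTR just re-check.
                syscall(SYS_futex, reinterpret_cast<uint32_t*>(&state),
                        FUTEX_WAIT_PRIVATE, BLOCKING, nullptr, nullptr, 0);
            }
            // Consume the open state so the gate blocks again, mirroring the code above.
            state.store(BLOCKING, std::memory_order_relaxed);
        }

        void Open()
        {
            state.store(OPEN, std::memory_order_release);
            // The val argument is the number of waiters to wake; INT_MAX releases all.
            syscall(SYS_futex, reinterpret_cast<uint32_t*>(&state),
                    FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
        }
    };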
@ -628,7 +628,7 @@ void* GCToOSInterface::VirtualReserve(size_t size, size_t alignment, uint32_t fl
|
|||
return VirtualReserveInner(size, alignment, flags, 0, /* committing */ false);
|
||||
}
|
||||
|
||||
void* GCToOSInterface::VirtualReserve(void* location, size_t size)
|
||||
void* GCToOSInterface::VirtualReserve(void* location, size_t size, bool useTHP)
|
||||
{
|
||||
void* pRetVal = mmap(location, size, PROT_NONE, MAP_ANON | MAP_PRIVATE , -1, 0);
|
||||
|
||||
|
@ -643,10 +643,18 @@ void* GCToOSInterface::VirtualReserve(void* location, size_t size)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef TARGET_LINUX
|
||||
if (useTHP)
|
||||
{
|
||||
madvise(pRetVal, size, MADV_HUGEPAGE);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MADV_DONTDUMP
|
||||
// Do not include reserved memory in coredump.
|
||||
madvise(pRetVal, size, MADV_DONTDUMP);
|
||||
#endif
|
||||
|
||||
return pRetVal;
|
||||
}
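The new useTHP flag only matters on Linux: once the anonymous PROT_NONE reservation succeeds, the range is advised with MADV_HUGEPAGE so the kernel may back it with transparent huge pages when it is later committed, and MADV_DONTDUMP keeps the reserved-only range out of core dumps as before. Roughly, as a simplified stand-alone sketch (the function name and the minimal error handling are assumptions):

    #include <sys/mman.h>
    #include <cstddef>

    // Reserve 'size' bytes of address space; optionally hint the kernel to use
    // transparent huge pages for this range on Linux.
    void* ReserveRange(size_t size, bool useTHP)
    {
        void* p = mmap(nullptr, size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED)
            return nullptr;

    #if defined(__linux__) && defined(MADV_HUGEPAGE)
        if (useTHP)
            madvise(p, size, MADV_HUGEPAGE);   // advisory; failure is not fatal
    #endif

    #ifdef MADV_DONTDUMP
        madvise(p, size, MADV_DONTDUMP);       // keep reserved-only memory out of core dumps
    #endif
        return p;
    }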
@ -701,7 +701,7 @@ void* GCToOSInterface::VirtualReserve(size_t size, size_t alignment, uint32_t fl
|
|||
}
|
||||
}
|
||||
|
||||
void* GCToOSInterface::VirtualReserve(void* location, size_t size)
|
||||
void* GCToOSInterface::VirtualReserve(void* location, size_t size, bool useTHP /*unused*/)
|
||||
{
|
||||
DWORD memFlags = MEM_RESERVE;
|
||||
return ::VirtualAlloc(location, size, memFlags, PAGE_READWRITE);
|
||||
|
|
|
@ -78,6 +78,7 @@ The .NET Foundation licenses this file to you under the MIT license.
|
|||
<SdkNativeLibrary Include="user32.lib" />
|
||||
<SdkNativeLibrary Include="version.lib" />
|
||||
<SdkNativeLibrary Include="ws2_32.lib" />
|
||||
<SdkNativeLibrary Include="Synchronization.lib" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
@ -2371,3 +2371,10 @@ ucrtbase!memset
|
|||
ucrtbase!realloc
|
||||
ucrtbase!wmemcpy_s
|
||||
ucrtbase!wmemmove_s
|
||||
|
||||
#
|
||||
# Synchronization.lib
|
||||
#
|
||||
Synchronization!WaitOnAddress
|
||||
Synchronization!WakeByAddressSingle
|
||||
Synchronization!WakeByAddressAll
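The Synchronization.lib imports allowed above (WaitOnAddress and the WakeByAddress* pair) are the Windows counterpart of the futex calls used on Linux: a waiter blocks while a variable still holds an expected value, and a waker changes it and signals the address. A minimal usage sketch, with illustrative names rather than the runtime's:

    #include <windows.h>
    #pragma comment(lib, "Synchronization.lib")

    // Block while 'state' still holds 'undesired'; returns when the value changed or a
    // waker signaled the address (spurious wakes are possible, so callers re-check).
    void WaitWhileEquals(volatile LONG* state, LONG undesired, DWORD timeoutMs)
    {
        WaitOnAddress(state, &undesired, sizeof(LONG), timeoutMs);
    }

    void Release(volatile LONG* state, LONG newValue, bool all)
    {
        InterlockedExchange(state, newValue);
        if (all)
            WakeByAddressAll((PVOID)state);
        else
            WakeByAddressSingle((PVOID)state);
    }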
@ -24,10 +24,10 @@ if(CLR_CMAKE_HOST_UNIX)
|
|||
add_definitions(-DFEATURE_OBJCMARSHAL)
|
||||
endif(CLR_CMAKE_TARGET_APPLE)
|
||||
|
||||
if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386)
|
||||
if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64)
|
||||
# Allow 16 byte compare-exchange
|
||||
add_compile_options(-mcx16)
|
||||
endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386)
|
||||
endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM64)
|
||||
endif (CLR_CMAKE_HOST_UNIX)
|
||||
|
||||
if(CLR_CMAKE_TARGET_ANDROID)
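The widened condition above hands -mcx16 to GCC/Clang, which lets the compiler inline a native 16-byte compare-exchange (cmpxchg16b on x86-64) instead of routing 16-byte __atomic/__sync operations through a library helper; the lock-free SatoriWorkList head+counter swap depends on that. A tiny illustration of what the flag unlocks (stand-alone, not runtime code):

    // With -mcx16 on x86-64 this lowers to a single lock cmpxchg16b; without the
    // flag the compiler rejects it or falls back to a runtime helper.
    bool Cas16Bytes(__int128* where, __int128 expected, __int128 desired)
    {
        return __sync_bool_compare_and_swap(where, expected, desired);
    }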
@ -67,6 +67,8 @@ set(COMMON_RUNTIME_SOURCES
|
|||
${GC_DIR}/satori/SatoriAllocationContext.cpp
|
||||
${GC_DIR}/satori/SatoriUtil.cpp
|
||||
${GC_DIR}/satori/SatoriLock.cpp
|
||||
${GC_DIR}/satori/SatoriWorkList.cpp
|
||||
${GC_DIR}/satori/SatoriGate.cpp
|
||||
)
|
||||
|
||||
set(SERVER_GC_SOURCES
|
||||
|
@ -149,6 +151,7 @@ if (WIN32)
|
|||
${GC_DIR}/satori/SatoriAllocationContext.h
|
||||
${GC_DIR}/satori/SatoriUtil.h
|
||||
${GC_DIR}/satori/SatoriLock.h
|
||||
${GC_DIR}/satori/SatoriGate.h
|
||||
)
|
||||
|
||||
include_directories(windows)
|
||||
|
|
|
@ -45,12 +45,18 @@ class Object
|
|||
public:
|
||||
MethodTable * GetMethodTable() const
|
||||
{ return m_pEEType; }
|
||||
MethodTable * GetGCSafeMethodTable() const
|
||||
MethodTable* GetGCSafeMethodTable() const
|
||||
#if !defined(FEATURE_SATORI_GC)
|
||||
#ifdef TARGET_64BIT
|
||||
{ return dac_cast<PTR_EEType>((dac_cast<TADDR>(m_pEEType)) & ~((uintptr_t)7)); }
|
||||
#else
|
||||
{ return dac_cast<PTR_EEType>((dac_cast<TADDR>(m_pEEType)) & ~((uintptr_t)3)); }
|
||||
#endif
|
||||
#else
|
||||
// Satori does not mess up MT pointers.
|
||||
{ return get_EEType(); }
|
||||
#endif
|
||||
|
||||
ObjHeader * GetHeader() { return dac_cast<DPTR(ObjHeader)>(dac_cast<TADDR>(this) - SYNC_BLOCK_SKEW); }
|
||||
#ifndef DACCESS_COMPILE
|
||||
void set_EEType(MethodTable * pEEType)
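The non-Satori branch masks the low bits because the existing collectors may temporarily OR mark/pin flags into an object's MethodTable pointer while a GC is in flight, so any reader that can observe a mid-GC object has to strip them; Satori never mutates the pointer, so the raw read is already GC-safe. A simplified illustration of the difference (not the runtime's actual flag layout):

    #include <cstdint>

    struct MethodTable;

    // Conventional collectors: strip flag bits the GC may have set in the pointer.
    inline MethodTable* GetGCSafeMT(uintptr_t rawMT)
    {
        return reinterpret_cast<MethodTable*>(rawMT & ~uintptr_t{7});  // 64-bit: low 3 bits reserved
    }

    // Satori: the MethodTable pointer is never disturbed, so no masking is needed.
    inline MethodTable* GetGCSafeMT_Satori(uintptr_t rawMT)
    {
        return reinterpret_cast<MethodTable*>(rawMT);
    }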
@ -336,13 +336,12 @@ LEAF_END RhpByRefAssignRef, _TEXT
|
|||
// rsi - object
|
||||
//
|
||||
LEAF_ENTRY RhpCheckedAssignRef, _TEXT
|
||||
|
||||
// See if this is in GCHeap
|
||||
mov rax, rdi
|
||||
shr rax, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne C_FUNC(RhpAssignRef)
|
||||
// See if dst is in GCHeap
|
||||
mov rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
mov r8, rdi
|
||||
shr r8, 30 // dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne C_FUNC(CheckedEntry)
|
||||
|
||||
NotInHeap:
|
||||
ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
|
||||
|
@ -354,19 +353,23 @@ LEAF_END RhpCheckedAssignRef, _TEXT
|
|||
// rdi - dest address
|
||||
// rsi - object
|
||||
//
|
||||
.balign 16
|
||||
LEAF_ENTRY RhpAssignRef, _TEXT
|
||||
// check for escaping assignment
|
||||
// 1) check if we own the source region
|
||||
|
||||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov rax, rsi
|
||||
shr rax, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
je JustAssign // src not in heap
|
||||
// check if src is in heap
|
||||
mov rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
mov r8, rsi
|
||||
shr r8, 30 // src page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
je JustAssign // src not in heap
|
||||
#else
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
#endif
|
||||
|
||||
// check for escaping assignment
|
||||
// 1) check if we own the source region
|
||||
mov rdx, rsi
|
||||
and rdx, 0xFFFFFFFFFFE00000 // source region
|
||||
|
||||
|
@ -407,76 +410,86 @@ ALTERNATE_ENTRY RhpAssignRefAVLocationNotHeap
|
|||
ALTERNATE_ENTRY RhpAssignRefAVLocation
|
||||
mov [rdi], rsi
|
||||
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r11, [C_VAR(g_write_watch_table)]
|
||||
|
||||
// set rdi per contract with JIT_ByRefWriteBarrier
|
||||
mov rax, rdi
|
||||
add rdi, 8
|
||||
|
||||
xor rsi, rdi
|
||||
shr rsi, 21
|
||||
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
cmp r11, 2
|
||||
jne DoCards
|
||||
// set rsi per contract with JIT_ByRefWriteBarrier
|
||||
mov rsi, r10
|
||||
Exit:
|
||||
ret
|
||||
|
||||
DoCards:
|
||||
// if same region, just check if barrier is not concurrent
|
||||
xor rsi, rax
|
||||
shr rsi, 21
|
||||
// set rsi per contract with JIT_ByRefWriteBarrier
|
||||
mov rsi, r10
|
||||
jz CheckConcurrent // same region, just check if barrier is not concurrent
|
||||
|
||||
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [rdx + 16], 2
|
||||
jl MarkCards
|
||||
|
||||
CheckConcurrent:
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
jne MarkCards
|
||||
ret
|
||||
// if concurrent, load card location
|
||||
cmp r11, 0
|
||||
je Exit
|
||||
|
||||
MarkCards:
|
||||
// fetch card location for rax (saved rdi)
|
||||
mov r9 , [C_VAR(g_card_table)] // fetch the page map
|
||||
mov rdx, rax
|
||||
mov rdx, rax
|
||||
shr rax, 30
|
||||
mov rax, qword ptr [r9 + rax * 8] // page
|
||||
sub rdx, rax // offset in page
|
||||
mov r8 ,rdx
|
||||
mov r8, rdx
|
||||
shr rdx, 9 // card offset
|
||||
shr r8 , 21 // group offset
|
||||
shr r8, 20 // group index
|
||||
lea r8, [rax + r8 * 2 + 0x80] // group offset
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// check if concurrent marking is in progress
|
||||
cmp r11, 0
|
||||
jne DirtyCard
|
||||
|
||||
// SETTING CARD
|
||||
// SETTING CARD
|
||||
SetCard:
|
||||
cmp byte ptr [rax + rdx], 0
|
||||
jne CardSet
|
||||
jne Exit
|
||||
mov byte ptr [rax + rdx], 1
|
||||
SetGroup:
|
||||
cmp byte ptr [rax + r8 * 2 + 0x80], 0
|
||||
cmp byte ptr [r8], 0
|
||||
jne CardSet
|
||||
mov byte ptr [rax + r8 * 2 + 0x80], 1
|
||||
mov byte ptr [r8], 1
|
||||
SetPage:
|
||||
cmp byte ptr [rax], 0
|
||||
jne CardSet
|
||||
mov byte ptr [rax], 1
|
||||
|
||||
CardSet:
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp qword ptr [C_VAR(g_write_watch_table)], 0
|
||||
jne DirtyCard
|
||||
ret
|
||||
|
||||
// DIRTYING CARD
|
||||
// DIRTYING CARD
|
||||
DirtyCard:
|
||||
mov byte ptr [rax + rdx], 4
|
||||
DirtyGroup:
|
||||
cmp byte ptr [rax + r8 * 2 + 0x80], 4
|
||||
cmp byte ptr [r8], 4
|
||||
je Exit
|
||||
mov byte ptr [rax + r8 * 2 + 0x80], 4
|
||||
mov byte ptr [r8], 4
|
||||
DirtyPage:
|
||||
cmp byte ptr [rax], 4
|
||||
je Exit
|
||||
mov byte ptr [rax], 4
|
||||
|
||||
Exit:
|
||||
ret
|
||||
|
||||
// this is expected to be rare.
|
||||
|
@ -484,12 +497,19 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
|
||||
// 4) check if the source is escaped
|
||||
mov rax, rsi
|
||||
add rax, 8 // escape bit is MT + 1
|
||||
and rax, 0x1FFFFF
|
||||
shr rax, 3
|
||||
bt qword ptr [rdx], rax
|
||||
jb AssignAndMarkCards // source is already escaped.
|
||||
|
||||
// save rdi, rsi, rdx and r10 (possibly preadjusted rsi)
|
||||
// Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
sub rsp, 8
|
||||
|
||||
// save rsp, rdi, rsi, rdx and r10 (possibly preadjusted rsi)
|
||||
push r9
|
||||
push rdi
|
||||
push rsi
|
||||
push rdx
|
||||
|
@ -502,6 +522,7 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
pop rdx
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards
|
||||
LEAF_END RhpAssignRef, _TEXT
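After the store, the barrier above records the reference at three granularities read straight from the code: a byte per 512-byte card (offset >> 9), a two-byte-strided group entry per 1 MB starting at page + 0x80 (offset >> 20), and one byte for the whole page (address >> 30 indexes g_card_table), where 1 means remembered and 4 means dirtied while concurrent marking is active (g_write_watch_table is non-zero; the special value 2 suppresses cards entirely). A C++ rendering of that tail follows, with the globals and layout treated as assumptions lifted from the assembly:

    #include <cstdint>
    #include <cstddef>

    constexpr uint8_t CARD_REMEMBERED = 1;   // set by the non-concurrent barrier
    constexpr uint8_t CARD_DIRTY      = 4;   // set while concurrent marking is active

    extern uintptr_t* g_card_table;          // maps (address >> 30) to per-page card data
    extern uintptr_t  g_write_watch_table;   // non-zero while marking runs concurrently

    inline void MarkCards(void** dst)
    {
        uintptr_t addr   = reinterpret_cast<uintptr_t>(dst);
        uint8_t*  page   = reinterpret_cast<uint8_t*>(g_card_table[addr >> 30]);
        size_t    offset = addr - reinterpret_cast<uintptr_t>(page);
        uint8_t*  card   = page + (offset >> 9);              // one byte per 512-byte card
        uint8_t*  group  = page + 0x80 + (offset >> 20) * 2;  // 2-byte stride per 1 MB group

        if (g_write_watch_table == 0)
        {
            if (*card != 0)
                return;                       // already remembered
            *card = CARD_REMEMBERED;
            if (*group == 0)
            {
                *group = CARD_REMEMBERED;
                if (*page == 0)
                    *page = CARD_REMEMBERED;
            }
            if (g_write_watch_table == 0)     // marking may have started meanwhile
                return;
        }

        // Concurrent marking: dirty card, group and page so the marker revisits them.
        *card = CARD_DIRTY;
        if (*group != CARD_DIRTY)
        {
            *group = CARD_DIRTY;
            if (*page != CARD_DIRTY)
                *page = CARD_DIRTY;
        }
    }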
@ -521,14 +542,13 @@ LEAF_ENTRY RhpByRefAssignRef, _TEXT
|
|||
ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
|
||||
mov rsi, [rsi]
|
||||
|
||||
// See if assignment is into heap
|
||||
mov rax, rdi
|
||||
shr rax, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne C_FUNC(RhpAssignRef)
|
||||
// See if dst is in GCHeap
|
||||
mov rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
mov r8, rdi
|
||||
shr r8, 30 // dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne C_FUNC(CheckedEntry)
|
||||
|
||||
.balign 16
|
||||
NotInHeap_RhpByRefAssignRef:
|
||||
ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
|
||||
mov [rdi], rsi
|
||||
|
@ -540,13 +560,13 @@ LEAF_END RhpByRefAssignRef, _TEXT
|
|||
LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
|
||||
// Setup rax with the new object for the exchange, that way it will automatically hold the correct result
|
||||
// afterwards and we can leave rsi unaltered ready for the GC write barrier below.
|
||||
mov rax, rdx
|
||||
mov rax, rdx
|
||||
mov r11, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
|
||||
// check if dst is in heap
|
||||
mov rdx, rdi
|
||||
shr rdx, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rdx, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rdx], 0
|
||||
cmp byte ptr [r11 + rdx], 0
|
||||
je JustAssign_CmpXchg // dst not in heap
|
||||
|
||||
// check for escaping assignment
|
||||
|
@ -554,8 +574,7 @@ LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
|
|||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov rdx, rsi
|
||||
shr rdx, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rdx, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rdx], 0
|
||||
cmp byte ptr [r11 + rdx], 0
|
||||
je JustAssign_CmpXchg // src not in heap
|
||||
#endif
|
||||
|
||||
|
@ -597,19 +616,30 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
lock cmpxchg [rdi], rsi
|
||||
jne Exit_CmpXchg
|
||||
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r10, [C_VAR(g_write_watch_table)]
|
||||
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
cmp r10, 2
|
||||
jne DoCards_CmpXchg
|
||||
Exit_CmpXchg:
|
||||
ret
|
||||
|
||||
DoCards_CmpXchg:
|
||||
// if same region, just check if barrier is not concurrent
|
||||
xor rsi, rdi
|
||||
shr rsi, 21
|
||||
jz CheckConcurrent_CmpXchg // same region, just check if barrier is not concurrent
|
||||
|
||||
// TUNING: nonconcurrent and concurrent barriers could be separate pieces of code, but to switch
|
||||
// need to suspend EE, not sure if skipping concurrent check would worth that much.
|
||||
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [rdx + 16], 2
|
||||
jl MarkCards_CmpXchg
|
||||
|
||||
CheckConcurrent_CmpXchg:
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// if concurrent, load card location
|
||||
cmp r10, 0
|
||||
jne MarkCards_CmpXchg
|
||||
ret
|
||||
|
||||
|
@ -622,29 +652,30 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
sub rdx, r11 // offset in page
|
||||
mov rsi,rdx
|
||||
shr rdx, 9 // card offset
|
||||
shr rsi, 21 // group offset
|
||||
shr rsi, 20 // group index
|
||||
lea rsi, [r11 + rsi * 2 + 0x80] // group offset
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// check if concurrent marking is in progress
|
||||
cmp r10, 0
|
||||
jne DirtyCard_CmpXchg
|
||||
|
||||
// SETTING CARD FOR rdi
|
||||
SetCard_CmpXchg:
|
||||
cmp byte ptr [r11 + rdx], 0
|
||||
jne CardSet_CmpXchg
|
||||
jne Exit_CmpXchg
|
||||
mov byte ptr [r11 + rdx], 1
|
||||
SetGroup_CmpXchg:
|
||||
cmp byte ptr [r11 + rsi * 2 + 0x80], 0
|
||||
cmp byte ptr [rsi], 0
|
||||
jne CardSet_CmpXchg
|
||||
mov byte ptr [r11 + rsi * 2 + 0x80], 1
|
||||
mov byte ptr [rsi], 1
|
||||
SetPage_CmpXchg:
|
||||
cmp byte ptr [r11], 0
|
||||
jne CardSet_CmpXchg
|
||||
mov byte ptr [r11], 1
|
||||
|
||||
CardSet_CmpXchg:
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp qword ptr [C_VAR(g_write_watch_table)], 0
|
||||
jne DirtyCard_CmpXchg
|
||||
ret
|
||||
|
||||
|
@ -654,15 +685,13 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
je Exit_CmpXchg
|
||||
mov byte ptr [r11 + rdx], 4
|
||||
DirtyGroup_CmpXchg:
|
||||
cmp byte ptr [r11 + rsi * 2 + 0x80], 4
|
||||
cmp byte ptr [rsi], 4
|
||||
je Exit_CmpXchg
|
||||
mov byte ptr [r11 + rsi * 2 + 0x80], 4
|
||||
mov byte ptr [rsi], 4
|
||||
DirtyPage_CmpXchg:
|
||||
cmp byte ptr [r11], 4
|
||||
je Exit_CmpXchg
|
||||
mov byte ptr [r11], 4
|
||||
|
||||
Exit_CmpXchg:
|
||||
ret
|
||||
|
||||
// this is expected to be rare.
|
||||
|
@ -670,39 +699,45 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
|
||||
// 4) check if the source is escaped
|
||||
mov r11, rsi
|
||||
add r11, 8 // escape bit is MT + 1
|
||||
and r11, 0x1FFFFF
|
||||
shr r11, 3
|
||||
bt qword ptr [rdx], r11
|
||||
jb AssignAndMarkCards_CmpXchg // source is already escaped.
|
||||
|
||||
// save rax, rdi, rsi, rdx and have enough stack for the callee
|
||||
// Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
sub rsp, 8
|
||||
|
||||
// save rsp, rax, rdi, rsi, rdx and have enough stack for the callee
|
||||
push r9
|
||||
push rax
|
||||
push rdi
|
||||
push rsi
|
||||
push rdx
|
||||
sub rsp, 0x20
|
||||
|
||||
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
call qword ptr [rdx + 8]
|
||||
|
||||
add rsp, 0x20
|
||||
pop rdx
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rax
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards_CmpXchg
|
||||
LEAF_END RhpCheckedLockCmpXchg, _TEXT
|
||||
|
||||
LEAF_ENTRY RhpCheckedXchg, _TEXT
|
||||
// Setup rax with the new object for the exchange, that way it will automatically hold the correct result
|
||||
// afterwards and we can leave rsi unaltered ready for the GC write barrier below.
|
||||
mov rax, rsi
|
||||
mov rax, rsi
|
||||
mov r11, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
|
||||
// check if dst is in heap
|
||||
mov rdx, rdi
|
||||
shr rdx, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rdx, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rdx], 0
|
||||
cmp byte ptr [r11 + rdx], 0
|
||||
je JustAssign_Xchg // dst not in heap
|
||||
|
||||
// check for escaping assignment
|
||||
|
@ -710,8 +745,7 @@ LEAF_ENTRY RhpCheckedXchg, _TEXT
|
|||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov rdx, rsi
|
||||
shr rdx, 30 // round to page size ( >> PAGE_BITS )
|
||||
add rdx, [C_VAR(g_card_bundle_table)] // fetch the page byte map
|
||||
cmp byte ptr [rdx], 0
|
||||
cmp byte ptr [r11 + rdx], 0
|
||||
je JustAssign_Xchg // src not in heap
|
||||
#endif
|
||||
|
||||
|
@ -751,19 +785,30 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocationNotHeap
|
|||
ALTERNATE_ENTRY RhpCheckedXchgAVLocation
|
||||
xchg [rdi], rax
|
||||
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r10, [C_VAR(g_write_watch_table)]
|
||||
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
cmp r10, 2
|
||||
jne DoCards_Xchg
|
||||
Exit_Xchg:
|
||||
ret
|
||||
|
||||
DoCards_Xchg:
|
||||
// if same region, just check if barrier is not concurrent
|
||||
xor rsi, rdi
|
||||
shr rsi, 21
|
||||
jz CheckConcurrent_Xchg // same region, just check if barrier is not concurrent
|
||||
|
||||
// TUNING: nonconcurrent and concurrent barriers could be separate pieces of code, but to switch
|
||||
// need to suspend EE, not sure if skipping concurrent check would worth that much.
|
||||
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [rdx + 16], 2
|
||||
jl MarkCards_Xchg
|
||||
|
||||
CheckConcurrent_Xchg:
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// if concurrent, load card location
|
||||
cmp r10, 0
|
||||
jne MarkCards_Xchg
|
||||
ret
|
||||
|
||||
|
@ -776,29 +821,30 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation
|
|||
sub rdx, r11 // offset in page
|
||||
mov rsi,rdx
|
||||
shr rdx, 9 // card offset
|
||||
shr rsi, 21 // group offset
|
||||
shr rsi, 20 // group index
|
||||
lea rsi, [r11 + rsi * 2 + 0x80] // group offset
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// check if concurrent marking is in progress
|
||||
cmp r10, 0
|
||||
jne DirtyCard_Xchg
|
||||
|
||||
// SETTING CARD FOR rdi
|
||||
SetCard_Xchg:
|
||||
cmp byte ptr [r11 + rdx], 0
|
||||
jne CardSet_Xchg
|
||||
jne Exit_Xchg
|
||||
mov byte ptr [r11 + rdx], 1
|
||||
SetGroup_Xchg:
|
||||
cmp byte ptr [r11 + rsi * 2 + 0x80], 0
|
||||
cmp byte ptr [rsi], 0
|
||||
jne CardSet_Xchg
|
||||
mov byte ptr [r11 + rsi * 2 + 0x80], 1
|
||||
mov byte ptr [rsi], 1
|
||||
SetPage_Xchg:
|
||||
cmp byte ptr [r11], 0
|
||||
jne CardSet_Xchg
|
||||
mov byte ptr [r11], 1
|
||||
|
||||
CardSet_Xchg:
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp qword ptr [C_VAR(g_write_watch_table)], 0
|
||||
jne DirtyCard_Xchg
|
||||
ret
|
||||
|
||||
|
@ -808,15 +854,13 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation
|
|||
je Exit_Xchg
|
||||
mov byte ptr [r11 + rdx], 4
|
||||
DirtyGroup_Xchg:
|
||||
cmp byte ptr [r11 + rsi * 2 + 0x80], 4
|
||||
cmp byte ptr [rsi], 4
|
||||
je Exit_Xchg
|
||||
mov byte ptr [r11 + rsi * 2 + 0x80], 4
|
||||
mov byte ptr [rsi], 4
|
||||
DirtyPage_Xchg:
|
||||
cmp byte ptr [r11], 4
|
||||
je Exit_Xchg
|
||||
mov byte ptr [r11], 4
|
||||
|
||||
Exit_Xchg:
|
||||
ret
|
||||
|
||||
// this is expected to be rare.
|
||||
|
@ -824,26 +868,32 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation
|
|||
|
||||
// 4) check if the source is escaped
|
||||
mov r11, rsi
|
||||
add r11, 8 // escape bit is MT + 1
|
||||
and r11, 0x1FFFFF
|
||||
shr r11, 3
|
||||
bt qword ptr [rdx], r11
|
||||
jb AssignAndMarkCards_Xchg // source is already escaped.
|
||||
|
||||
// save rax, rdi, rsi, rdx and have enough stack for the callee
|
||||
// Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
sub rsp, 8
|
||||
|
||||
// save rsp, rax, rdi, rsi, rdx and have enough stack for the callee
|
||||
push r9
|
||||
push rax
|
||||
push rdi
|
||||
push rsi
|
||||
push rdx
|
||||
sub rsp, 0x20
|
||||
|
||||
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
call qword ptr [rdx + 8]
|
||||
|
||||
add rsp, 0x20
|
||||
pop rdx
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rax
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards_Xchg
|
||||
LEAF_END RhpCheckedXchg, _TEXT
|
||||
|
||||
|
|
|
@ -353,12 +353,12 @@ else ;FEATURE_SATORI_GC
|
|||
;
|
||||
LEAF_ENTRY RhpCheckedAssignRef, _TEXT
|
||||
|
||||
; See if this is in GCHeap
|
||||
mov rax, rcx
|
||||
shr rax, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne RhpAssignRef
|
||||
; See if dst is in GCHeap
|
||||
mov rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
mov r8, rcx
|
||||
shr r8, 30 ; dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne CheckedEntry
|
||||
|
||||
NotInHeap:
|
||||
ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
|
||||
|
@ -371,18 +371,21 @@ LEAF_END RhpCheckedAssignRef, _TEXT
|
|||
; rdx - object
|
||||
;
|
||||
LEAF_ENTRY RhpAssignRef, _TEXT
|
||||
align 16
|
||||
; check for escaping assignment
|
||||
; 1) check if we own the source region
|
||||
|
||||
ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov rax, rdx
|
||||
shr rax, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
; check if src is in heap
|
||||
mov rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
mov r8, rdx
|
||||
shr r8, 30 ; dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
je JustAssign ; src not in heap
|
||||
else
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
endif
|
||||
|
||||
; check for escaping assignment
|
||||
; 1) check if we own the source region
|
||||
mov r8, rdx
|
||||
and r8, 0FFFFFFFFFFE00000h ; source region
|
||||
|
||||
|
@ -416,21 +419,30 @@ ALTERNATE_ENTRY RhpAssignRefAVLocationNotHeap
|
|||
ALTERNATE_ENTRY RhpAssignRefAVLocation
|
||||
mov [rcx], rdx
|
||||
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r11, qword ptr [g_write_watch_table]
|
||||
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
cmp r11, 2h
|
||||
jne DoCards
|
||||
Exit:
|
||||
ret
|
||||
|
||||
DoCards:
|
||||
; if same region, just check if barrier is not concurrent
|
||||
xor rdx, rcx
|
||||
shr rdx, 21
|
||||
jz CheckConcurrent ; same region, just check if barrier is not concurrent
|
||||
|
||||
; TUNING: nonconcurrent and concurrent barriers could be separate pieces of code, but to switch
|
||||
; need to suspend EE, not sure if skipping concurrent check would worth that much.
|
||||
jz CheckConcurrent
|
||||
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [r8 + 16], 2
|
||||
jl MarkCards
|
||||
|
||||
CheckConcurrent:
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
jne MarkCards
|
||||
ret
|
||||
cmp r11, 0h
|
||||
je Exit
|
||||
|
||||
MarkCards:
|
||||
; fetch card location for rcx
|
||||
|
@ -441,21 +453,22 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
sub r8, rax ; offset in page
|
||||
mov rdx,r8
|
||||
shr r8, 9 ; card offset
|
||||
shr rdx, 21 ; group offset
|
||||
shr rdx, 20 ; group index
|
||||
lea rdx, [rax + rdx * 2 + 80h] ; group offset
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp r11, 0h
|
||||
jne DirtyCard
|
||||
|
||||
; SETTING CARD FOR RCX
|
||||
SetCard:
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne CardSet
|
||||
jne Exit
|
||||
mov byte ptr [rax + r8], 1
|
||||
SetGroup:
|
||||
cmp byte ptr [rax + rdx * 2 + 80h], 0
|
||||
cmp byte ptr [rdx], 0
|
||||
jne CardSet
|
||||
mov byte ptr [rax + rdx * 2 + 80h], 1
|
||||
mov byte ptr [rdx], 1
|
||||
SetPage:
|
||||
cmp byte ptr [rax], 0
|
||||
jne CardSet
|
||||
|
@ -463,7 +476,7 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
|
||||
CardSet:
|
||||
; check if concurrent marking is still not in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp qword ptr [g_write_watch_table], 0h
|
||||
jne DirtyCard
|
||||
ret
|
||||
|
||||
|
@ -471,15 +484,13 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
DirtyCard:
|
||||
mov byte ptr [rax + r8], 4
|
||||
DirtyGroup:
|
||||
cmp byte ptr [rax + rdx * 2 + 80h], 4
|
||||
cmp byte ptr [rdx], 4
|
||||
je Exit
|
||||
mov byte ptr [rax + rdx * 2 + 80h], 4
|
||||
mov byte ptr [rdx], 4
|
||||
DirtyPage:
|
||||
cmp byte ptr [rax], 4
|
||||
je Exit
|
||||
mov byte ptr [rax], 4
|
||||
|
||||
Exit:
|
||||
ret
|
||||
|
||||
; this is expected to be rare.
|
||||
|
@ -487,12 +498,18 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
|
||||
; 4) check if the source is escaped
|
||||
mov rax, rdx
|
||||
add rax, 8 ; escape bit is MT + 1
|
||||
and rax, 01FFFFFh
|
||||
shr rax, 3
|
||||
bt qword ptr [r8], rax
|
||||
jb AssignAndMarkCards ; source is already escaped.
|
||||
|
||||
; save rcx, rdx, r8 and have enough stack for the callee
|
||||
; Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
|
||||
; save rsp, rcx, rdx, r8 and have enough stack for the callee
|
||||
push r9
|
||||
push rcx
|
||||
push rdx
|
||||
push r8
|
||||
|
@ -505,6 +522,7 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
pop r8
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards
|
||||
LEAF_END RhpAssignRef, _TEXT
|
||||
|
||||
|
@ -526,14 +544,13 @@ ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
|
|||
add rdi, 8h
|
||||
add rsi, 8h
|
||||
|
||||
; See if assignment is into heap
|
||||
mov rax, rcx
|
||||
shr rax, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne RhpAssignRef
|
||||
; See if dst is in GCHeap
|
||||
mov rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
mov r8, rcx
|
||||
shr r8, 30 ; dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne CheckedEntry
|
||||
|
||||
align 16
|
||||
NotInHeap:
|
||||
ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
|
||||
mov [rcx], rdx
|
||||
|
@ -543,13 +560,13 @@ LEAF_END RhpByRefAssignRef, _TEXT
|
|||
LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
|
||||
;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result
|
||||
;; afterwards and we can leave rdx unaltered ready for the GC write barrier below.
|
||||
mov rax, r8
|
||||
mov rax, r8
|
||||
mov r11, [g_card_bundle_table] ; fetch the page byte map
|
||||
|
||||
; check if dst is in heap
|
||||
mov r8, rcx
|
||||
shr r8, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add r8, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [r8], 0
|
||||
cmp byte ptr [r11 + r8], 0
|
||||
je JustAssign ; dst not in heap
|
||||
|
||||
; check for escaping assignment
|
||||
|
@ -557,8 +574,7 @@ LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
|
|||
ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov r8, rdx
|
||||
shr r8, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add r8, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [r8], 0
|
||||
cmp byte ptr [r11 + r8], 0
|
||||
je JustAssign ; src not in heap
|
||||
endif
|
||||
|
||||
|
@ -596,70 +612,78 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
lock cmpxchg [rcx], rdx
|
||||
jne Exit
|
||||
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r11, qword ptr [g_write_watch_table]
|
||||
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
cmp r11, 2h
|
||||
jne DoCards
|
||||
Exit:
|
||||
ret
|
||||
|
||||
DoCards:
|
||||
; if same region, just check if barrier is not concurrent
|
||||
xor rdx, rcx
|
||||
shr rdx, 21
|
||||
jz CheckConcurrent ; same region, just check if barrier is not concurrent
|
||||
|
||||
; TUNING: nonconcurrent and concurrent barriers could be separate pieces of code, but to switch
|
||||
; need to suspend EE, not sure if skipping concurrent check would worth that much.
|
||||
jz CheckConcurrent
|
||||
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [r8 + 16], 2
|
||||
jl MarkCards
|
||||
|
||||
CheckConcurrent:
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
jne MarkCards
|
||||
ret
|
||||
cmp r11, 0h
|
||||
je Exit
|
||||
|
||||
MarkCards:
|
||||
; fetch card location for rcx
|
||||
mov r9 , [g_card_table] ; fetch the page map
|
||||
mov r8, rcx
|
||||
shr rcx, 30
|
||||
mov r11, qword ptr [r9 + rcx * 8] ; page
|
||||
sub r8, r11 ; offset in page
|
||||
mov r10, qword ptr [r9 + rcx * 8] ; page
|
||||
sub r8, r10 ; offset in page
|
||||
mov rdx,r8
|
||||
shr r8, 9 ; card offset
|
||||
shr rdx, 21 ; group offset
|
||||
shr rdx, 20 ; group index
|
||||
lea rdx, [r10 + rdx * 2 + 80h] ; group offset
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp r11, 0h
|
||||
jne DirtyCard
|
||||
|
||||
; SETTING CARD FOR RCX
|
||||
SetCard:
|
||||
cmp byte ptr [r11 + r8], 0
|
||||
jne CardSet
|
||||
mov byte ptr [r11 + r8], 1
|
||||
cmp byte ptr [r10 + r8], 0
|
||||
jne Exit
|
||||
mov byte ptr [r10 + r8], 1
|
||||
SetGroup:
|
||||
cmp byte ptr [r11 + rdx * 2 + 80h], 0
|
||||
cmp byte ptr [rdx], 0
|
||||
jne CardSet
|
||||
mov byte ptr [r11 + rdx * 2 + 80h], 1
|
||||
mov byte ptr [rdx], 1
|
||||
SetPage:
|
||||
cmp byte ptr [r11], 0
|
||||
cmp byte ptr [r10], 0
|
||||
jne CardSet
|
||||
mov byte ptr [r11], 1
|
||||
mov byte ptr [r10], 1
|
||||
|
||||
CardSet:
|
||||
; check if concurrent marking is still not in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp qword ptr [g_write_watch_table], 0h
|
||||
jne DirtyCard
|
||||
ret
|
||||
|
||||
; DIRTYING CARD FOR RCX
|
||||
DirtyCard:
|
||||
mov byte ptr [r11 + r8], 4
|
||||
mov byte ptr [r10 + r8], 4
|
||||
DirtyGroup:
|
||||
cmp byte ptr [r11 + rdx * 2 + 80h], 4
|
||||
cmp byte ptr [rdx], 4
|
||||
je Exit
|
||||
mov byte ptr [r11 + rdx * 2 + 80h], 4
|
||||
mov byte ptr [rdx], 4
|
||||
DirtyPage:
|
||||
cmp byte ptr [r11], 4
|
||||
cmp byte ptr [r10], 4
|
||||
je Exit
|
||||
mov byte ptr [r11], 4
|
||||
|
||||
Exit:
|
||||
mov byte ptr [r10], 4
|
||||
ret
|
||||
|
||||
; this is expected to be rare.
|
||||
|
@ -667,39 +691,46 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
|
||||
; 4) check if the source is escaped
|
||||
mov r11, rdx
|
||||
add r11, 8 ; escape bit is MT + 1
|
||||
and r11, 01FFFFFh
|
||||
shr r11, 3
|
||||
bt qword ptr [r8], r11
|
||||
jb AssignAndMarkCards ; source is already escaped.
|
||||
|
||||
; save rax, rcx, rdx, r8 and have enough stack for the callee
|
||||
; Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
|
||||
; save rsp, rax, rcx, rdx, r8 and have enough stack for the callee
|
||||
push r9
|
||||
push rax
|
||||
push rcx
|
||||
push rdx
|
||||
push r8
|
||||
sub rsp, 20h
|
||||
sub rsp, 28h
|
||||
|
||||
; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
call qword ptr [r8 + 8]
|
||||
|
||||
add rsp, 20h
|
||||
add rsp, 28h
|
||||
pop r8
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rax
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards
|
||||
LEAF_END RhpCheckedLockCmpXchg, _TEXT
|
||||
|
||||
LEAF_ENTRY RhpCheckedXchg, _TEXT
|
||||
;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result
|
||||
;; afterwards and we can leave rdx unaltered ready for the GC write barrier below.
|
||||
mov rax, rdx
|
||||
mov rax, rdx
|
||||
mov r11, [g_card_bundle_table] ; fetch the page byte map
|
||||
|
||||
; check if dst is in heap
|
||||
mov r8, rcx
|
||||
shr r8, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add r8, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [r8], 0
|
||||
cmp byte ptr [r11 + r8], 0
|
||||
je JustAssign ; dst not in heap
|
||||
|
||||
; check for escaping assignment
|
||||
|
@ -707,8 +738,7 @@ LEAF_ENTRY RhpCheckedXchg, _TEXT
|
|||
ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov r8, rdx
|
||||
shr r8, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add r8, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [r8], 0
|
||||
cmp byte ptr [r11 + r8], 0
|
||||
je JustAssign ; src not in heap
|
||||
endif
|
||||
|
||||
|
@ -744,70 +774,78 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocationNotHeap
|
|||
ALTERNATE_ENTRY RhpCheckedXchgAVLocation
|
||||
xchg [rcx], rax
|
||||
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r11, qword ptr [g_write_watch_table]
|
||||
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
cmp r11, 2h
|
||||
jne DoCards
|
||||
Exit:
|
||||
ret
|
||||
|
||||
DoCards:
|
||||
; if same region, just check if barrier is not concurrent
|
||||
xor rdx, rcx
|
||||
shr rdx, 21
|
||||
jz CheckConcurrent ; same region, just check if barrier is not concurrent
|
||||
|
||||
; TUNING: nonconcurrent and concurrent barriers could be separate pieces of code, but to switch
|
||||
; need to suspend EE, not sure if skipping concurrent check would worth that much.
|
||||
jz CheckConcurrent
|
||||
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [r8 + 16], 2
|
||||
jl MarkCards
|
||||
|
||||
CheckConcurrent:
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
jne MarkCards
|
||||
ret
|
||||
cmp r11, 0h
|
||||
je Exit
|
||||
|
||||
MarkCards:
|
||||
; fetch card location for rcx
|
||||
mov r9 , [g_card_table] ; fetch the page map
|
||||
mov r8, rcx
|
||||
shr rcx, 30
|
||||
mov r11, qword ptr [r9 + rcx * 8] ; page
|
||||
sub r8, r11 ; offset in page
|
||||
mov r10, qword ptr [r9 + rcx * 8] ; page
|
||||
sub r8, r10 ; offset in page
|
||||
mov rdx,r8
|
||||
shr r8, 9 ; card offset
|
||||
shr rdx, 21 ; group offset
|
||||
shr rdx, 20 ; group index
|
||||
lea rdx, [r10 + rdx * 2 + 80h] ; group offset
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp r11, 0h
|
||||
jne DirtyCard
|
||||
|
||||
; SETTING CARD FOR RCX
|
||||
SetCard:
|
||||
cmp byte ptr [r11 + r8], 0
|
||||
jne CardSet
|
||||
mov byte ptr [r11 + r8], 1
|
||||
cmp byte ptr [r10 + r8], 0
|
||||
jne Exit
|
||||
mov byte ptr [r10 + r8], 1
|
||||
SetGroup:
|
||||
cmp byte ptr [r11 + rdx * 2 + 80h], 0
|
||||
cmp byte ptr [rdx], 0
|
||||
jne CardSet
|
||||
mov byte ptr [r11 + rdx * 2 + 80h], 1
|
||||
mov byte ptr [rdx], 1
|
||||
SetPage:
|
||||
cmp byte ptr [r11], 0
|
||||
cmp byte ptr [r10], 0
|
||||
jne CardSet
|
||||
mov byte ptr [r11], 1
|
||||
mov byte ptr [r10], 1
|
||||
|
||||
CardSet:
|
||||
; check if concurrent marking is still not in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp qword ptr [g_write_watch_table], 0h
|
||||
jne DirtyCard
|
||||
ret
|
||||
|
||||
; DIRTYING CARD FOR RCX
|
||||
DirtyCard:
|
||||
mov byte ptr [r11 + r8], 4
|
||||
mov byte ptr [r10 + r8], 4
|
||||
DirtyGroup:
|
||||
cmp byte ptr [r11 + rdx * 2 + 80h], 4
|
||||
cmp byte ptr [rdx], 4
|
||||
je Exit
|
||||
mov byte ptr [r11 + rdx * 2 + 80h], 4
|
||||
mov byte ptr [rdx], 4
|
||||
DirtyPage:
|
||||
cmp byte ptr [r11], 4
|
||||
cmp byte ptr [r10], 4
|
||||
je Exit
|
||||
mov byte ptr [r11], 4
|
||||
|
||||
Exit:
|
||||
mov byte ptr [r10], 4
|
||||
ret
|
||||
|
||||
; this is expected to be rare.
|
||||
|
@ -815,26 +853,33 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation
|
|||
|
||||
; 4) check if the source is escaped
|
||||
mov r11, rdx
|
||||
add r11, 8 ; escape bit is MT + 1
|
||||
and r11, 01FFFFFh
|
||||
shr r11, 3
|
||||
bt qword ptr [r8], r11
|
||||
jb AssignAndMarkCards ; source is already escaped.
|
||||
|
||||
; save rax, rcx, rdx, r8 and have enough stack for the callee
|
||||
; Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
|
||||
; save rsp, rax, rcx, rdx, r8 and have enough stack for the callee
|
||||
push r9
|
||||
push rax
|
||||
push rcx
|
||||
push rdx
|
||||
push r8
|
||||
sub rsp, 20h
|
||||
sub rsp, 28h
|
||||
|
||||
; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
call qword ptr [r8 + 8]
|
||||
|
||||
add rsp, 20h
|
||||
add rsp, 28h
|
||||
pop r8
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rax
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards
|
||||
LEAF_END RhpCheckedXchg, _TEXT
|
||||
|
||||
|
|
|
@ -434,19 +434,23 @@ LEAF_END RhpByRefAssignRefArm64, _TEXT
|
|||
// on the managed heap.
|
||||
//
|
||||
// On entry:
|
||||
// x14 : the destination address (LHS of the assignment).
|
||||
// x14 : the destination address (LHS of the assignment)
|
||||
// May not be a heap location (hence the checked).
|
||||
// x15 : the object reference (RHS of the assignment).
|
||||
// x15 : the object reference (RHS of the assignment)
|
||||
//
|
||||
// On exit:
|
||||
// x12, x17 : trashed
|
||||
// x14 : incremented by 8
|
||||
// x12 : trashed
|
||||
// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
|
||||
// x15 : trashed
|
||||
// x16 : trashed (ip0)
|
||||
// x17 : trashed (ip1)
|
||||
LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x14, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, LOCAL_LABEL(NotInHeap)
|
||||
b C_FUNC(RhpAssignRefArm64)
|
||||
// See if dst is in GCHeap
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x16
|
||||
lsr x17, x14, #30 // dst page index
|
||||
ldrb w12, [x16, x17]
|
||||
cbz x12 , LOCAL_LABEL(NotInHeap)
|
||||
b C_FUNC(CheckedEntry)
|
||||
|
||||
LOCAL_LABEL(NotInHeap):
|
||||
ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
|
||||
|
@ -460,25 +464,30 @@ LEAF_END RhpCheckedAssignRefArm64, _TEXT
|
|||
// reside on the managed heap.
|
||||
//
|
||||
// On entry:
|
||||
// x14 : the destination address (LHS of the assignment).
|
||||
// x15 : the object reference (RHS of the assignment).
|
||||
// x14 : the destination address (LHS of the assignment)
|
||||
// x15 : the object reference (RHS of the assignment)
|
||||
//
|
||||
// On exit:
|
||||
// x12, x17 : trashed
|
||||
// x14 : incremented by 8
|
||||
// x12 : trashed
|
||||
// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
|
||||
// x15 : trashed
|
||||
// x16 : trashed (ip0)
|
||||
// x17 : trashed (ip1)
|
||||
LEAF_ENTRY RhpAssignRefArm64, _TEXT
|
||||
// check for escaping assignment
|
||||
// 1) check if we own the source region
|
||||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x15, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, LOCAL_LABEL(JustAssign)
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x16
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
lsr x17, x15, #30 // source page index
|
||||
ldrb w12, [x16, x17]
|
||||
cbz x12, LOCAL_LABEL(JustAssign) // null or external (immutable) object
|
||||
#else
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
cbz x15, LOCAL_LABEL(JustAssign) // assigning null
|
||||
#endif
|
||||
and x12, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x12] // region tag
|
||||
and x16, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x16] // region tag
|
||||
#ifdef TARGET_OSX
|
||||
mrs x17, TPIDRRO_EL0
|
||||
and x17, x17, #-8 // thread tag on OSX
|
||||
|
@ -489,119 +498,124 @@ LEAF_ENTRY RhpAssignRefArm64, _TEXT
|
|||
bne LOCAL_LABEL(AssignAndMarkCards) // not local to this thread
|
||||
|
||||
// 2) check if the src and dst are from the same region
|
||||
eor x12, x14, x15
|
||||
lsr x12, x12, #21
|
||||
cbnz x12, LOCAL_LABEL(RecordEscape) // cross region assignment. definitely escaping
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 // target aligned to region
|
||||
cmp x12, x16
|
||||
bne LOCAL_LABEL(RecordEscape) // cross region assignment. definitely escaping
|
||||
|
||||
// 3) check if the target is exposed
|
||||
ubfx x17, x14,#9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
and x12, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x17, [x12, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x14, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(RecordEscape) // target is exposed. record an escape.
|
||||
|
||||
str x15, [x14], #8 // UNORDERED assignment of unescaped object
|
||||
ret lr
|
||||
ubfx x17, x14,#9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x14, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(RecordEscape) // target is exposed. record an escape.
|
||||
|
||||
// UNORDERED! assignment of unescaped, null or external (immutable) object
|
||||
LOCAL_LABEL(JustAssign):
|
||||
ALTERNATE_ENTRY RhpAssignRefAVLocationNotHeap
|
||||
stlr x15, [x14] // no card marking, src is not a heap object
|
||||
add x14, x14, 8
|
||||
ret lr
|
||||
str x15, [x14], #8
|
||||
ret lr
|
||||
|
||||
LOCAL_LABEL(AssignAndMarkCards):
|
||||
ALTERNATE_ENTRY RhpAssignRefAVLocation
|
||||
stlr x15, [x14]
|
||||
|
||||
// need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x17
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
tbz x17, #1, LOCAL_LABEL(DoCards)
|
||||
|
||||
eor x12, x14, x15
|
||||
lsr x12, x12, #21
|
||||
cbz x12, LOCAL_LABEL(CheckConcurrent) // same region, just check if barrier is not concurrent
|
||||
LOCAL_LABEL(ExitNoCards):
|
||||
add x14, x14, 8
|
||||
ret lr
|
||||
|
||||
LOCAL_LABEL(DoCards):
|
||||
// if same region, just check if barrier is not concurrent
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 // target aligned to region
|
||||
cmp x12, x16
|
||||
beq LOCAL_LABEL(CheckConcurrent) // same region, just check if barrier is not concurrent
|
||||
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
and x2, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr w12, [x2, 16]
|
||||
ldr w12, [x16, 16] // source region + 16 -> generation
|
||||
tbz x12, #1, LOCAL_LABEL(MarkCards)
|
||||
|
||||
LOCAL_LABEL(CheckConcurrent):
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(MarkCards)
|
||||
|
||||
// if not concurrent, exit
|
||||
cbz x17, LOCAL_LABEL(ExitNoCards)
|
||||
|
||||
LOCAL_LABEL(MarkCards):
|
||||
// need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
|
||||
// fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 // fetch the page map
|
||||
lsr x16, x14, #30
|
||||
ldr x16, [x12, x16, lsl #3] // page
|
||||
sub x2, x14, x16 // offset in page
|
||||
lsr x15, x2, #20 // group index
|
||||
lsr x2, x2, #9 // card offset
|
||||
lsl x15, x15, #1 // group offset (index * 2)
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
cbnz x17, LOCAL_LABEL(DirtyCard)
|
||||
|
||||
// SETTING CARD FOR X14
|
||||
LOCAL_LABEL(SetCard):
|
||||
ldrb w3, [x16, x2]
|
||||
cbnz w3, LOCAL_LABEL(Exit)
|
||||
mov w17, #1
|
||||
strb w17, [x16, x2]
|
||||
LOCAL_LABEL(SetGroup):
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, LOCAL_LABEL(CardSet)
|
||||
strb w17, [x12, x15]
|
||||
LOCAL_LABEL(SetPage):
|
||||
ldrb w3, [x16]
|
||||
cbnz w3, LOCAL_LABEL(CardSet)
|
||||
strb w17, [x16]
|
||||
|
||||
LOCAL_LABEL(CardSet):
|
||||
// check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard)
|
||||
|
||||
LOCAL_LABEL(Exit):
|
||||
ldp x2, x3, [sp], 16
|
||||
add x14, x14, 8
|
||||
ret lr
|
||||
|
||||
LOCAL_LABEL(MarkCards):
|
||||
// fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 // fetch the page map
|
||||
lsr x17, x14, #30
|
||||
ldr x17, [x12, x17, lsl #3] // page
|
||||
sub x2, x14, x17 // offset in page
|
||||
lsr x15, x2, #21 // group index
|
||||
lsl x15, x15, #1 // group offset (index * 2)
|
||||
lsr x2, x2, #9 // card offset
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard)
|
||||
|
||||
// SETTING CARD FOR X14
|
||||
LOCAL_LABEL(SetCard):
|
||||
ldrb w3, [x17, x2]
|
||||
cbnz w3, LOCAL_LABEL(CardSet)
|
||||
mov w16, #1
|
||||
strb w16, [x17, x2]
|
||||
LOCAL_LABEL(SetGroup):
|
||||
add x12, x17, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, LOCAL_LABEL(CardSet)
|
||||
strb w16, [x12, x15]
|
||||
LOCAL_LABEL(SetPage):
|
||||
ldrb w3, [x17]
|
||||
cbnz w3, LOCAL_LABEL(CardSet)
|
||||
strb w16, [x17]
|
||||
|
||||
LOCAL_LABEL(CardSet):
|
||||
// check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard)
|
||||
b LOCAL_LABEL(Exit)
|
||||
|
||||
// DIRTYING CARD FOR X14
|
||||
LOCAL_LABEL(DirtyCard):
|
||||
mov w16, #4
|
||||
add x2, x2, x17
|
||||
mov w17, #4
|
||||
add x2, x2, x16
|
||||
// must be after the field write to allow concurrent clean
|
||||
stlrb w16, [x2]
|
||||
stlrb w17, [x2]
|
||||
LOCAL_LABEL(DirtyGroup):
|
||||
add x12, x17, #0x80
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
tbnz w3, #2, LOCAL_LABEL(Exit)
|
||||
strb w16, [x12, x15]
|
||||
strb w17, [x12, x15]
|
||||
LOCAL_LABEL(DirtyPage):
|
||||
ldrb w3, [x17]
|
||||
ldrb w3, [x16]
|
||||
tbnz w3, #2, LOCAL_LABEL(Exit)
|
||||
strb w16, [x17]
|
||||
strb w17, [x16]
|
||||
b LOCAL_LABEL(Exit)
|
||||
|
||||
// this is expected to be rare.
|
||||
LOCAL_LABEL(RecordEscape):
|
||||
|
||||
// 4) check if the source is escaped
|
||||
and x12, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
add x16, x15, #8 // escape bit is MT + 1
|
||||
ubfx x17, x16, #9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x12, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x16, #3 // bit = (dst >> 3) [& 63]
|
||||
// 4) check if the source is escaped (x16 has source region)
|
||||
add x12, x15, #8 // escape bit is MT + 1
|
||||
ubfx x17, x12, #9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x12, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(AssignAndMarkCards) // source is already escaped.
|
||||
|
||||
// because of the barrier call convention
|
||||
// we need to preserve caller-saved x0 through x18 and x29/x30
|
||||
// we need to preserve caller-saved x0 through x15 and x29/x30
|
||||
|
||||
stp x29,x30, [sp, -16 * 9]!
|
||||
stp x0, x1, [sp, 16 * 1]
|
||||
|
@ -616,8 +630,8 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
// mov x0, x14 EscapeFn does not use dst, it is just to avoid arg shuffle on x64
|
||||
mov x1, x15
|
||||
and x2, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x2, #8] // EscapeFn address
|
||||
mov x2, x16 // source region
|
||||
ldr x12, [x16, #8] // EscapeFn address
|
||||
blr x12
|
||||
|
||||
ldp x0, x1, [sp, 16 * 1]
|
||||
|
@ -630,6 +644,7 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
|
|||
ldp x14,x15, [sp, 16 * 8]
|
||||
ldp x29,x30, [sp], 16 * 9
|
||||
|
||||
and x16, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
b LOCAL_LABEL(AssignAndMarkCards)
|
||||
LEAF_END RhpAssignRefArm64, _TEXT
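Before the store, the thread-local fast path above asks three questions: does the current thread own the source region (region tag at the region start), does the destination live in the same 2 MB region, and is the destination word still unexposed in the region's bitmap. Only when the object is escaping and its own escape bit (at MT + 1) is not yet set does it call the region's EscapeFn, after which it falls through to the ordered store and the card-marking path. A rough C++ reading of that flow; the field offsets, the 2 MB region size and the names are assumptions lifted from the assembly, not the real SatoriRegion layout:

    #include <cstdint>
    #include <cstddef>

    struct SatoriRegion;
    using EscapeFn = void (*)(void** dst, void* src, SatoriRegion* region);

    constexpr uintptr_t REGION_MASK = ~uintptr_t{0x1FFFFF};   // assumed 2 MB regions

    // One bit per 8 bytes of the region, bitmap addressed from the region start.
    inline bool IsExposed(uintptr_t region, uintptr_t address)
    {
        size_t   bit  = (address & 0x1FFFFF) >> 3;
        uint64_t word = reinterpret_cast<const uint64_t*>(region)[bit >> 6];
        return (word >> (bit & 63)) & 1;
    }

    inline void CheckedAssign(void** dst, void* src, uintptr_t currentThreadTag)
    {
        if (src == nullptr) { *dst = src; return; }           // null never escapes

        uintptr_t srcRegion = reinterpret_cast<uintptr_t>(src) & REGION_MASK;
        uintptr_t dstRegion = reinterpret_cast<uintptr_t>(dst) & REGION_MASK;

        if (*reinterpret_cast<uintptr_t*>(srcRegion) == currentThreadTag)   // we own the source region
        {
            if (dstRegion == srcRegion && !IsExposed(srcRegion, reinterpret_cast<uintptr_t>(dst)))
            {
                *dst = src;          // still thread-local: unordered store, no cards needed
                return;
            }

            // Escaping: record it once, unless the source's escape bit (MT + 1) is already set.
            if (!IsExposed(srcRegion, reinterpret_cast<uintptr_t>(src) + 8))
            {
                EscapeFn fn = *reinterpret_cast<EscapeFn*>(srcRegion + 8);
                fn(dst, src, reinterpret_cast<SatoriRegion*>(srcRegion));
            }
        }

        *dst = src;                  // the real barrier uses a release store here,
                                     // then falls through to the card-marking path
    }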
|
@ -670,8 +685,8 @@ LEAF_ENTRY RhpCheckedLockCmpXchg
|
|||
#else
|
||||
cbz x1, LOCAL_LABEL(JustAssign_Cmp_Xchg) // assigning null
|
||||
#endif
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x12] // region tag
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x16] // region tag
|
||||
#ifdef TARGET_OSX
|
||||
mrs x17, TPIDRRO_EL0
|
||||
and x17, x17, #-8 // thread tag on OSX
|
||||
|
@ -682,14 +697,13 @@ LEAF_ENTRY RhpCheckedLockCmpXchg
|
|||
bne LOCAL_LABEL(AssignAndMarkCards_Cmp_Xchg) // not local to this thread
|
||||
|
||||
// 2) check if the src and dst are from the same region
|
||||
eor x12, x0, x1
|
||||
lsr x12, x12, #21
|
||||
cbnz x12, LOCAL_LABEL(RecordEscape_Cmp_Xchg) // cross region assignment. definitely escaping
|
||||
and x12, x0, #0xFFFFFFFFFFE00000 // target aligned to region
|
||||
cmp x12, x16
|
||||
bne LOCAL_LABEL(RecordEscape_Cmp_Xchg) // cross region assignment. definitely escaping
|
||||
|
||||
// 3) check if the target is exposed
|
||||
ubfx x17, x0,#9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x17, [x12, x17, lsl #3] // mark word = [region + index * 8]
|
||||
ldr x17, [x16, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x0, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(RecordEscape_Cmp_Xchg) // target is exposed. record an escape.
|
||||
|
@ -704,8 +718,8 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocationNotHeap
|
|||
mov x15, x1 // x15 = val
|
||||
|
||||
#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16
|
||||
tbz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(TryAgain1_Cmp_Xchg)
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 17
|
||||
tbz w17, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(TryAgain1_Cmp_Xchg)
|
||||
#endif
|
||||
|
||||
mov x17, x2
|
||||
|
@ -713,7 +727,7 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
|
|||
casal x2, x1, [x0] // exchange
|
||||
mov x0, x2 // x0 = result
|
||||
cmp x2, x17
|
||||
bne LOCAL_LABEL(Exit_Cmp_Xchg)
|
||||
bne LOCAL_LABEL(Exit_Cmp_XchgNoCards)
|
||||
|
||||
#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
|
||||
b LOCAL_LABEL(SkipLLScCmpXchg)
|
||||
|
@ -736,87 +750,99 @@ LOCAL_LABEL(SkipLLScCmpXchg):
|
|||
#endif
|
||||
|
||||
cbnz x10, LOCAL_LABEL(DoCardsCmpXchg)
|
||||
LOCAL_LABEL(Exit_Cmp_Xchg):
|
||||
LOCAL_LABEL(Exit_Cmp_XchgNoCards):
|
||||
ret lr
|
||||
|
||||
LOCAL_LABEL(DoCardsCmpXchg):
|
||||
|
||||
eor x12, x14, x15
|
||||
lsr x12, x12, #21
|
||||
cbz x12, LOCAL_LABEL(CheckConcurrent_Cmp_Xchg) // same region, just check if barrier is not concurrent
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x17
|
||||
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
tbnz x17, #1, LOCAL_LABEL(Exit_Cmp_XchgNoCards)
|
||||
|
||||
LOCAL_LABEL(DoCardsCmpXchg1):
|
||||
// if same region, just check if barrier is not concurrent
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 // target aligned to region
|
||||
cmp x12, x16
|
||||
beq LOCAL_LABEL(CheckConcurrentCmpXchg) // same region, just check if barrier is not concurrent
|
||||
|
||||
// we will trash x2 and x3, this is a regular call, so it is ok
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
and x2, x15, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr w12, [x2, 16]
|
||||
tbz x12, #1, LOCAL_LABEL(MarkCards_Cmp_Xchg)
|
||||
ldr w12, [x16, 16] // source region + 16 -> generation
|
||||
tbz x12, #1, LOCAL_LABEL(MarkCardsCmpXchg)
|
||||
|
||||
LOCAL_LABEL(CheckConcurrentCmpXchg):
|
||||
// if not concurrent, exit
|
||||
cbz x17, LOCAL_LABEL(Exit_Cmp_XchgNoCards)
|
||||
|
||||
LOCAL_LABEL(MarkCardsCmpXchg):
|
||||
// need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
|
||||
LOCAL_LABEL(CheckConcurrent_Cmp_Xchg):
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbz x12, LOCAL_LABEL(Exit_Cmp_Xchg)
|
||||
|
||||
LOCAL_LABEL(MarkCards_Cmp_Xchg):
|
||||
// fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 // fetch the page map
|
||||
lsr x17, x14, #30
|
||||
ldr x17, [x12, x17, lsl #3] // page
|
||||
sub x2, x14, x17 // offset in page
|
||||
lsr x15, x2, #21 // group index
|
||||
lsl x15, x15, #1 // group offset (index * 2)
|
||||
lsr x16, x14, #30
|
||||
ldr x16, [x12, x16, lsl #3] // page
|
||||
sub x2, x14, x16 // offset in page
|
||||
lsr x15, x2, #20 // group index
|
||||
lsr x2, x2, #9 // card offset
|
||||
lsl x15, x15, #1 // group offset (index * 2)
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard_Cmp_Xchg)
|
||||
cbnz x17, LOCAL_LABEL(DirtyCardCmpXchg)
|
||||
|
||||
// SETTING CARD FOR X14
|
||||
LOCAL_LABEL(SetCard_Cmp_Xchg):
|
||||
ldrb w3, [x17, x2]
|
||||
cbnz w3, LOCAL_LABEL(CardSet_Cmp_Xchg)
|
||||
mov w16, #1
|
||||
strb w16, [x17, x2]
|
||||
LOCAL_LABEL(SetGroup_Cmp_Xchg):
|
||||
add x12, x17, #0x80
|
||||
LOCAL_LABEL(SetCardCmpXchg):
|
||||
ldrb w3, [x16, x2]
|
||||
cbnz w3, LOCAL_LABEL(ExitCmpXchg)
|
||||
mov w17, #1
|
||||
strb w17, [x16, x2]
|
||||
LOCAL_LABEL(SetGroupCmpXchg):
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, LOCAL_LABEL(CardSet_Cmp_Xchg)
|
||||
strb w16, [x12, x15]
|
||||
LOCAL_LABEL(SetPage_Cmp_Xchg):
|
||||
ldrb w3, [x17]
|
||||
cbnz w3, LOCAL_LABEL(CardSet_Cmp_Xchg)
|
||||
strb w16, [x17]
|
||||
cbnz w3, LOCAL_LABEL(CardSetCmpXchg)
|
||||
strb w17, [x12, x15]
|
||||
LOCAL_LABEL(SetPageCmpXchg):
|
||||
ldrb w3, [x16]
|
||||
cbnz w3, LOCAL_LABEL(CardSetCmpXchg)
|
||||
strb w17, [x16]
|
||||
|
||||
LOCAL_LABEL(CardSet_Cmp_Xchg):
|
||||
LOCAL_LABEL(CardSetCmpXchg):
|
||||
// check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard_Cmp_Xchg)
|
||||
ret lr
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
|
||||
cbnz x12, LOCAL_LABEL(DirtyCardCmpXchg)
|
||||
|
||||
LOCAL_LABEL(ExitCmpXchg):
|
||||
ldp x2, x3, [sp], 16
|
||||
ret lr
|
||||
|
||||
// DIRTYING CARD FOR X14
|
||||
LOCAL_LABEL(DirtyCard_Cmp_Xchg):
|
||||
mov w16, #4
|
||||
add x2, x2, x17
|
||||
LOCAL_LABEL(DirtyCardCmpXchg):
|
||||
mov w17, #4
|
||||
add x2, x2, x16
|
||||
// must be after the field write to allow concurrent clean
|
||||
stlrb w16, [x2]
|
||||
LOCAL_LABEL(DirtyGroup_Cmp_Xchg):
|
||||
add x12, x17, #0x80
|
||||
stlrb w17, [x2]
|
||||
LOCAL_LABEL(DirtyGroupCmpXchg):
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
tbnz w3, #2, LOCAL_LABEL(Exit_Cmp_Xchg)
|
||||
strb w16, [x12, x15]
|
||||
LOCAL_LABEL(DirtyPage_Cmp_Xchg):
|
||||
ldrb w3, [x17]
|
||||
tbnz w3, #2, LOCAL_LABEL(Exit_Cmp_Xchg)
|
||||
strb w16, [x17]
|
||||
ret lr
|
||||
tbnz w3, #2, LOCAL_LABEL(ExitCmpXchg)
|
||||
strb w17, [x12, x15]
|
||||
LOCAL_LABEL(DirtyPageCmpXchg):
|
||||
ldrb w3, [x16]
|
||||
tbnz w3, #2, LOCAL_LABEL(ExitCmpXchg)
|
||||
strb w17, [x16]
|
||||
b LOCAL_LABEL(ExitCmpXchg)
|
||||
|
||||
// this is expected to be rare.
|
||||
LOCAL_LABEL(RecordEscape_Cmp_Xchg):
|
||||
|
||||
// 4) check if the source is escaped
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
add x16, x1, #8 // escape bit is MT + 1
|
||||
ubfx x17, x16, #9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x12, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x16, #3 // bit = (dst >> 3) [& 63]
|
||||
add x12, x1, #8 // escape bit is MT + 1
|
||||
ubfx x17, x12, #9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x12, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(AssignAndMarkCards_Cmp_Xchg) // source is already escaped.
|
||||
|
||||
|
@ -827,8 +853,8 @@ LOCAL_LABEL(DoCardsCmpXchg):
|
|||
str x2, [sp, 16 * 2]
|
||||
|
||||
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
and x2, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x2, #8] // EscapeFn address
|
||||
mov x2, x16 // source region
|
||||
ldr x12, [x16, #8] // EscapeFn address
|
||||
blr x12
|
||||
|
||||
ldp x0, x1, [sp, 16 * 1]
|
||||
|
@ -837,6 +863,7 @@ LOCAL_LABEL(DoCardsCmpXchg):
|
|||
|
||||
// x10 should be non-zero to indicate that we can't skip cards.
|
||||
mov x10,#1
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
b LOCAL_LABEL(AssignAndMarkCards_Cmp_Xchg)
|
||||
LEAF_END RhpCheckedLockCmpXchg, _TEXT
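Functionally, RhpCheckedLockCmpXchg above is an interlocked compare-exchange on a possibly-heap slot: the escape check runs before the exchange, and card set/dirty work runs only when the exchange actually stored the new value. A minimal C++ sketch of that control flow, assuming invented names (CheckedCompareExchange, MarkCardsFor) rather than the runtime's entry points:

#include <atomic>

// Sketch only: the real barrier is the assembly above; this restates its shape.
inline void MarkCardsFor(void** dst, void* src) { (void)dst; (void)src; /* card set/dirty tail */ }

void* CheckedCompareExchange(void** dst, void* value, void* comparand)
{
    void* observed = comparand;
    // casal / ldaxr-stlxr pair in the assembly; a seq_cst CAS is a safe C++ analogue
    std::atomic_ref<void*>(*dst).compare_exchange_strong(observed, value);

    if (observed == comparand)        // the exchange happened
        MarkCardsFor(dst, value);     // cards are only needed on this path

    return observed;                  // x0 = the value previously at *dst
}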
|
||||
|
||||
|
@ -859,24 +886,23 @@ LEAF_END RhpCheckedLockCmpXchg, _TEXT
|
|||
//
|
||||
LEAF_ENTRY RhpCheckedXchg, _TEXT
|
||||
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x10
|
||||
// check if dst is in heap
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x0, lsr #30
|
||||
add x12, x10, x0, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, LOCAL_LABEL(JustAssign_Xchg)
|
||||
|
||||
// check for escaping assignment
|
||||
// 1) check if we own the source region
|
||||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x1, lsr #30
|
||||
add x12, x10, x1, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, LOCAL_LABEL(JustAssign_Xchg)
|
||||
#else
|
||||
cbz x1, LOCAL_LABEL(JustAssign_Xchg) // assigning null
|
||||
#endif
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x12] // region tag
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x16] // region tag
|
||||
#ifdef TARGET_OSX
|
||||
mrs x17, TPIDRRO_EL0
|
||||
and x17, x17, #-8 // thread tag on OSX
|
||||
|
@ -887,19 +913,19 @@ LEAF_ENTRY RhpCheckedXchg, _TEXT
|
|||
bne LOCAL_LABEL(AssignAndMarkCards_Xchg) // not local to this thread
|
||||
|
||||
// 2) check if the src and dst are from the same region
|
||||
eor x12, x0, x1
|
||||
lsr x12, x12, #21
|
||||
cbnz x12, LOCAL_LABEL(RecordEscape_Xchg) // cross region assignment. definitely escaping
|
||||
and x12, x0, #0xFFFFFFFFFFE00000 // target aligned to region
|
||||
cmp x12, x16
|
||||
bne LOCAL_LABEL(RecordEscape_Xchg) // cross region assignment. definitely escaping
|
||||
|
||||
// 3) check if the target is exposed
|
||||
ubfx x17, x0,#9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x17, [x12, x17, lsl #3] // mark word = [region + index * 8]
|
||||
ldr x17, [x16, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x0, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(RecordEscape_Xchg) // target is exposed. record an escape.
|
||||
|
||||
LOCAL_LABEL(JustAssign_Xchg):
|
||||
// TODO: VS use LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT instead
|
||||
#ifdef TARGET_OSX
|
||||
ALTERNATE_ENTRY RhpCheckedXchgAVLocationNotHeap
|
||||
swpal x1, x0, [x0] // exchange
|
||||
|
@ -930,85 +956,96 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation2
|
|||
dmb ish
|
||||
#endif
|
||||
|
||||
eor x12, x14, x1
|
||||
lsr x12, x12, #21
|
||||
cbz x12, LOCAL_LABEL(CheckConcurrent_Xchg) // same region, just check if barrier is not concurrent
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping the mode check would be worth that much.
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x17
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
tbz x17, #1, LOCAL_LABEL(DoCardsXchg)
|
||||
|
||||
LOCAL_LABEL(ExitNoCardsXchg):
|
||||
ret lr
|
||||
|
||||
LOCAL_LABEL(DoCardsXchg):
|
||||
// if same region, just check if barrier is not concurrent
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 // target aligned to region
|
||||
cmp x12, x16
|
||||
beq LOCAL_LABEL(CheckConcurrentXchg) // same region, just check if barrier is not concurrent
|
||||
|
||||
// we will trash x2 and x3, this is a regular call, so it is ok
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
and x2, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr w12, [x2, 16]
|
||||
tbz x12, #1, LOCAL_LABEL(MarkCards_Xchg)
|
||||
ldr w12, [x16, 16] // source region + 16 -> generation
|
||||
tbz x12, #1, LOCAL_LABEL(MarkCardsXchg)
|
||||
|
||||
LOCAL_LABEL(CheckConcurrent_Xchg):
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(MarkCards_Xchg)
|
||||
|
||||
LOCAL_LABEL(Exit_Xchg):
|
||||
ret lr
|
||||
LOCAL_LABEL(CheckConcurrentXchg):
|
||||
// if not concurrent, exit
|
||||
cbz x17, LOCAL_LABEL(ExitNoCardsXchg)
|
||||
|
||||
LOCAL_LABEL(MarkCardsXchg):
|
||||
// need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
|
||||
LOCAL_LABEL(MarkCards_Xchg):
|
||||
// fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 // fetch the page map
|
||||
lsr x17, x14, #30
|
||||
ldr x17, [x12, x17, lsl #3] // page
|
||||
sub x2, x14, x17 // offset in page
|
||||
lsr x1, x2, #21 // group index
|
||||
lsl x1, x1, #1 // group offset (index * 2)
|
||||
lsr x16, x14, #30
|
||||
ldr x16, [x12, x16, lsl #3] // page
|
||||
sub x2, x14, x16 // offset in page
|
||||
lsr x15, x2, #20 // group index
|
||||
lsr x2, x2, #9 // card offset
|
||||
lsl x15, x15, #1 // group offset (index * 2)
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard_Xchg)
|
||||
cbnz x17, LOCAL_LABEL(DirtyCardXchg)
|
||||
|
||||
// SETTING CARD FOR X14
|
||||
LOCAL_LABEL(SetCard_Xchg):
|
||||
ldrb w3, [x17, x2]
|
||||
cbnz w3, LOCAL_LABEL(CardSet_Xchg)
|
||||
mov w16, #1
|
||||
strb w16, [x17, x2]
|
||||
LOCAL_LABEL(SetGroup_Xchg):
|
||||
add x12, x17, #0x80
|
||||
ldrb w3, [x12, x1]
|
||||
cbnz w3, LOCAL_LABEL(CardSet_Xchg)
|
||||
strb w16, [x12, x1]
|
||||
LOCAL_LABEL(SetPage_Xchg):
|
||||
ldrb w3, [x17]
|
||||
cbnz w3, LOCAL_LABEL(CardSet_Xchg)
|
||||
strb w16, [x17]
|
||||
LOCAL_LABEL(SetCardXchg):
|
||||
ldrb w3, [x16, x2]
|
||||
cbnz w3, LOCAL_LABEL(ExitXchg)
|
||||
mov w17, #1
|
||||
strb w17, [x16, x2]
|
||||
LOCAL_LABEL(SetGroupXchg):
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, LOCAL_LABEL(CardSetXchg)
|
||||
strb w17, [x12, x15]
|
||||
LOCAL_LABEL(SetPageXchg):
|
||||
ldrb w3, [x16]
|
||||
cbnz w3, LOCAL_LABEL(CardSetXchg)
|
||||
strb w17, [x16]
|
||||
|
||||
LOCAL_LABEL(CardSet_Xchg):
|
||||
LOCAL_LABEL(CardSetXchg):
|
||||
// check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 // !g_write_watch_table -> !concurrent
|
||||
cbnz x12, LOCAL_LABEL(DirtyCard_Xchg)
|
||||
b LOCAL_LABEL(Exit_Xchg)
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
|
||||
cbnz x12, LOCAL_LABEL(DirtyCardXchg)
|
||||
|
||||
LOCAL_LABEL(ExitXchg):
|
||||
ldp x2, x3, [sp], 16
|
||||
ret lr
|
||||
|
||||
// DIRTYING CARD FOR X14
|
||||
LOCAL_LABEL(DirtyCard_Xchg):
|
||||
mov w16, #4
|
||||
add x2, x2, x17
|
||||
LOCAL_LABEL(DirtyCardXchg):
|
||||
mov w17, #4
|
||||
add x2, x2, x16
|
||||
// must be after the field write to allow concurrent clean
|
||||
stlrb w16, [x2]
|
||||
LOCAL_LABEL(DirtyGroup_Xchg):
|
||||
add x12, x17, #0x80
|
||||
ldrb w3, [x12, x1]
|
||||
tbnz w3, #2, LOCAL_LABEL(Exit_Xchg)
|
||||
strb w16, [x12, x1]
|
||||
LOCAL_LABEL(DirtyPage_Xchg):
|
||||
ldrb w3, [x17]
|
||||
tbnz w3, #2, LOCAL_LABEL(Exit_Xchg)
|
||||
strb w16, [x17]
|
||||
b LOCAL_LABEL(Exit_Xchg)
|
||||
stlrb w17, [x2]
|
||||
LOCAL_LABEL(DirtyGroupXchg):
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
tbnz w3, #2, LOCAL_LABEL(ExitXchg)
|
||||
strb w17, [x12, x15]
|
||||
LOCAL_LABEL(DirtyPageXchg):
|
||||
ldrb w3, [x16]
|
||||
tbnz w3, #2, LOCAL_LABEL(ExitXchg)
|
||||
strb w17, [x16]
|
||||
b LOCAL_LABEL(ExitXchg)
|
||||
|
||||
// this is expected to be rare.
|
||||
LOCAL_LABEL(RecordEscape_Xchg):
|
||||
|
||||
// 4) check if the source is escaped
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
add x16, x1, #8 // escape bit is MT + 1
|
||||
ubfx x17, x16, #9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x12, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x16, #3 // bit = (dst >> 3) [& 63]
|
||||
add x12, x1, #8 // escape bit is MT + 1
|
||||
ubfx x17, x12, #9,#12 // word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] // mark word = [region + index * 8]
|
||||
lsr x12, x12, #3 // bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, LOCAL_LABEL(AssignAndMarkCards_Xchg) // source is already escaped.
|
||||
|
||||
|
@ -1017,13 +1054,14 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation2
|
|||
stp x0, x1, [sp, 16 * 1]
|
||||
|
||||
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
and x2, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
ldr x12, [x2, #8] // EscapeFn address
|
||||
mov x2, x16 // source region
|
||||
ldr x12, [x16, #8] // EscapeFn address
|
||||
blr x12
|
||||
|
||||
ldp x0, x1, [sp, 16 * 1]
|
||||
ldp x29,x30, [sp], 16 * 2
|
||||
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 // source region
|
||||
b LOCAL_LABEL(AssignAndMarkCards_Xchg)
|
||||
LEAF_END RhpCheckedXchg, _TEXT
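The escape checks at the top of these barriers key off the 2 MB region that contains the source object: word 0 of a region is an ownership tag compared against the current thread, +8 holds the escape callback, +16 the generation, and a per-region bitmap records which words are exposed. A hedged C++ sketch of those checks; only the offsets, masks, and shifts come from the assembly, the struct and names are invented for illustration:

#include <cstdint>

struct RegionSketch                   // illustrative stand-in for a Satori region header
{
    uintptr_t ownerTag;               // +0:  compared against the thread tag (x18 / TPIDRRO_EL0)
    void*     escapeFn;               // +8:  called from the RecordEscape paths
    uint32_t  generation;             // +16: bit 1 set => tenured (gen2/3)
};

inline RegionSketch* RegionOf(const void* p)
{
    // regions are 2 MB aligned: address & ~0x1FFFFF
    return (RegionSketch*)((uintptr_t)p & ~(uintptr_t)0x1FFFFF);
}

inline bool IsExposed(const RegionSketch* region, const void* address)
{
    // ubfx #9,#12 -> word index within the region's bitmap; the variable shift
    // is taken mod 64, i.e. the bit within that word
    const uintptr_t* bitmap = (const uintptr_t*)region;
    uintptr_t word = bitmap[((uintptr_t)address >> 9) & 0xFFF];
    return (word >> (((uintptr_t)address >> 3) & 63)) & 1;
}

// Fast path, roughly: if the source region is not owned by the current thread,
// or dst lies in a different region, or dst is exposed, fall into the card
// marking / escape recording paths; otherwise a plain unordered store suffices.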
|
||||
|
||||
|
|
|
@ -431,13 +431,17 @@ NoBarrierXchg
|
|||
;; x15 : the object reference (RHS of the assignment).
|
||||
;;
|
||||
;; On exit:
|
||||
;; x12, x17 : trashed
|
||||
;; x14 : incremented by 8
|
||||
;; x12 : trashed
|
||||
;; x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
|
||||
;; x15 : trashed
|
||||
;; x16 : trashed (ip0)
|
||||
;; x17 : trashed (ip1)
|
||||
LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x14, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbnz x12, RhpAssignRefArm64
|
||||
;; See if dst is in GCHeap
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x16
|
||||
lsr x17, x14, #30 ;; dst page index
|
||||
ldrb w12, [x16, x17]
|
||||
cbnz x12, CheckedEntry
|
||||
|
||||
NotInHeap
|
||||
ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
|
||||
|
@ -451,142 +455,153 @@ NotInHeap
|
|||
;; reside on the managed heap.
|
||||
;;
|
||||
;; On entry:
|
||||
;; x14 : the destination address (LHS of the assignment).
|
||||
;; x15 : the object reference (RHS of the assignment).
|
||||
;; x14 : the destination address (LHS of the assignment)
|
||||
;; x15 : the object reference (RHS of the assignment)
|
||||
;;
|
||||
;; On exit:
|
||||
;; x12, x17 : trashed
|
||||
;; x14 : incremented by 8
|
||||
;; x12 : trashed
|
||||
;; x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
|
||||
;; x15 : trashed
|
||||
;; x16 : trashed (ip0)
|
||||
;; x17 : trashed (ip1)
|
||||
LEAF_ENTRY RhpAssignRefArm64, _TEXT
|
||||
;; check for escaping assignment
|
||||
;; 1) check if we own the source region
|
||||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x15, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, JustAssign
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x16
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
lsr x17, x15, #30 ;; source page index
|
||||
ldrb w12, [x16, x17]
|
||||
cbz x12, JustAssign ;; null or external (immutable) object
|
||||
#else
|
||||
cbz x15, JustAssign ;; assigning null
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
cbz x15, JustAssign ;; assigning null
|
||||
#endif
|
||||
and x12, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x12] ; region tag
|
||||
cmp x12, x18 ; x18 - TEB
|
||||
bne AssignAndMarkCards ; not local to this thread
|
||||
and x16, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x16] ;; region tag
|
||||
|
||||
cmp x12, x18 ;; x18 - TEB
|
||||
bne AssignAndMarkCards ;; not local to this thread
|
||||
|
||||
;; 2) check if the src and dst are from the same region
|
||||
eor x12, x14, x15
|
||||
lsr x12, x12, #21
|
||||
cbnz x12, RecordEscape ;; cross region assignment. definitely escaping
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 ;; target aligned to region
|
||||
cmp x12, x16
|
||||
bne RecordEscape ;; cross region assignment. definitely escaping
|
||||
|
||||
;; 3) check if the target is exposed
|
||||
ubfx x17, x14,#9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
and x12, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x17, [x12, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
ldr x17, [x16, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x14, #3 ;; bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, RecordEscape ;; target is exposed. record an escape.
|
||||
|
||||
str x15, [x14], #8 ;; UNORDERED assignment of unescaped object
|
||||
ret lr
|
||||
tbnz x17, #0, RecordEscape ;; target is exposed. record an escape.
|
||||
|
||||
;; UNORDERED! assignment of unescaped, null or external (immutable) object
|
||||
JustAssign
|
||||
ALTERNATE_ENTRY RhpAssignRefAVLocationNotHeap
|
||||
stlr x15, [x14] ;; no card marking, src is not a heap object
|
||||
add x14, x14, 8
|
||||
ret lr
|
||||
str x15, [x14], #8
|
||||
ret lr
|
||||
|
||||
AssignAndMarkCards
|
||||
ALTERNATE_ENTRY RhpAssignRefAVLocation
|
||||
stlr x15, [x14]
|
||||
|
||||
;; need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping the mode check would be worth that much.
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x17
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
tbz x17, #1, DoCards
|
||||
|
||||
eor x12, x14, x15
|
||||
lsr x12, x12, #21
|
||||
cbz x12, CheckConcurrent ;; same region, just check if barrier is not concurrent
|
||||
ExitNoCards
|
||||
add x14, x14, 8
|
||||
ret lr
|
||||
|
||||
;; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
and x2, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr w12, [x2, 16]
|
||||
DoCards
|
||||
; if same region, just check if barrier is not concurrent
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 ; target aligned to region
|
||||
cmp x12, x16
|
||||
beq CheckConcurrent ; same region, just check if barrier is not concurrent
|
||||
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
ldr w12, [x16, 16] ; source region + 16 -> generation
|
||||
tbz x12, #1, MarkCards
|
||||
|
||||
CheckConcurrent
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, MarkCards
|
||||
|
||||
; if not concurrent, exit
|
||||
cbz x17, ExitNoCards
|
||||
|
||||
MarkCards
|
||||
; need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
|
||||
; fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 ; fetch the page map
|
||||
lsr x16, x14, #30
|
||||
ldr x16, [x12, x16, lsl #3] ; page
|
||||
sub x2, x14, x16 ; offset in page
|
||||
lsr x15, x2, #20 ; group index
|
||||
lsr x2, x2, #9 ; card offset
|
||||
lsl x15, x15, #1 ; group offset (index * 2)
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cbnz x17, DirtyCard
|
||||
|
||||
; SETTING CARD FOR X14
|
||||
SetCard
|
||||
ldrb w3, [x16, x2]
|
||||
cbnz w3, Exit
|
||||
mov w17, #1
|
||||
strb w17, [x16, x2]
|
||||
SetGroup
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, CardSet
|
||||
strb w17, [x12, x15]
|
||||
SetPage
|
||||
ldrb w3, [x16]
|
||||
cbnz w3, CardSet
|
||||
strb w17, [x16]
|
||||
|
||||
CardSet
|
||||
; check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
|
||||
cbnz x12, DirtyCard
|
||||
|
||||
Exit
|
||||
ldp x2, x3, [sp], 16
|
||||
add x14, x14, 8
|
||||
ret lr
|
||||
|
||||
MarkCards
|
||||
;; fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 ;; fetch the page map
|
||||
lsr x17, x14, #30
|
||||
ldr x17, [x12, x17, lsl #3] ;; page
|
||||
sub x2, x14, x17 ;; offset in page
|
||||
lsr x15, x2, #21 ;; group index
|
||||
lsl x15, x15, #1 ;; group offset (index * 2)
|
||||
lsr x2, x2, #9 ;; card offset
|
||||
|
||||
;; check if concurrent marking is in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, DirtyCard
|
||||
|
||||
;; SETTING CARD FOR X14
|
||||
SetCard
|
||||
ldrb w3, [x17, x2]
|
||||
cbnz w3, CardSet
|
||||
mov w16, #1
|
||||
strb w16, [x17, x2]
|
||||
SetGroup
|
||||
add x12, x17, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, CardSet
|
||||
strb w16, [x12, x15]
|
||||
SetPage
|
||||
ldrb w3, [x17]
|
||||
cbnz w3, CardSet
|
||||
strb w16, [x17]
|
||||
|
||||
CardSet
|
||||
;; check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, DirtyCard
|
||||
b Exit
|
||||
|
||||
;; DIRTYING CARD FOR X14
|
||||
; DIRTYING CARD FOR X14
|
||||
DirtyCard
|
||||
mov w16, #4
|
||||
add x2, x2, x17
|
||||
;; must be after the field write to allow concurrent clean
|
||||
stlrb w16, [x2]
|
||||
mov w17, #4
|
||||
add x2, x2, x16
|
||||
; must be after the field write to allow concurrent clean
|
||||
stlrb w17, [x2]
|
||||
DirtyGroup
|
||||
add x12, x17, #0x80
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
tbnz w3, #2, Exit
|
||||
strb w16, [x12, x15]
|
||||
strb w17, [x12, x15]
|
||||
DirtyPage
|
||||
ldrb w3, [x17]
|
||||
ldrb w3, [x16]
|
||||
tbnz w3, #2, Exit
|
||||
strb w16, [x17]
|
||||
strb w17, [x16]
|
||||
b Exit
|
||||
|
||||
;; this is expected to be rare.
|
||||
RecordEscape
|
||||
|
||||
;; 4) check if the source is escaped
|
||||
and x12, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
add x16, x15, #8 ;; escape bit is MT + 1
|
||||
ubfx x17, x16, #9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x12, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x16, #3 ;; bit = (dst >> 3) [& 63]
|
||||
;; 4) check if the source is escaped (x16 has source region)
|
||||
add x12, x15, #8 ;; escape bit is MT + 1
|
||||
ubfx x17, x12, #9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x12, #3 ;; bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, AssignAndMarkCards ;; source is already escaped.
|
||||
|
||||
;; because of the barrier call convention
|
||||
;; we need to preserve caller-saved x0 through x18 and x29/x30
|
||||
;; we need to preserve caller-saved x0 through x15 and x29/x30
|
||||
|
||||
stp x29,x30, [sp, -16 * 9]!
|
||||
stp x0, x1, [sp, 16 * 1]
|
||||
|
@ -601,8 +616,8 @@ RecordEscape
|
|||
;; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
;; mov x0, x14 EscapeFn does not use dst, it is just to avoid arg shuffle on x64
|
||||
mov x1, x15
|
||||
and x2, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x2, #8] ;; EscapeFn address
|
||||
mov x2, x16 ;; source region
|
||||
ldr x12, [x16, #8] ;; EscapeFn address
|
||||
blr x12
|
||||
|
||||
ldp x0, x1, [sp, 16 * 1]
|
||||
|
@ -615,6 +630,7 @@ RecordEscape
|
|||
ldp x14,x15, [sp, 16 * 8]
|
||||
ldp x29,x30, [sp], 16 * 9
|
||||
|
||||
and x16, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
b AssignAndMarkCards
|
||||
LEAF_END RhpAssignRefArm64
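When a store would publish a thread-local object (cross-region target, or an exposed destination), the RecordEscape path above saves the caller-visible registers and calls the region's escape callback before rejoining AssignAndMarkCards. Roughly, in C++ terms; SatoriObject is left opaque, the +8 EscapeFn slot is taken from the assembly, and everything else here is an illustrative assumption:

#include <cstdint>

struct SatoriObject;                  // opaque in this sketch
struct EscapeRegionSketch             // illustrative: +0 tag, +8 escape callback
{
    uintptr_t tag;
    void (*EscapeFn)(SatoriObject** dst, SatoriObject* src, EscapeRegionSketch* region);
};

inline void RecordEscapeThenAssign(SatoriObject** dst, SatoriObject* src)
{
    auto* region = (EscapeRegionSketch*)((uintptr_t)src & ~(uintptr_t)0x1FFFFF);

    // the assembly first re-checks the escape bit at src + 8 and skips the call
    // if the source object is already escaped
    region->EscapeFn(dst, src, region);

    // ...then continues on the ordinary AssignAndMarkCards path: a release store
    // of src into *dst followed by card set/dirty.
}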
|
||||
|
||||
|
@ -655,20 +671,20 @@ RecordEscape
|
|||
#else
|
||||
cbz x1, JustAssign_Cmp_Xchg ;; assigning null
|
||||
#endif
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 ; source region
|
||||
ldr x12, [x12] ; region tag
|
||||
cmp x12, x18 ; x18 - TEB
|
||||
bne AssignAndMarkCards_Cmp_Xchg ; not local to this thread
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x16] ;; region tag
|
||||
|
||||
cmp x12, x18 ;; x18 - TEB
|
||||
bne AssignAndMarkCards_Cmp_Xchg ;; not local to this thread
|
||||
|
||||
;; 2) check if the src and dst are from the same region
|
||||
eor x12, x0, x1
|
||||
lsr x12, x12, #21
|
||||
cbnz x12, RecordEscape_Cmp_Xchg ;; cross region assignment. definitely escaping
|
||||
and x12, x0, #0xFFFFFFFFFFE00000 ;; target aligned to region
|
||||
cmp x12, x16
|
||||
bne RecordEscape_Cmp_Xchg ;; cross region assignment. definitely escaping
|
||||
|
||||
;; 3) check if the target is exposed
|
||||
ubfx x17, x0,#9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x17, [x12, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
ldr x17, [x16, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x0, #3 ;; bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, RecordEscape_Cmp_Xchg ;; target is exposed. record an escape.
|
||||
|
@ -683,8 +699,8 @@ AssignAndMarkCards_Cmp_Xchg
|
|||
mov x15, x1 ;; x15 = val
|
||||
|
||||
#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 16
|
||||
tbz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, TryAgain1_Cmp_Xchg
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT_W g_cpuFeatures, 17
|
||||
tbz w17, #ARM64_ATOMICS_FEATURE_FLAG_BIT, TryAgain1_Cmp_Xchg
|
||||
#endif
|
||||
|
||||
mov x17, x2
|
||||
|
@ -692,7 +708,7 @@ AssignAndMarkCards_Cmp_Xchg
|
|||
casal x2, x1, [x0] ;; exchange
|
||||
mov x0, x2 ;; x0 = result
|
||||
cmp x2, x17
|
||||
bne Exit_Cmp_Xchg
|
||||
bne Exit_Cmp_XchgNoCards
|
||||
|
||||
#ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
|
||||
b SkipLLScCmpXchg
|
||||
|
@ -715,87 +731,98 @@ SkipLLScCmpXchg
|
|||
#endif
|
||||
|
||||
cbnz x10, DoCardsCmpXchg
|
||||
Exit_Cmp_Xchg
|
||||
Exit_Cmp_XchgNoCards
|
||||
ret lr
|
||||
|
||||
DoCardsCmpXchg
|
||||
|
||||
eor x12, x14, x15
|
||||
lsr x12, x12, #21
|
||||
cbz x12, CheckConcurrent_Cmp_Xchg ;; same region, just check if barrier is not concurrent
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping the mode check would be worth that much.
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x17
|
||||
|
||||
;; we will trash x2 and x3, this is a regular call, so it is ok
|
||||
;; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
and x2, x15, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr w12, [x2, 16]
|
||||
tbz x12, #1, MarkCards_Cmp_Xchg
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
tbnz x17, #1, Exit_Cmp_XchgNoCards
|
||||
|
||||
CheckConcurrent_Cmp_Xchg
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbz x12, Exit_Cmp_Xchg
|
||||
|
||||
MarkCards_Cmp_Xchg
|
||||
;; fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 ;; fetch the page map
|
||||
lsr x17, x14, #30
|
||||
ldr x17, [x12, x17, lsl #3] ;; page
|
||||
sub x2, x14, x17 ;; offset in page
|
||||
lsr x15, x2, #21 ;; group index
|
||||
lsl x15, x15, #1 ;; group offset (index * 2)
|
||||
lsr x2, x2, #9 ;; card offset
|
||||
; if same region, just check if barrier is not concurrent
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 ; target aligned to region
|
||||
cmp x12, x16
|
||||
beq CheckConcurrentCmpXchg ; same region, just check if barrier is not concurrent
|
||||
|
||||
;; check if concurrent marking is in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, DirtyCard_Cmp_Xchg
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
ldr w12, [x16, 16] ; source region + 16 -> generation
|
||||
tbz x12, #1, MarkCardsCmpXchg
|
||||
|
||||
;; SETTING CARD FOR X14
|
||||
SetCard_Cmp_Xchg
|
||||
ldrb w3, [x17, x2]
|
||||
cbnz w3, CardSet_Cmp_Xchg
|
||||
mov w16, #1
|
||||
strb w16, [x17, x2]
|
||||
SetGroup_Cmp_Xchg
|
||||
add x12, x17, #0x80
|
||||
CheckConcurrentCmpXchg
|
||||
; if not concurrent, exit
|
||||
cbz x17, Exit_Cmp_XchgNoCards
|
||||
|
||||
MarkCardsCmpXchg
|
||||
; need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
|
||||
; fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 ; fetch the page map
|
||||
lsr x16, x14, #30
|
||||
ldr x16, [x12, x16, lsl #3] ; page
|
||||
sub x2, x14, x16 ; offset in page
|
||||
lsr x15, x2, #20 ; group index
|
||||
lsr x2, x2, #9 ; card offset
|
||||
lsl x15, x15, #1 ; group offset (index * 2)
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cbnz x17, DirtyCardCmpXchg
|
||||
|
||||
; SETTING CARD FOR X14
|
||||
SetCardCmpXchg
|
||||
ldrb w3, [x16, x2]
|
||||
cbnz w3, ExitCmpXchg
|
||||
mov w17, #1
|
||||
strb w17, [x16, x2]
|
||||
SetGroupCmpXchg
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, CardSet_Cmp_Xchg
|
||||
strb w16, [x12, x15]
|
||||
SetPage_Cmp_Xchg
|
||||
ldrb w3, [x17]
|
||||
cbnz w3, CardSet_Cmp_Xchg
|
||||
strb w16, [x17]
|
||||
cbnz w3, CardSetCmpXchg
|
||||
strb w17, [x12, x15]
|
||||
SetPageCmpXchg
|
||||
ldrb w3, [x16]
|
||||
cbnz w3, CardSetCmpXchg
|
||||
strb w17, [x16]
|
||||
|
||||
CardSet_Cmp_Xchg
|
||||
;; check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, DirtyCard_Cmp_Xchg
|
||||
ret lr
|
||||
CardSetCmpXchg
|
||||
; check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
|
||||
cbnz x12, DirtyCardCmpXchg
|
||||
|
||||
;; DIRTYING CARD FOR X14
|
||||
DirtyCard_Cmp_Xchg
|
||||
mov w16, #4
|
||||
add x2, x2, x17
|
||||
;; must be after the field write to allow concurrent clean
|
||||
stlrb w16, [x2]
|
||||
DirtyGroup_Cmp_Xchg
|
||||
add x12, x17, #0x80
|
||||
ExitCmpXchg
|
||||
ldp x2, x3, [sp], 16
|
||||
ret lr
|
||||
|
||||
; DIRTYING CARD FOR X14
|
||||
DirtyCardCmpXchg
|
||||
mov w17, #4
|
||||
add x2, x2, x16
|
||||
; must be after the field write to allow concurrent clean
|
||||
stlrb w17, [x2]
|
||||
DirtyGroupCmpXchg
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
tbnz w3, #2, Exit_Cmp_Xchg
|
||||
strb w16, [x12, x15]
|
||||
DirtyPage_Cmp_Xchg
|
||||
ldrb w3, [x17]
|
||||
tbnz w3, #2, Exit_Cmp_Xchg
|
||||
strb w16, [x17]
|
||||
ret lr
|
||||
tbnz w3, #2, ExitCmpXchg
|
||||
strb w17, [x12, x15]
|
||||
DirtyPageCmpXchg
|
||||
ldrb w3, [x16]
|
||||
tbnz w3, #2, ExitCmpXchg
|
||||
strb w17, [x16]
|
||||
b ExitCmpXchg
|
||||
|
||||
;; this is expected to be rare.
|
||||
RecordEscape_Cmp_Xchg
|
||||
|
||||
;; 4) check if the source is escaped
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
add x16, x1, #8 ;; escape bit is MT + 1
|
||||
ubfx x17, x16, #9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x12, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x16, #3 ;; bit = (dst >> 3) [& 63]
|
||||
add x12, x1, #8 ;; escape bit is MT + 1
|
||||
ubfx x17, x12, #9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x12, #3 ;; bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, AssignAndMarkCards_Cmp_Xchg ;; source is already escaped.
|
||||
|
||||
|
@ -806,8 +833,8 @@ RecordEscape_Cmp_Xchg
|
|||
str x2, [sp, 16 * 2]
|
||||
|
||||
;; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
and x2, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x2, #8] ;; EscapeFn address
|
||||
mov x2, x16 ;; source region
|
||||
ldr x12, [x16, #8] ;; EscapeFn address
|
||||
blr x12
|
||||
|
||||
ldp x0, x1, [sp, 16 * 1]
|
||||
|
@ -816,6 +843,7 @@ RecordEscape_Cmp_Xchg
|
|||
|
||||
;; x10 should be non-zero to indicate that we can't skip cards.
|
||||
mov x10,#1
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
b AssignAndMarkCards_Cmp_Xchg
|
||||
LEAF_END RhpCheckedLockCmpXchg
|
||||
|
||||
|
@ -838,43 +866,43 @@ RecordEscape_Cmp_Xchg
|
|||
;;
|
||||
LEAF_ENTRY RhpCheckedXchg, _TEXT
|
||||
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x10
|
||||
;; check if dst is in heap
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x0, lsr #30
|
||||
add x12, x10, x0, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, JustAssign_Xchg
|
||||
|
||||
;; check for escaping assignment
|
||||
;; 1) check if we own the source region
|
||||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
|
||||
add x12, x12, x1, lsr #30
|
||||
add x12, x10, x1, lsr #30
|
||||
ldrb w12, [x12]
|
||||
cbz x12, JustAssign_Xchg
|
||||
#else
|
||||
cbz x1, JustAssign_Xchg ;; assigning null
|
||||
#endif
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 ; source region
|
||||
ldr x12, [x12] ; region tag
|
||||
cmp x12, x18 ; x18 - TEB
|
||||
bne AssignAndMarkCards_Xchg ; not local to this thread
|
||||
and x16, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x16] ;; region tag
|
||||
|
||||
cmp x12, x18 ;; x18 - TEB
|
||||
bne AssignAndMarkCards_Xchg ;; not local to this thread
|
||||
|
||||
;; 2) check if the src and dst are from the same region
|
||||
eor x12, x0, x1
|
||||
lsr x12, x12, #21
|
||||
cbnz x12, RecordEscape_Xchg ;; cross region assignment. definitely escaping
|
||||
and x12, x0, #0xFFFFFFFFFFE00000 ;; target aligned to region
|
||||
cmp x12, x16
|
||||
bne RecordEscape_Xchg ;; cross region assignment. definitely escaping
|
||||
|
||||
;; 3) check if the target is exposed
|
||||
ubfx x17, x0,#9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x17, [x12, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x0, #3 ;; bit = (dst >> 3) [& 63]
|
||||
ubfx x17, x0,#9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x0, #3 ;; bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, RecordEscape_Xchg ;; target is exposed. record an escape.
|
||||
|
||||
JustAssign_Xchg
|
||||
TryAgain_Xchg
|
||||
ALTERNATE_ENTRY RhpCheckedXchgAVLocationNotHeap
|
||||
;; TODO: VS use LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT instead
|
||||
ldaxr x17, [x0]
|
||||
stlxr w12, x1, [x0]
|
||||
cbnz w12, TryAgain_Xchg
|
||||
|
@ -893,85 +921,97 @@ TryAgain1_Xchg
|
|||
mov x0, x17
|
||||
dmb ish
|
||||
|
||||
eor x12, x14, x1
|
||||
lsr x12, x12, #21
|
||||
cbz x12, CheckConcurrent_Xchg ;; same region, just check if barrier is not concurrent
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping the mode check would be worth that much.
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x17
|
||||
|
||||
;; we will trash x2 and x3, this is a regular call, so it is ok
|
||||
;; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
and x2, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr w12, [x2, 16]
|
||||
tbz x12, #1, MarkCards_Xchg
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
tbz x17, #1, DoCardsXchg
|
||||
|
||||
CheckConcurrent_Xchg
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, MarkCards_Xchg
|
||||
|
||||
Exit_Xchg
|
||||
ExitNoCardsXchg
|
||||
ret lr
|
||||
|
||||
DoCardsXchg
|
||||
; if same region, just check if barrier is not concurrent
|
||||
and x12, x14, #0xFFFFFFFFFFE00000 ; target aligned to region
|
||||
cmp x12, x16
|
||||
beq CheckConcurrentXchg ; same region, just check if barrier is not concurrent
|
||||
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
ldr w12, [x16, 16] ; source region + 16 -> generation
|
||||
tbz x12, #1, MarkCardsXchg
|
||||
|
||||
CheckConcurrentXchg
|
||||
; if not concurrent, exit
|
||||
cbz x17, ExitNoCardsXchg
|
||||
|
||||
MarkCardsXchg
|
||||
; need couple temps. Save before using.
|
||||
stp x2, x3, [sp, -16]!
|
||||
|
||||
; fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 ; fetch the page map
|
||||
lsr x16, x14, #30
|
||||
ldr x16, [x12, x16, lsl #3] ; page
|
||||
sub x2, x14, x16 ; offset in page
|
||||
lsr x15, x2, #20 ; group index
|
||||
lsr x2, x2, #9 ; card offset
|
||||
lsl x15, x15, #1 ; group offset (index * 2)
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cbnz x17, DirtyCardXchg
|
||||
|
||||
; SETTING CARD FOR X14
|
||||
SetCardXchg
|
||||
ldrb w3, [x16, x2]
|
||||
cbnz w3, ExitXchg
|
||||
mov w17, #1
|
||||
strb w17, [x16, x2]
|
||||
SetGroupXchg
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
cbnz w3, CardSetXchg
|
||||
strb w17, [x12, x15]
|
||||
SetPageXchg
|
||||
ldrb w3, [x16]
|
||||
cbnz w3, CardSetXchg
|
||||
strb w17, [x16]
|
||||
|
||||
CardSetXchg
|
||||
; check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12
|
||||
cbnz x12, DirtyCardXchg
|
||||
|
||||
ExitXchg
|
||||
ldp x2, x3, [sp], 16
|
||||
ret lr
|
||||
|
||||
MarkCards_Xchg
|
||||
;; fetch card location for x14
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x12 ;; fetch the page map
|
||||
lsr x17, x14, #30
|
||||
ldr x17, [x12, x17, lsl #3] ;; page
|
||||
sub x2, x14, x17 ;; offset in page
|
||||
lsr x1, x2, #21 ;; group index
|
||||
lsl x1, x1, #1 ;; group offset (index * 2)
|
||||
lsr x2, x2, #9 ;; card offset
|
||||
|
||||
;; check if concurrent marking is in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, DirtyCard_Xchg
|
||||
|
||||
;; SETTING CARD FOR X14
|
||||
SetCard_Xchg
|
||||
ldrb w3, [x17, x2]
|
||||
cbnz w3, CardSet_Xchg
|
||||
mov w16, #1
|
||||
strb w16, [x17, x2]
|
||||
SetGroup_Xchg
|
||||
add x12, x17, #0x80
|
||||
ldrb w3, [x12, x1]
|
||||
cbnz w3, CardSet_Xchg
|
||||
strb w16, [x12, x1]
|
||||
SetPage_Xchg
|
||||
ldrb w3, [x17]
|
||||
cbnz w3, CardSet_Xchg
|
||||
strb w16, [x17]
|
||||
|
||||
CardSet_Xchg
|
||||
;; check if concurrent marking is still not in progress
|
||||
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x12 ;; !g_write_watch_table -> !concurrent
|
||||
cbnz x12, DirtyCard_Xchg
|
||||
b Exit_Xchg
|
||||
|
||||
;; DIRTYING CARD FOR X14
|
||||
DirtyCard_Xchg
|
||||
mov w16, #4
|
||||
add x2, x2, x17
|
||||
;; must be after the field write to allow concurrent clean
|
||||
stlrb w16, [x2]
|
||||
DirtyGroup_Xchg
|
||||
add x12, x17, #0x80
|
||||
ldrb w3, [x12, x1]
|
||||
tbnz w3, #2, Exit_Xchg
|
||||
strb w16, [x12, x1]
|
||||
DirtyPage_Xchg
|
||||
ldrb w3, [x17]
|
||||
tbnz w3, #2, Exit_Xchg
|
||||
strb w16, [x17]
|
||||
b Exit_Xchg
|
||||
; DIRTYING CARD FOR X14
|
||||
DirtyCardXchg
|
||||
mov w17, #4
|
||||
add x2, x2, x16
|
||||
; must be after the field write to allow concurrent clean
|
||||
stlrb w17, [x2]
|
||||
DirtyGroupXchg
|
||||
add x12, x16, #0x80
|
||||
ldrb w3, [x12, x15]
|
||||
tbnz w3, #2, ExitXchg
|
||||
strb w17, [x12, x15]
|
||||
DirtyPageXchg
|
||||
ldrb w3, [x16]
|
||||
tbnz w3, #2, ExitXchg
|
||||
strb w17, [x16]
|
||||
b ExitXchg
|
||||
|
||||
;; this is expected to be rare.
|
||||
RecordEscape_Xchg
|
||||
|
||||
;; 4) check if the source is escaped
|
||||
and x12, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
add x16, x1, #8 ;; escape bit is MT + 1
|
||||
ubfx x17, x16, #9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x12, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x16, #3 ;; bit = (dst >> 3) [& 63]
|
||||
add x12, x1, #8 ;; escape bit is MT + 1
|
||||
ubfx x17, x12, #9,#12 ;; word index = (dst >> 9) & 0x1FFFFF
|
||||
ldr x17, [x16, x17, lsl #3] ;; mark word = [region + index * 8]
|
||||
lsr x12, x12, #3 ;; bit = (dst >> 3) [& 63]
|
||||
lsr x17, x17, x12
|
||||
tbnz x17, #0, AssignAndMarkCards_Xchg ;; source is already escaped.
|
||||
|
||||
|
@ -980,13 +1020,14 @@ RecordEscape_Xchg
|
|||
stp x0, x1, [sp, 16 * 1]
|
||||
|
||||
;; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
|
||||
and x2, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
ldr x12, [x2, #8] ;; EscapeFn address
|
||||
mov x2, x16 ;; source region
|
||||
ldr x12, [x16, #8] ;; EscapeFn address
|
||||
blr x12
|
||||
|
||||
ldp x0, x1, [sp, 16 * 1]
|
||||
ldp x29,x30, [sp], 16 * 2
|
||||
|
||||
;; and x16, x1, #0xFFFFFFFFFFE00000 ;; source region
|
||||
b AssignAndMarkCards_Xchg
|
||||
LEAF_END RhpCheckedXchg
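The card-marking tails shared by the barriers above walk a three-level hierarchy inside the 1 GB page that holds the destination: a byte per 512-byte card, a byte pair per card group, and one byte for the page itself; value 1 means remembered, value 4 means dirty (used while concurrent marking is active). A hedged C++ sketch of that tail, with the constants read off the shifts in the assembly (#30, #20, #9) and the C++ declarations of the globals treated as illustrative:

#include <cstdint>

extern uint8_t** g_card_table;          // page map, indexed by (address >> 30)
extern uint8_t*  g_write_watch_table;   // non-null while marking is concurrent

inline void MarkCardFor(void* dst)
{
    uint8_t* page   = g_card_table[(uintptr_t)dst >> 30];
    size_t   offset = (uintptr_t)dst - (uintptr_t)page;
    size_t   card   = offset >> 9;                     // 512-byte cards
    size_t   group  = 0x80 + ((offset >> 20) << 1);    // card groups, 2 bytes apart

    if (g_write_watch_table == nullptr)
    {
        // not concurrent: "set" cards (value 1), stopping at the first level that
        // is already marked; the assembly re-checks the concurrent flag afterwards
        // and falls into the dirty path if marking started in the meantime.
        if (page[card])  return;
        page[card] = 1;
        if (page[group]) return;
        page[group] = 1;
        if (!page[0])    page[0] = 1;
        return;
    }

    // concurrent: "dirty" cards (value 4); the card byte itself is stored with
    // release ordering in the assembly (stlrb) so concurrent cleaning cannot
    // lose the update.
    page[card] = 4;
    if (page[group] & 4) return;
    page[group] = 4;
    if (page[0] & 4)     return;
    page[0] = 4;
}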
|
||||
|
||||
|
|
|
@ -468,7 +468,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)
|
|||
|
||||
#if FEATURE_SATORI_GC
|
||||
case WriteBarrierOp::StartConcurrentMarkingSatori:
|
||||
g_write_watch_table = (uint8_t*)1;
|
||||
g_write_watch_table = args->write_watch_table;
|
||||
g_sw_ww_enabled_for_gc_heap = true;
|
||||
if (!is_runtime_suspended)
|
||||
{
|
||||
|
@ -481,7 +481,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)
|
|||
|
||||
case WriteBarrierOp::StopConcurrentMarkingSatori:
|
||||
assert(args->is_runtime_suspended && "the runtime must be suspended here!");
|
||||
g_write_watch_table = (uint8_t*)0;
|
||||
g_write_watch_table = args->write_watch_table;
|
||||
g_sw_ww_enabled_for_gc_heap = false;
|
||||
return;
|
||||
#endif
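After this change the same global that used to hold a literal 0/1 carries the GC-provided table pointer, and the barriers read it as a small state word: 0 means not concurrent (cards are set), the value 2 means skip card work entirely, and any real pointer means concurrent marking (cards are dirtied). A sketch of that consumer-side decoding, as read from the cbz / tbnz #1 tests in the assembly above; it ignores the same-region and generation filtering that the barriers also apply, and the enum and function are purely illustrative:

#include <cstdint>

enum class CardWork { None, Set, Dirty };

inline CardWork DecideCardWork(uintptr_t barrierState)   // value of g_write_watch_table / g_sw_ww_table
{
    if (barrierState & 0x2)       // state == 2: neither set nor dirty cards
        return CardWork::None;
    if (barrierState == 0)        // not concurrent
        return CardWork::Set;
    return CardWork::Dirty;       // a real table pointer: concurrent marking
}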
|
||||
|
|
|
@ -501,6 +501,8 @@ set(GC_SOURCES_WKS
|
|||
../gc/satori/SatoriAllocationContext.cpp
|
||||
../gc/satori/SatoriUtil.cpp
|
||||
../gc/satori/SatoriLock.cpp
|
||||
../gc/satori/SatoriWorkList.cpp
|
||||
../gc/satori/SatoriGate.cpp
|
||||
)
|
||||
|
||||
set(GC_HEADERS_WKS
|
||||
|
@ -528,6 +530,7 @@ set(GC_HEADERS_WKS
|
|||
../gc/satori/SatoriAllocationContext.h
|
||||
../gc/satori/SatoriUtil.h
|
||||
../gc/satori/SatoriLock.h
|
||||
../gc/satori/SatoriGate.h
|
||||
)
|
||||
|
||||
if(FEATURE_EVENT_TRACE)
|
||||
|
|
|
@ -334,13 +334,12 @@ LEAF_END JIT_PatchedCodeStart, _TEXT
|
|||
|
||||
; void JIT_CheckedWriteBarrier(Object** dst, Object* src)
|
||||
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
|
||||
|
||||
; See if this is in GCHeap
|
||||
mov rax, rcx
|
||||
shr rax, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne JIT_WriteBarrier
|
||||
; See if dst is in GCHeap
|
||||
mov rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
mov r8, rcx
|
||||
shr r8, 30 ; dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne CheckedEntry
|
||||
|
||||
NotInHeap:
|
||||
; See comment above about possible AV
|
||||
|
@ -348,23 +347,32 @@ LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
|
|||
ret
|
||||
LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
|
||||
|
||||
ALTERNATE_ENTRY macro Name
|
||||
|
||||
Name label proc
|
||||
PUBLIC Name
|
||||
endm
|
||||
|
||||
;
|
||||
; rcx - dest address
|
||||
; rdx - object
|
||||
;
|
||||
LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
||||
align 16
|
||||
; check for escaping assignment
|
||||
; 1) check if we own the source region
|
||||
|
||||
ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov rax, rdx
|
||||
shr rax, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
je JustAssign ; src not in heap
|
||||
; check if src is in heap
|
||||
mov rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
mov r8, rdx
|
||||
shr r8, 30 ; src page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
je JustAssign ; src not in heap
|
||||
else
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
endif
|
||||
|
||||
; check for escaping assignment
|
||||
; 1) check if we own the source region
|
||||
mov r8, rdx
|
||||
and r8, 0FFFFFFFFFFE00000h ; source region
|
||||
|
||||
|
@ -396,21 +404,30 @@ endif
|
|||
AssignAndMarkCards:
|
||||
mov [rcx], rdx
|
||||
|
||||
; TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
; needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
mov r11, qword ptr [g_sw_ww_table]
|
||||
|
||||
; check the barrier state. this must be done after the assignment (in program order)
|
||||
; if state == 2 we do not set or dirty cards.
|
||||
cmp r11, 2h
|
||||
jne DoCards
|
||||
Exit:
|
||||
ret
|
||||
|
||||
DoCards:
|
||||
; if same region, just check if barrier is not concurrent
|
||||
xor rdx, rcx
|
||||
shr rdx, 21
|
||||
jz CheckConcurrent ; same region, just check if barrier is not concurrent
|
||||
|
||||
; TUNING: nonconcurrent and concurrent barriers could be separate pieces of code, but to switch
|
||||
; need to suspend EE, not sure if skipping concurrent check would worth that much.
|
||||
jz CheckConcurrent
|
||||
|
||||
; if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
cmp dword ptr [r8 + 16], 2
|
||||
jl MarkCards
|
||||
|
||||
CheckConcurrent:
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
jne MarkCards
|
||||
ret
|
||||
cmp r11, 0h
|
||||
je Exit
|
||||
|
||||
MarkCards:
|
||||
; fetch card location for rcx
|
||||
|
@ -421,21 +438,22 @@ endif
|
|||
sub r8, rax ; offset in page
|
||||
mov rdx,r8
|
||||
shr r8, 9 ; card offset
|
||||
shr rdx, 21 ; group offset
|
||||
shr rdx, 20 ; group index
|
||||
lea rdx, [rax + rdx * 2 + 80h] ; group offset
|
||||
|
||||
; check if concurrent marking is in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp r11, 0h
|
||||
jne DirtyCard
|
||||
|
||||
; SETTING CARD FOR RCX
|
||||
SetCard:
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne CardSet
|
||||
jne Exit
|
||||
mov byte ptr [rax + r8], 1
|
||||
SetGroup:
|
||||
cmp byte ptr [rax + rdx * 2 + 80h], 0
|
||||
cmp byte ptr [rdx], 0
|
||||
jne CardSet
|
||||
mov byte ptr [rax + rdx * 2 + 80h], 1
|
||||
mov byte ptr [rdx], 1
|
||||
SetPage:
|
||||
cmp byte ptr [rax], 0
|
||||
jne CardSet
|
||||
|
@ -443,7 +461,7 @@ endif
|
|||
|
||||
CardSet:
|
||||
; check if concurrent marking is still not in progress
|
||||
cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h
|
||||
cmp qword ptr [g_sw_ww_table], 0h
|
||||
jne DirtyCard
|
||||
ret
|
||||
|
||||
|
@ -451,15 +469,13 @@ endif
|
|||
DirtyCard:
|
||||
mov byte ptr [rax + r8], 4
|
||||
DirtyGroup:
|
||||
cmp byte ptr [rax + rdx * 2 + 80h], 4
|
||||
cmp byte ptr [rdx], 4
|
||||
je Exit
|
||||
mov byte ptr [rax + rdx * 2 + 80h], 4
|
||||
mov byte ptr [rdx], 4
|
||||
DirtyPage:
|
||||
cmp byte ptr [rax], 4
|
||||
je Exit
|
||||
mov byte ptr [rax], 4
|
||||
|
||||
Exit:
|
||||
ret
|
||||
|
||||
; this is expected to be rare.
|
||||
|
@ -467,12 +483,18 @@ endif
|
|||
|
||||
; 4) check if the source is escaped
|
||||
mov rax, rdx
|
||||
add rax, 8 ; escape bit is MT + 1
|
||||
and rax, 01FFFFFh
|
||||
shr rax, 3
|
||||
bt qword ptr [r8], rax
|
||||
jb AssignAndMarkCards ; source is already escaped.
|
||||
|
||||
; save rcx, rdx, r8 and have enough stack for the callee
|
||||
; Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
|
||||
; save rsp, rcx, rdx, r8 and have enough stack for the callee
|
||||
push r9
|
||||
push rcx
|
||||
push rdx
|
||||
push r8
|
||||
|
@ -485,6 +507,7 @@ endif
|
|||
pop r8
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards
|
||||
LEAF_END_MARKED JIT_WriteBarrier, _TEXT
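On x64 the same restructuring splits the heap check in two: JIT_CheckedWriteBarrier only verifies that the destination is a GC-heap address and then jumps into the middle of JIT_WriteBarrier (CheckedEntry), which still verifies the source. Roughly, in C++; the page-map lookup mirrors the byte map used above, while the function names and the byte-typed declaration are assumptions for illustration:

#include <cstdint>

extern uint8_t* g_card_bundle_table;          // one byte per 1 GB page: non-zero if GC heap
void WriteBarrier(void** dst, void* src);     // stands in for JIT_WriteBarrier / CheckedEntry

inline bool IsInHeap(const void* p)
{
    return g_card_bundle_table[(uintptr_t)p >> 30] != 0;
}

inline void CheckedWriteBarrier(void** dst, void* src)
{
    if (!IsInHeap(dst))
    {
        *dst = src;               // stack or native memory: plain store, no cards
        return;
    }
    WriteBarrier(dst, src);       // heap destination: full barrier (CheckedEntry path)
}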
|
||||
|
||||
|
@ -505,14 +528,13 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
|
|||
add rdi, 8h
|
||||
add rsi, 8h
|
||||
|
||||
; See if assignment is into heap
|
||||
mov rax, rcx
|
||||
shr rax, 30 ; round to page size ( >> PAGE_BITS )
|
||||
add rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne JIT_WriteBarrier
|
||||
; See if dst is in GCHeap
|
||||
mov rax, [g_card_bundle_table] ; fetch the page byte map
|
||||
mov r8, rcx
|
||||
shr r8, 30 ; dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne CheckedEntry
|
||||
|
||||
align 16
|
||||
NotInHeap:
|
||||
mov [rcx], rdx
|
||||
ret
|
||||
|
|
|
@ -215,6 +215,11 @@ LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT
|
|||
|
||||
#else //FEATURE_SATORI_GC ##############################################################################
|
||||
|
||||
.macro ALTERNATE_ENTRY Name
|
||||
.global C_FUNC(\Name)
|
||||
C_FUNC(\Name):
|
||||
.endm
|
||||
|
||||
// Mark start of the code region that we patch at runtime
|
||||
LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
|
||||
ret
|
||||
|
@ -224,13 +229,13 @@ LEAF_END JIT_PatchedCodeStart, _TEXT
|
|||
// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
|
||||
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
|
||||
|
||||
// See if this is in GCHeap
|
||||
mov rax, rdi
|
||||
shr rax, 30 // round to page size ( >> PAGE_BITS )
|
||||
PREPARE_EXTERNAL_VAR g_card_bundle_table, r8
|
||||
add rax, [r8] // fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
jne C_FUNC(JIT_WriteBarrier)
|
||||
// See if dst is in GCHeap
|
||||
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
|
||||
mov rax, [rax]
|
||||
mov r8, rdi
|
||||
shr r8, 30 // dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne C_FUNC(CheckedEntry)
|
||||
|
||||
NotInHeap:
|
||||
// See comment above about possible AV
|
||||
|
@ -246,13 +251,18 @@ LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
|
|||
.balign 16
|
||||
LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
||||
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
||||
mov rax, rsi
|
||||
shr rax, 30 // round to page size ( >> PAGE_BITS )
|
||||
PREPARE_EXTERNAL_VAR g_card_bundle_table, r8
|
||||
add rax, [r8] // fetch the page byte map
|
||||
cmp byte ptr [rax], 0
|
||||
je JustAssign // src not in heap
|
||||
// check if src is in heap
|
||||
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
|
||||
mov rax, [rax]
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
mov r8, rsi
|
||||
shr r8, 30 // src page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
je JustAssign // src not in heap
|
||||
#else
|
||||
ALTERNATE_ENTRY CheckedEntry
|
||||
#endif
|
||||
|
||||
// check for escaping assignment
|
||||
// 1) check if we own the source region
|
||||
mov rdx, rsi
|
||||
|
@ -292,18 +302,30 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|||
AssignAndMarkCards:
|
||||
mov [rdi], rsi
|
||||
|
||||
PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, r11
|
||||
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
||||
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
||||
PREPARE_EXTERNAL_VAR g_sw_ww_table, rcx
|
||||
mov r11, [rcx]
|
||||
|
||||
// set rdi per contract with JIT_ByRefWriteBarrier
|
||||
mov rax, rdi
|
||||
add rdi, 8
|
||||
|
||||
xor rsi, rax
|
||||
shr rsi, 21 // check if assigning within the same region (sets flags)
|
||||
|
||||
// check the barrier state. this must be done after the assignment (in program order)
|
||||
// if state == 2 we do not set or dirty cards.
|
||||
cmp r11, 2
|
||||
jne DoCards
|
||||
// set rsi per contract with JIT_ByRefWriteBarrier
|
||||
mov rsi, r10
|
||||
Exit:
|
||||
ret
|
||||
|
||||
DoCards:
|
||||
// if same region, just check if barrier is not concurrent
|
||||
xor rsi, rax
|
||||
shr rsi, 21
|
||||
// set rsi per contract with JIT_ByRefWriteBarrier
|
||||
mov rsi, r10
|
||||
jz CheckConcurrent // same region, just check if barrier is not concurrent
|
||||
|
||||
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
|
||||
|
@ -311,9 +333,9 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|||
jl MarkCards
|
||||
|
||||
CheckConcurrent:
|
||||
cmp byte ptr [r11], 0
|
||||
jne MarkCards
|
||||
ret
|
||||
// if not concurrent, exit
|
||||
cmp r11, 0
|
||||
je Exit
|
||||
|
||||
MarkCards:
|
||||
// fetch card location for rax (saved rdi)
|
||||
|
@ -325,21 +347,22 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|||
sub rdx, rax // offset in page
|
||||
mov r8, rdx
|
||||
shr rdx, 9 // card offset
|
||||
shr r8, 21 // group offset
|
||||
shr r8, 20 // group index
|
||||
lea r8, [rax + r8 * 2 + 0x80] // group offset
|
||||
|
||||
// check if concurrent marking is in progress
|
||||
cmp byte ptr [r11], 0
|
||||
cmp r11, 0
|
||||
jne DirtyCard
|
||||
|
||||
// SETTING CARD
|
||||
SetCard:
|
||||
cmp byte ptr [rax + rdx], 0
|
||||
jne CardSet
|
||||
jne Exit
|
||||
mov byte ptr [rax + rdx], 1
|
||||
SetGroup:
|
||||
cmp byte ptr [rax + r8 * 2 + 0x80], 0
|
||||
cmp byte ptr [r8], 0
|
||||
jne CardSet
|
||||
mov byte ptr [rax + r8 * 2 + 0x80], 1
|
||||
mov byte ptr [r8], 1
|
||||
SetPage:
|
||||
cmp byte ptr [rax], 0
|
||||
jne CardSet
|
||||
|
@ -347,7 +370,7 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|||
|
||||
CardSet:
|
||||
// check if concurrent marking is still not in progress
|
||||
cmp byte ptr [r11], 0
|
||||
cmp qword ptr [rcx], 0
|
||||
jne DirtyCard
|
||||
ret
|
||||
|
||||
|
@ -355,27 +378,33 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|||
DirtyCard:
|
||||
mov byte ptr [rax + rdx], 4
|
||||
DirtyGroup:
|
||||
cmp byte ptr [rax + r8 * 2 + 0x80], 4
|
||||
cmp byte ptr [r8], 4
|
||||
je Exit
|
||||
mov byte ptr [rax + r8 * 2 + 0x80], 4
|
||||
mov byte ptr [r8], 4
|
||||
DirtyPage:
|
||||
cmp byte ptr [rax], 4
|
||||
je Exit
|
||||
mov byte ptr [rax], 4
|
||||
|
||||
Exit:
|
||||
ret
|
||||
|
||||
// this is expected to be rare.
|
||||
RecordEscape:
|
||||
|
||||
// 4) check if the source is escaped
|
||||
mov rax, rsi
|
||||
add rax, 8 // escape bit is MT + 1
|
||||
and rax, 0x1FFFFF
|
||||
shr rax, 3
|
||||
bt qword ptr [rdx], rax
|
||||
jb AssignAndMarkCards // source is already escaped.
|
||||
|
||||
RecordEscape:
|
||||
// save rdi, rsi, rdx and r10 (possibly preadjusted rsi)
|
||||
// Align rsp
|
||||
mov r9, rsp
|
||||
and rsp, -16
|
||||
sub rsp, 8
|
||||
|
||||
// save rsp, rdi, rsi, rdx and r10 (possibly preadjusted rsi)
|
||||
push r9
|
||||
push rdi
|
||||
push rsi
|
||||
push rdx
|
||||
|
@ -388,6 +417,7 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|||
pop rdx
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rsp
|
||||
jmp AssignAndMarkCards
|
||||
LEAF_END_MARKED JIT_WriteBarrier, _TEXT
|
||||
|
||||
|
@ -407,19 +437,15 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
|
|||
lea r10, [rsi + 8]
|
||||
mov rsi, [rsi]
|
||||
|
||||
// See if this is in GCHeap
|
||||
PREPARE_EXTERNAL_VAR g_highest_address, rax
|
||||
cmp rdi, [rax]
|
||||
ja NotInHeap_ByRefWriteBarrier
|
||||
// See if dst is in GCHeap
|
||||
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
|
||||
mov rax, [rax]
|
||||
|
||||
PREPARE_EXTERNAL_VAR g_card_table, r8
|
||||
mov r8, [r8] // fetch the page map
|
||||
mov rax, rdi
|
||||
shr rax, 30 // round to page size ( >> PAGE_BITS )
|
||||
cmp qword ptr [r8 + rax * 8], 0
|
||||
jne C_FUNC(JIT_WriteBarrier)
|
||||
mov r8, rdi
|
||||
shr r8, 30 // dst page index
|
||||
cmp byte ptr [rax + r8], 0
|
||||
jne C_FUNC(CheckedEntry)
|
||||
|
||||
.balign 16
|
||||
NotInHeap_ByRefWriteBarrier:
|
||||
mov [rdi], rsi
|
||||
add rdi, 8
|
||||
|
|
|
@ -1154,7 +1154,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)
|
|||
|
||||
#if FEATURE_SATORI_GC
|
||||
case WriteBarrierOp::StartConcurrentMarkingSatori:
|
||||
g_sw_ww_table = (uint8_t*)1;
|
||||
g_sw_ww_table = args->write_watch_table;
|
||||
g_sw_ww_enabled_for_gc_heap = true;
|
||||
stompWBCompleteActions |= ::SwitchToWriteWatchBarrier(is_runtime_suspended);
|
||||
if (!is_runtime_suspended)
|
||||
|
@ -1163,11 +1163,12 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)
|
|||
// observing future allocations.
|
||||
FlushProcessWriteBuffers();
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
case WriteBarrierOp::StopConcurrentMarkingSatori:
|
||||
assert(args->is_runtime_suspended && "the runtime must be suspended here!");
|
||||
g_sw_ww_table = (uint8_t*)0;
|
||||
g_sw_ww_table = args->write_watch_table;
|
||||
g_sw_ww_enabled_for_gc_heap = false;
|
||||
stompWBCompleteActions |= ::SwitchToNonWriteWatchBarrier(true);
|
||||
return;
|
||||
|
|
|
@ -438,6 +438,7 @@ class Object
|
|||
LIMITED_METHOD_CONTRACT;
|
||||
SUPPORTS_DAC;
|
||||
|
||||
#if !defined(FEATURE_SATORI_GC)
|
||||
// lose GC marking bit and the reserved bit
|
||||
// A method table pointer should always be aligned. During GC we set the least
|
||||
// significant bit for marked objects, and the second to least significant
|
||||
|
@ -448,6 +449,11 @@ class Object
|
|||
#else
|
||||
return dac_cast<PTR_MethodTable>((dac_cast<TADDR>(m_pMethTab)) & ~((UINT_PTR)3));
|
||||
#endif //TARGET_64BIT
|
||||
#else
|
||||
// Satori does not mess up MT pointers.
|
||||
_ASSERTE((dac_cast<TADDR>(m_pMethTab) & 7) == 0);
|
||||
return dac_cast<PTR_MethodTable>((dac_cast<TADDR>(m_pMethTab)));
|
||||
#endif
|
||||
}
|
||||
|
||||
// There are some cases where it is unsafe to get the type handle during a GC.
|
||||
|
|
|
@ -3253,8 +3253,7 @@ COR_PRF_SUSPEND_REASON GCSuspendReasonToProfSuspendReason(ThreadSuspend::SUSPEND
|
|||
#endif // PROFILING_SUPPORTED
|
||||
|
||||
// exponential spinwait with an approximate time limit for waiting in microsecond range.
|
||||
// when iteration == -1, only usecLimit is used
|
||||
void SpinWait(int iteration, int usecLimit)
|
||||
void SpinWait(int usecLimit)
|
||||
{
|
||||
LARGE_INTEGER li;
|
||||
QueryPerformanceCounter(&li);
|
||||
|
@ -3264,20 +3263,26 @@ void SpinWait(int iteration, int usecLimit)
|
|||
int64_t ticksPerSecond = li.QuadPart;
|
||||
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
|
||||
|
||||
int l = min((unsigned)iteration, 30);
|
||||
for (int i = 0; i < l; i++)
|
||||
#ifdef TARGET_UNIX
|
||||
if (usecLimit > 10)
|
||||
{
|
||||
for (int j = 0; j < (1 << i); j++)
|
||||
{
|
||||
System_YieldProcessor();
|
||||
}
|
||||
PAL_nanosleep(usecLimit * 1000);
|
||||
}
|
||||
#endif // TARGET_UNIX
|
||||
|
||||
for (int i = 0; i < 30; i++)
|
||||
{
|
||||
QueryPerformanceCounter(&li);
|
||||
int64_t currentTicks = li.QuadPart;
|
||||
if (currentTicks > endTicks)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
for (int j = 0; j < (1 << i); j++)
|
||||
{
|
||||
System_YieldProcessor();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
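The reshaped SpinWait above takes only a microsecond budget: on Unix it may nanosleep first, then it spins with exponentially growing System_YieldProcessor bursts until QueryPerformanceCounter passes the deadline. A hedged usage sketch; the flag and the blocking fallback below are hypothetical, not APIs introduced by this change:

#include <atomic>

void SpinWait(int usecLimit);                 // the function defined above
extern std::atomic<bool> g_workPublished;     // hypothetical flag set by a GC worker
void BlockUntilPublished();                   // hypothetical blocking fallback

void WaitBrieflyThenBlock()
{
    // spin for at most a few short bursts before paying for a real wait
    for (int attempt = 0; attempt < 3; attempt++)
    {
        if (g_workPublished.load(std::memory_order_acquire))
            return;
        SpinWait(50);                         // ~50 microsecond bounded spin
    }

    if (!g_workPublished.load(std::memory_order_acquire))
        BlockUntilPublished();
}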