mirror of https://github.com/VSadov/Satori.git synced 2025-06-08 03:27:04 +09:00

Concurrent cleaning of cards dirtied by incremental marking. (#33)

* Concurrent card cleaning on all platforms

* larger cache

* update year

* use ld_classic in ILC build and in build integration

* Missing checkbox
Vladimir Sadov 2024-02-02 22:47:12 -10:00 committed by vsadov
parent 8305906bdb
commit 4ad3284252
46 changed files with 636 additions and 360 deletions

View file

@ -98,7 +98,7 @@ C_ASSERT (HANDLE_HANDLES_PER_MASK * 2 == HANDLE_HANDLES_PER_BLOCK);
// cache layout metrics
#define HANDLE_CACHE_TYPE_SIZE 128 // 128 == 63 handles per bank
#define HANDLE_CACHE_TYPE_SIZE 1024 // 1024 == 511 handles per bank
#define HANDLES_PER_CACHE_BANK ((HANDLE_CACHE_TYPE_SIZE / 2) - 1)
// cache policy defines
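
For reference, the bank arithmetic behind the comments above can be checked at compile time. This is only an illustrative sketch; the constant names below are invented stand-ins for the defines in this hunk.

#include <cstddef>

// stand-ins for HANDLE_CACHE_TYPE_SIZE and HANDLES_PER_CACHE_BANK above
constexpr std::size_t kHandleCacheTypeSize = 1024;
constexpr std::size_t kHandlesPerCacheBank = (kHandleCacheTypeSize / 2) - 1;

static_assert(kHandlesPerCacheBank == 511, "1024 == 511 handles per bank");
static_assert((128 / 2) - 1 == 63, "the previous size of 128 gave 63 handles per bank");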

View file

@ -45,7 +45,7 @@ A simple garbage collector that incorporates various ideas that I had over time.
### Roadmap: ###
- [ ] explicit memory limits
- [x] immortal allocations
- [ ] preallocated objects
- [x] preallocated objects
- [ ] perf tuning (possibly a lot of opportunities)
- [ ] more and better diagnostics (support for debuggers and profilers)
- [ ] NUMA awareness

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -40,13 +40,14 @@ SatoriObject* SatoriAllocationContext::FinishAllocFromShared()
{
_ASSERTE(alloc_limit >= alloc_ptr);
size_t unusedStart = (size_t)alloc_ptr;
size_t unused = (size_t)alloc_limit - unusedStart + Satori::MIN_FREE_SIZE;
SatoriObject* freeObj = SatoriObject::FormatAsFree(unusedStart, unused);
size_t unused = (size_t)alloc_limit - unusedStart;
SatoriObject* freeObj = SatoriObject::FormatAsFree(unusedStart, unused + Satori::MIN_FREE_SIZE);
SatoriRegion* containingRegion = freeObj->ContainingRegion();
// this portion is now parsable
freeObj->ContainingRegion()->DecrementUnfinishedAlloc();
freeObj->ContainingRegion()->Allocator()->AllocationTickDecrement(unused);
// unclaim unused.
alloc_bytes -= alloc_limit - alloc_ptr;
alloc_bytes -= unused;
alloc_ptr = alloc_limit = nullptr;
return freeObj;
@ -59,8 +60,10 @@ void SatoriAllocationContext::Deactivate(SatoriRecycler* recycler, bool detach)
{
size_t allocPtr = (size_t)this->alloc_ptr;
this->alloc_bytes -= this->alloc_limit - this->alloc_ptr;
size_t unused = this->alloc_limit - this->alloc_ptr;
this->alloc_bytes -= unused;
this->alloc_limit = this->alloc_ptr = nullptr;
region->Allocator()->AllocationTickDecrement(unused);
if (region->IsAllocating())
{

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -216,6 +216,35 @@ void SatoriAllocator::ReturnRegion(SatoriRegion* region)
m_queues[SizeToBucket(region->Size())]->Push(region);
}
void SatoriAllocator::AllocationTickIncrement(AllocationTickKind allocationTickKind, size_t totalAdded, SatoriObject* obj, size_t objSize)
{
size_t& tickAmount = allocationTickKind == AllocationTickKind::Small ?
m_smallAllocTickAmount :
allocationTickKind == AllocationTickKind::Large ?
m_largeAllocTickAmount :
m_pinnedAllocTickAmount;
const size_t etw_allocation_tick = 100 * 1024;
size_t current = Interlocked::ExchangeAdd64(&tickAmount, totalAdded) + totalAdded;
if ((int64_t)current > etw_allocation_tick &&
Interlocked::CompareExchange(&tickAmount, (size_t)0, current) == current)
{
FIRE_EVENT(GCAllocationTick_V4,
current,
/*kind*/ (uint32_t)allocationTickKind,
/*heap_number*/ 0,
(void*)obj,
objSize);
}
}
void SatoriAllocator::AllocationTickDecrement(size_t totalUnused)
{
Interlocked::ExchangeAdd64(&m_smallAllocTickAmount, (size_t)(-(int64_t)totalUnused));
}
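
The two helpers above implement a simple thresholded counter: every allocation adds to a per-kind accumulator, and the thread that pushes it past 100 KB tries to zero it with a compare-exchange, so exactly one thread fires GCAllocationTick_V4 for that window; unclaimed space is subtracted so the next tick is not inflated. A rough standalone sketch of the same pattern with std::atomic (the names and the printf stand in for Satori's Interlocked wrappers and the ETW event; they are not the real APIs):

#include <atomic>
#include <cstddef>
#include <cstdio>

std::atomic<std::size_t> g_tickBytes{0};
constexpr std::size_t kTickThreshold = 100 * 1024;

void OnAllocated(std::size_t bytes)
{
    // add first, then check; only the thread whose CAS succeeds reports,
    // so concurrent allocators cannot double-fire for the same window
    std::size_t current = g_tickBytes.fetch_add(bytes) + bytes;
    std::size_t expected = current;
    if (current > kTickThreshold &&
        g_tickBytes.compare_exchange_strong(expected, 0))
    {
        std::printf("allocation tick: %zu bytes since last report\n", current);
    }
}

void OnUnusedReturned(std::size_t bytes)
{
    // mirror of AllocationTickDecrement: space handed back by an allocation
    // context is subtracted, relying on wraparound the same way the original
    // relies on adding a negated value
    g_tickBytes.fetch_sub(bytes);
}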
Object* SatoriAllocator::Alloc(SatoriAllocationContext* context, size_t size, uint32_t flags)
{
size = ALIGN_UP(size, Satori::OBJECT_ALIGNMENT);
@ -282,8 +311,9 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
{
if (freeObj && freeObj->ContainingRegion() == m_regularRegion)
{
m_regularRegion->SetOccupancy(m_regularRegion->Occupancy() - freeObj->Size());
m_regularRegion->AddFreeSpace(freeObj);
size_t size = freeObj->Size();
m_regularRegion->SetOccupancy(m_regularRegion->Occupancy() - size);
m_regularRegion->AddFreeSpace(freeObj, size);
}
return AllocRegularShared(context, size, flags);
@ -326,13 +356,7 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
result->CleanSyncBlock();
region->SetIndicesForObject(result, result->Start() + size);
FIRE_EVENT(GCAllocationTick_V4,
size,
/*gen_number*/ 1,
/*heap_number*/ 0,
(void*)result,
0);
AllocationTickIncrement(AllocationTickKind::Small, moreSpace, result, size);
return result;
}
else
@ -343,7 +367,10 @@ SatoriObject* SatoriAllocator::AllocRegular(SatoriAllocationContext* context, si
}
// unclaim unused.
context->alloc_bytes -= context->alloc_limit - context->alloc_ptr;
size_t unused = context->alloc_limit - context->alloc_ptr;
context->alloc_bytes -= unused;
AllocationTickDecrement(unused);
if (region->IsAllocating())
{
region->StopAllocating((size_t)context->alloc_ptr);
@ -492,13 +519,7 @@ SatoriObject* SatoriAllocator::AllocRegularShared(SatoriAllocationContext* conte
memset((uint8_t*)result + sizeof(size_t), 0, moreSpace - 2 * sizeof(size_t));
}
FIRE_EVENT(GCAllocationTick_V4,
size,
/*gen_number*/ 1,
/*heap_number*/ 0,
(void*)result,
0);
AllocationTickIncrement(AllocationTickKind::Small, moreSpace, result, size);
return result;
}
@ -640,13 +661,7 @@ tryAgain:
result->CleanSyncBlock();
region->SetIndicesForObject(result, result->Start() + size);
FIRE_EVENT(GCAllocationTick_V4,
size,
/*gen_number*/ 1,
/*heap_number*/ 0,
(void*)result,
0);
AllocationTickIncrement(AllocationTickKind::Large, size, result, size);
return result;
}
}
@ -748,13 +763,7 @@ SatoriObject* SatoriAllocator::AllocLargeShared(SatoriAllocationContext* context
memset((uint8_t*)result + sizeof(size_t), 0, size - 2 * sizeof(size_t));
}
FIRE_EVENT(GCAllocationTick_V4,
size,
/*gen_number*/ 1,
/*heap_number*/ 0,
(void*)result,
0);
AllocationTickIncrement(AllocationTickKind::Large, size, result, size);
return result;
}
@ -843,14 +852,8 @@ SatoriObject* SatoriAllocator::AllocHuge(SatoriAllocationContext* context, size_
return nullptr;
}
FIRE_EVENT(GCAllocationTick_V4,
size,
/*gen_number*/ 2,
/*heap_number*/ 0,
(void*)result,
0);
context->alloc_bytes_uoh += size;
AllocationTickIncrement(AllocationTickKind::Large, size, result, size);
return result;
}
@ -916,13 +919,7 @@ SatoriObject* SatoriAllocator::AllocPinned(SatoriAllocationContext* context, siz
memset((uint8_t*)result + sizeof(size_t), 0, size - 2 * sizeof(size_t));
}
FIRE_EVENT(GCAllocationTick_V4,
size,
/*gen_number*/ 1,
/*heap_number*/ 0,
(void*)result,
0);
AllocationTickIncrement(AllocationTickKind::Pinned, size, result, size);
return result;
}
@ -994,6 +991,8 @@ SatoriObject* SatoriAllocator::AllocImmortal(SatoriAllocationContext* context, s
{
context->alloc_bytes_uoh += size;
region->SetIndicesForObject(result, result->Start() + size);
AllocationTickIncrement(AllocationTickKind::Pinned, size, result, size);
}
else
{

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -40,6 +40,13 @@ class SatoriAllocationContext;
class SatoriWorkChunk;
class SatoriWorkList;
enum AllocationTickKind
{
Small = 0,
Large = 1,
Pinned = 2,
};
class SatoriAllocator
{
public:
@ -50,6 +57,9 @@ public:
void AddRegion(SatoriRegion* region);
void ReturnRegion(SatoriRegion* region);
void AllocationTickIncrement(AllocationTickKind allocationTickKind, size_t totalAdded, SatoriObject* obj, size_t obj_size);
void AllocationTickDecrement(size_t totalUnused);
SatoriWorkChunk* TryGetWorkChunk();
SatoriWorkChunk* GetWorkChunk();
void ReturnWorkChunk(SatoriWorkChunk* chunk);
@ -76,6 +86,11 @@ private:
volatile int32_t m_singePageAdders;
// for event trace
size_t m_smallAllocTickAmount;
size_t m_largeAllocTickAmount;
size_t m_pinnedAllocTickAmount;
SatoriObject* AllocRegular(SatoriAllocationContext* context, size_t size, uint32_t flags);
SatoriObject* AllocRegularShared(SatoriAllocationContext* context, size_t size, uint32_t flags);
SatoriObject* AllocLarge(SatoriAllocationContext* context, size_t size, uint32_t flags);

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -446,8 +446,7 @@ size_t SatoriGC::GetLastGCStartTime(int generation)
size_t SatoriGC::GetLastGCDuration(int generation)
{
// NYI (this is used for mem pressure aggressiveness)
return 10;
return m_heap->Recycler()->GetGcDurationMillis(generation);
}
size_t SatoriGC::GetNow()
@ -483,7 +482,7 @@ void SatoriGC::PublishObject(uint8_t* obj)
// in such case we simulate retroactive write by dirtying the card for the MT location.
if (so->RawGetMethodTable()->Collectible())
{
region->ContainingPage()->DirtyCardForAddressUnordered(so->Start());
region->ContainingPage()->DirtyCardForAddressConcurrent(so->Start());
}
}
else if (so->IsUnfinished())
@ -654,6 +653,8 @@ size_t SatoriGC::GetPromotedBytes(int heap_index)
void SatoriGC::GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, uint64_t* totalAvailableMemoryBytes, uint64_t* lastRecordedMemLoadBytes, uint64_t* lastRecordedHeapSizeBytes, uint64_t* lastRecordedFragmentationBytes, uint64_t* totalCommittedBytes, uint64_t* promotedBytes, uint64_t* pinnedObjectCount, uint64_t* finalizationPendingCount, uint64_t* index, uint32_t* generation, uint32_t* pauseTimePct, bool* isCompaction, bool* isConcurrent, uint64_t* genInfoRaw, uint64_t* pauseInfoRaw, int kind)
{
LastRecordedGcInfo* lastGcInfo = m_heap->Recycler()->GetLastGcInfo((gc_kind)kind);
uint64_t totalLimit = GCToOSInterface::GetPhysicalMemoryLimit();
*highMemLoadThresholdBytes = totalLimit * 99 / 100; // just say 99% for now
*totalAvailableMemoryBytes = totalLimit;
@ -666,18 +667,21 @@ void SatoriGC::GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, uint64_t* tota
*lastRecordedHeapSizeBytes = GetTotalBytesInUse();
*finalizationPendingCount = GetNumberOfFinalizable();
// the rest seems implementation specific and not strictly required.
*lastRecordedFragmentationBytes = 0;
*totalCommittedBytes = 0;
*promotedBytes = 0;
*pinnedObjectCount = 0;
*index = 0;
*generation = 0;
*index = lastGcInfo->m_index;
*generation = lastGcInfo->m_condemnedGeneration;
*pauseTimePct = 0;
*isCompaction = 0;
*isConcurrent = 0;
*isCompaction = lastGcInfo->m_compaction;
*isConcurrent = lastGcInfo->m_concurrent;
*genInfoRaw = 0;
*pauseInfoRaw = 0;
for (int i = 0; i < 2; i++)
{
// convert it to 100-ns units that TimeSpan needs.
pauseInfoRaw[i] = (uint64_t)(lastGcInfo->m_pauseDurations[i]) * 10;
}
}
uint32_t SatoriGC::GetMemoryLoad()
@ -807,7 +811,7 @@ void SatoriGC::SetCardsAfterBulkCopy(size_t dst, size_t src, size_t len)
if (recycler->IsBarrierConcurrent())
{
page->DirtyCardsForRangeUnordered(dst, dst + len);
page->DirtyCardsForRangeConcurrent(dst, dst + len);
}
}
}

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -93,9 +93,9 @@ private:
NOINLINE
void EnterSpin()
{
int localBackoff = m_backoff;
int localBackoff = 0;
while (VolatileLoadWithoutBarrier(&m_backoff) ||
!CompareExchangeAcq(&m_backoff, localBackoff / 4 + 1, 0))
!CompareExchangeAcq(&m_backoff, 1, 0))
{
localBackoff = Backoff(localBackoff);
}
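
For context, the lock being tuned here is a test-and-test-and-set spin where m_backoff doubles as the lock word: zero means free, and a waiter only attempts the CAS after seeing zero, pausing longer after each failure. A minimal sketch of that shape (the Backoff helper and the yield inside it are assumptions made to keep the sketch self-contained, not the real implementation):

#include <atomic>
#include <thread>

std::atomic<int> m_backoff{0};   // 0 == unlocked, nonzero == held

static int Backoff(int current)
{
    // wait a bit longer after every failed attempt; yield stands in
    // for whatever cpu-pause loop the real helper uses
    int next = current * 2 + 1;
    for (int i = 0; i < next; i++)
        std::this_thread::yield();
    return next;
}

void EnterSpin()
{
    int localBackoff = 0;
    int expected = 0;
    // spin on a plain load first so contended waiters mostly just read the line
    while (m_backoff.load(std::memory_order_relaxed) != 0 ||
           !m_backoff.compare_exchange_weak(expected, 1, std::memory_order_acquire))
    {
        expected = 0;                       // a failed CAS overwrites it
        localBackoff = Backoff(localBackoff);
    }
}

void Leave()
{
    m_backoff.store(0, std::memory_order_release);
}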

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -179,21 +179,8 @@ void SatoriPage::SetCardForAddressOnly(size_t address)
m_cardTable[cardByteOffset] = Satori::CardState::REMEMBERED;
}
void SatoriPage::DirtyCardForAddress(size_t address)
{
size_t offset = address - Start();
size_t cardByteOffset = offset / Satori::BYTES_PER_CARD_BYTE;
_ASSERTE(cardByteOffset >= m_cardTableStart);
_ASSERTE(cardByteOffset < m_cardTableSize);
m_cardTable[cardByteOffset] = Satori::CardState::DIRTY;
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
VolatileStore(&this->m_cardGroups[cardGroup * 2], Satori::CardState::DIRTY);
VolatileStore(&this->m_cardState, Satori::CardState::DIRTY);
}
// setting cards is never concurrent with unsetting (unlike dirtying)
// so there are no ordering implications.
void SatoriPage::SetCardsForRange(size_t start, size_t end)
{
_ASSERTE(end > start);
@ -217,7 +204,7 @@ void SatoriPage::SetCardsForRange(size_t start, size_t end)
_ASSERTE(lastCard < m_cardTableSize);
memset((void*)(m_cardTable + firstCard), Satori::CardState::REMEMBERED, lastCard - firstCard + 1);
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
for (size_t i = firstGroup; i <= lastGroup; i++)
@ -234,36 +221,23 @@ void SatoriPage::SetCardsForRange(size_t start, size_t end)
}
}
// dirtying in nonblocking phases could be unordered since we do not clean concurrently with mutator
void SatoriPage::DirtyCardsForRangeUnordered(size_t start, size_t end)
//
void SatoriPage::DirtyCardForAddress(size_t address)
{
size_t firstByteOffset = start - Start();
size_t lastByteOffset = end - Start() - 1;
size_t offset = address - Start();
size_t cardByteOffset = offset / Satori::BYTES_PER_CARD_BYTE;
size_t firstCard = firstByteOffset / Satori::BYTES_PER_CARD_BYTE;
_ASSERTE(firstCard >= m_cardTableStart);
_ASSERTE(firstCard < m_cardTableSize);
_ASSERTE(cardByteOffset >= m_cardTableStart);
_ASSERTE(cardByteOffset < m_cardTableSize);
size_t lastCard = lastByteOffset / Satori::BYTES_PER_CARD_BYTE;
_ASSERTE(lastCard >= m_cardTableStart);
_ASSERTE(lastCard < m_cardTableSize);
// we dirty the card unconditionally, since it can be concurrently cleaned
// however this does not get called after field writes (unlike in barriers),
// so the card dirtying write can be unordered.
m_cardTable[cardByteOffset] = Satori::CardState::DIRTY;
memset((void*)(m_cardTable + firstCard), Satori::CardState::DIRTY, lastCard - firstCard + 1);
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
for (size_t i = firstGroup; i <= lastGroup; i++)
{
if (m_cardGroups[i * 2] != Satori::CardState::DIRTY)
{
m_cardGroups[i * 2] = Satori::CardState::DIRTY;
}
}
if (m_cardState != Satori::CardState::DIRTY)
{
m_cardState = Satori::CardState::DIRTY;
}
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
VolatileStore(&this->m_cardGroups[cardGroup * 2], Satori::CardState::DIRTY);
VolatileStore(&this->m_cardState, Satori::CardState::DIRTY);
}
void SatoriPage::DirtyCardsForRange(size_t start, size_t end)
@ -279,9 +253,11 @@ void SatoriPage::DirtyCardsForRange(size_t start, size_t end)
_ASSERTE(lastCard >= m_cardTableStart);
_ASSERTE(lastCard < m_cardTableSize);
// we dirty these cards unconditionally, since they can be concurrently cleaned
// see similar note above
memset((void*)(m_cardTable + firstCard), Satori::CardState::DIRTY, lastCard - firstCard + 1);
// if dirtying can be concurrent with cleaning, so we must ensure order
// if dirtying can be concurrent with cleaning, we must ensure order
// of writes: cards, then groups, then page.
// cleaning will read in the opposite order
VolatileStoreBarrier();
@ -296,6 +272,44 @@ void SatoriPage::DirtyCardsForRange(size_t start, size_t end)
VolatileStore(&this->m_cardState, Satori::CardState::DIRTY);
}
// this is called concurrently with the mutator after bulk writes
// we may be concurrently cleaning cards, but not groups and pages
void SatoriPage::DirtyCardsForRangeConcurrent(size_t start, size_t end)
{
size_t firstByteOffset = start - Start();
size_t lastByteOffset = end - Start() - 1;
size_t firstCard = firstByteOffset / Satori::BYTES_PER_CARD_BYTE;
_ASSERTE(firstCard >= m_cardTableStart);
_ASSERTE(firstCard < m_cardTableSize);
size_t lastCard = lastByteOffset / Satori::BYTES_PER_CARD_BYTE;
_ASSERTE(lastCard >= m_cardTableStart);
_ASSERTE(lastCard < m_cardTableSize);
// this must be ordered after bulk writes and cannot be conditional
// since we may be cleaning concurrently
VolatileStoreBarrier();
memset((void*)(m_cardTable + firstCard), Satori::CardState::DIRTY, lastCard - firstCard + 1);
// we do not clean groups concurrently, so these can be conditional and unordered
// only the eventual final state matters
size_t firstGroup = firstByteOffset / Satori::REGION_SIZE_GRANULARITY;
size_t lastGroup = lastByteOffset / Satori::REGION_SIZE_GRANULARITY;
for (size_t i = firstGroup; i <= lastGroup; i++)
{
if (m_cardGroups[i * 2] != Satori::CardState::DIRTY)
{
m_cardGroups[i * 2] = Satori::CardState::DIRTY;
}
}
if (m_cardState != Satori::CardState::DIRTY)
{
m_cardState = Satori::CardState::DIRTY;
}
}
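
The comments in this hunk spell out the ordering contract that makes concurrent cleaning safe: writers publish dirty state from the finest level up (cards, then groups, then page), while the cleaner reads from the coarsest level down and only then looks at object marks. A condensed sketch of both sides using std::atomic, with a single cell standing in for each of the card table, the group array, and the page state (illustrative only):

#include <atomic>
#include <cstdint>

constexpr int8_t kRemembered = 1;
constexpr int8_t kDirty      = 2;

std::atomic<int8_t> card{0};
std::atomic<int8_t> group{0};
std::atomic<int8_t> page{0};

// writer side (barriers, bulk helpers): order the stores so that a reader
// walking page -> group -> card can never see a dirty page or group without
// also seeing the dirty card underneath it
void DirtyAfterWrite()
{
    card.store(kDirty, std::memory_order_release);
    group.store(kDirty, std::memory_order_release);
    page.store(kDirty, std::memory_order_release);
}

// cleaner side: consume in the opposite order, clean the card before scanning,
// and fence before reading marks (the MemoryBarrier() in the recycler);
// a reference stored after the clean re-dirties the card for the next pass
bool TryCleanAndScan()
{
    if (page.load(std::memory_order_acquire)  != kDirty) return false;
    if (group.load(std::memory_order_acquire) != kDirty) return false;
    if (card.load(std::memory_order_acquire)  != kDirty) return false;

    card.store(kRemembered, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);
    // ... scan the objects covered by this card ...
    return true;
}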
void SatoriPage::WipeCardsForRange(size_t start, size_t end, bool isTenured)
{
size_t firstByteOffset = start - Start();

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -81,7 +81,11 @@ public:
}
}
inline void DirtyCardForAddressUnordered(size_t address)
// This is called to simulate a write concurrently with the mutator.
// For example, when concurrent marking can't mark a child object, we need to keep
// it logically gray, but can't keep it in the gray queue lest we never finish.
// So we make the reference dirty as if it was written, but there was no actual write.
inline void DirtyCardForAddressConcurrent(size_t address)
{
size_t offset = address - (size_t)this;
size_t cardByteOffset = offset / Satori::BYTES_PER_CARD_BYTE;
@ -89,17 +93,20 @@ public:
_ASSERTE(cardByteOffset >= m_cardTableStart);
_ASSERTE(cardByteOffset < m_cardTableSize);
if (m_cardTable[cardByteOffset] != Satori::CardState::DIRTY)
// we dirty the card unconditionally, since it can be concurrently cleaned
// however this does not get called after field writes (unlike in barriers or bulk helpers),
// so the dirtying write can be unordered.
m_cardTable[cardByteOffset] = Satori::CardState::DIRTY;
// we do not clean groups concurrently, so these can be conditional and unordered
// only the eventual final state matters
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
if (m_cardGroups[cardGroup * 2] != Satori::CardState::DIRTY)
{
m_cardTable[cardByteOffset] = Satori::CardState::DIRTY;
size_t cardGroup = offset / Satori::REGION_SIZE_GRANULARITY;
if (m_cardGroups[cardGroup * 2] != Satori::CardState::DIRTY)
m_cardGroups[cardGroup * 2] = Satori::CardState::DIRTY;
if (m_cardState != Satori::CardState::DIRTY)
{
m_cardGroups[cardGroup * 2] = Satori::CardState::DIRTY;
if (m_cardState != Satori::CardState::DIRTY)
{
m_cardState = Satori::CardState::DIRTY;
}
m_cardState = Satori::CardState::DIRTY;
}
}
}
@ -108,7 +115,7 @@ public:
void DirtyCardForAddress(size_t address);
void SetCardsForRange(size_t start, size_t end);
void DirtyCardsForRange(size_t start, size_t length);
void DirtyCardsForRangeUnordered(size_t start, size_t end);
void DirtyCardsForRangeConcurrent(size_t start, size_t end);
void WipeCardsForRange(size_t start, size_t end, bool isTenured);

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -66,6 +66,18 @@ void ToggleWriteBarrier(bool concurrent, bool eeSuspended)
GCToEEInterface::StompWriteBarrier(&args);
}
static SatoriRegionQueue* AllocQueue(QueueKind kind)
{
const size_t align = 64;
#ifdef _MSC_VER
void* buffer = _aligned_malloc(sizeof(SatoriRegionQueue), align);
#else
void* buffer = malloc(sizeof(SatoriRegionQueue) + align);
buffer = (void*)ALIGN_UP((size_t)buffer, align);
#endif
return new(buffer)SatoriRegionQueue(kind);
}
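
AllocQueue gives each recycler queue its own 64-byte-aligned block, presumably so that hot queue heads do not share cache lines; on MSVC it uses _aligned_malloc, and elsewhere it over-allocates with malloc and rounds the pointer up (the rounded-up pointer is what gets used, so the original allocation is never handed back to free). A sketch of the same effect with standard C++17 aligned allocation, using an illustrative type rather than SatoriRegionQueue:

#include <new>

struct alignas(64) PaddedQueue
{
    // ... queue state lives in its own cache line(s) ...
    void* head = nullptr;
    void* tail = nullptr;
};

PaddedQueue* AllocPaddedQueue()
{
    // with an over-aligned type, C++17 routes this through
    // operator new(std::size_t, std::align_val_t), so no manual
    // ALIGN_UP bookkeeping is needed and delete works normally
    return new PaddedQueue();
}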
void SatoriRecycler::Initialize(SatoriHeap* heap)
{
m_helpersGate = new (nothrow) GCEvent;
@ -76,36 +88,37 @@ void SatoriRecycler::Initialize(SatoriHeap* heap)
m_noWorkSince = 0;
m_perfCounterFrequencyMHz = GCToOSInterface::QueryPerformanceFrequency() / 1000;
m_perfCounterTicksPerMilli = GCToOSInterface::QueryPerformanceFrequency() / 1000;
m_perfCounterTicksPerMicro = GCToOSInterface::QueryPerformanceFrequency() / 1000000;
m_heap = heap;
m_trimmer = new (nothrow) SatoriTrimmer(heap);
m_ephemeralRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerEphemeral);
m_ephemeralFinalizationTrackingRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerEphemeralFinalizationTracking);
m_tenuredRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerTenured);
m_tenuredFinalizationTrackingRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerTenuredFinalizationTracking);
m_ephemeralRegions = AllocQueue(QueueKind::RecyclerEphemeral);
m_ephemeralFinalizationTrackingRegions = AllocQueue(QueueKind::RecyclerEphemeralFinalizationTracking);
m_tenuredRegions = AllocQueue(QueueKind::RecyclerTenured);
m_tenuredFinalizationTrackingRegions = AllocQueue(QueueKind::RecyclerTenuredFinalizationTracking);
m_finalizationPendingRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerFinalizationPending);
m_finalizationPendingRegions = AllocQueue(QueueKind::RecyclerFinalizationPending);
m_stayingRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerStaying);
m_relocatingRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerRelocating);
m_relocatedRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerRelocated);
m_relocatedToHigherGenRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerRelocatedToHigherGen);
m_stayingRegions = AllocQueue(QueueKind::RecyclerStaying);
m_relocatingRegions = AllocQueue(QueueKind::RecyclerRelocating);
m_relocatedRegions = AllocQueue(QueueKind::RecyclerRelocated);
m_relocatedToHigherGenRegions = AllocQueue(QueueKind::RecyclerRelocatedToHigherGen);
for (int i = 0; i < Satori::FREELIST_COUNT; i++)
{
m_relocationTargets[i] = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerRelocationTarget);
m_relocationTargets[i] = AllocQueue(QueueKind::RecyclerRelocationTarget);
}
m_deferredSweepRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerDeferredSweep);
m_deferredSweepRegions = AllocQueue(QueueKind::RecyclerDeferredSweep);
m_deferredSweepCount = 0;
m_gen1AddedSinceLastCollection = 0;
m_gen2AddedSinceLastCollection = 0;
m_reusableRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerReusable);
m_reusableRegionsAlternate = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerReusable);
m_demotedRegions = new (nothrow) SatoriRegionQueue(QueueKind::RecyclerDemoted);
m_reusableRegions = AllocQueue(QueueKind::RecyclerReusable);
m_ephemeralWithUnmarkedDemoted = AllocQueue(QueueKind::RecyclerDemoted);
m_reusableRegionsAlternate = AllocQueue(QueueKind::RecyclerReusable);
m_workList = new (nothrow) SatoriWorkList();
m_gcState = GC_STATE_NONE;
@ -139,8 +152,18 @@ void SatoriRecycler::Initialize(SatoriHeap* heap)
m_cardScanTicket = 0;
m_concurrentCardsDone = false;
m_concurrentHandlesDone = false;
m_ccStackMarkingThreadsNum = 0;
m_isLowLatencyMode = SatoriUtil::IsLowLatencyMode();
for (int i = 0; i < 2; i++)
{
m_gcStartMillis[i] = m_gcDurationMillis[i] = 0;
}
m_lastEphemeralGcInfo = { 0 };
m_lastTenuredGcInfo = { 0 };
m_CurrentGcInfo = nullptr;
}
void SatoriRecycler::ShutDown()
@ -184,9 +207,6 @@ void SatoriRecycler::IncrementRootScanTicket()
{
m_rootScanTicket++;
}
m_concurrentCardsDone = false;
m_concurrentHandlesDone = false;
}
void SatoriRecycler::IncrementCardScanTicket()
@ -223,7 +243,7 @@ size_t SatoriRecycler::Gen1RegionCount()
{
return m_ephemeralFinalizationTrackingRegions->Count() +
m_ephemeralRegions->Count() +
m_demotedRegions->Count();
m_ephemeralWithUnmarkedDemoted->Count();
}
size_t SatoriRecycler::Gen2RegionCount()
@ -260,19 +280,11 @@ SatoriRegion* SatoriRecycler::TryGetReusableForLarge()
void SatoriRecycler::PushToEphemeralQueues(SatoriRegion* region)
{
if (region->IsDemoted())
if (region->HasUnmarkedDemotedObjects())
{
m_demotedRegions->Push(region);
m_ephemeralWithUnmarkedDemoted->Push(region);
}
else
{
PushToEphemeralQueuesIgnoringDemoted(region);
}
}
void SatoriRecycler::PushToEphemeralQueuesIgnoringDemoted(SatoriRegion* region)
{
if (region->HasFinalizables())
else if (region->HasFinalizables())
{
m_ephemeralFinalizationTrackingRegions->Push(region);
}
@ -369,11 +381,12 @@ void SatoriRecycler::AddTenuredRegion(SatoriRegion* region)
size_t SatoriRecycler::GetNowMillis()
{
int64_t t = GCToOSInterface::QueryPerformanceCounter();
return (size_t)(t / m_perfCounterFrequencyMHz);
return (size_t)(t / m_perfCounterTicksPerMilli);
}
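
GetNowMillis reads the raw performance counter and divides by the pre-computed ticks-per-millisecond; the companion ticks-per-microsecond value is what BlockingCollect uses to store pause durations, and GetMemoryInfo later multiplies those microseconds by 10 to produce the 100 ns units TimeSpan expects. A small worked example of the conversions (the 10 MHz frequency is just an illustrative value):

#include <cstdint>

int64_t freq          = 10'000'000;        // example QueryPerformanceFrequency: 10 MHz
int64_t ticksPerMilli = freq / 1'000;      // 10,000 counter ticks per millisecond
int64_t ticksPerMicro = freq / 1'000'000;  // 10 counter ticks per microsecond

int64_t  pauseTicks   = 15'000;                      // a pause measured in raw counter ticks
int64_t  pauseMicros  = pauseTicks / ticksPerMicro;  // 1,500 us, as stored in m_pauseDurations
uint64_t pauseInfoRaw = (uint64_t)pauseMicros * 10;  // 15,000 units of 100 ns for TimeSpan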
size_t SatoriRecycler::IncrementGen0Count()
{
// duration of Gen0 is typically << msec, so we will not record that.
m_gcStartMillis[0] = GetNowMillis();
return Interlocked::Increment((size_t*)&m_gcCount[0]);
}
@ -384,6 +397,10 @@ void SatoriRecycler::TryStartGC(int generation, gc_reason reason)
if (m_gcState == GC_STATE_NONE &&
Interlocked::CompareExchange(&m_gcState, newState, GC_STATE_NONE) == GC_STATE_NONE)
{
m_CurrentGcInfo = generation == 2 ? &m_lastTenuredGcInfo :&m_lastEphemeralGcInfo;
m_CurrentGcInfo->m_condemnedGeneration = (uint8_t)generation;
m_CurrentGcInfo->m_concurrent = SatoriUtil::IsConcurrent();
FIRE_EVENT(GCTriggered, (uint32_t)reason);
m_trimmer->SetStopSuggested();
@ -398,6 +415,8 @@ void SatoriRecycler::TryStartGC(int generation, gc_reason reason)
{
m_ccStackMarkState = CC_MARK_STATE_NONE;
IncrementRootScanTicket();
m_concurrentCardsDone = false;
m_concurrentHandlesDone = false;
SatoriHandlePartitioner::StartNextScan();
m_activeHelperFn = &SatoriRecycler::ConcurrentHelp;
}
@ -424,7 +443,7 @@ bool IsHelperThread()
int64_t SatoriRecycler::HelpQuantum()
{
return m_perfCounterFrequencyMHz / 8; // 1/8 msec
return m_perfCounterTicksPerMilli / 8; // 1/8 msec
}
bool SatoriRecycler::HelpOnceCore()
@ -440,13 +459,6 @@ bool SatoriRecycler::HelpOnceCore()
_ASSERTE(m_isBarrierConcurrent);
// help with marking stacks and f-queue, this is urgent since EE is stopped for this.
BlockingMarkForConcurrentHelper();
if (m_ccStackMarkState == CC_MARK_STATE_MARKING && IsHelperThread())
{
// before trying other things let other marking threads go ahead
// we do this as a courtesy because EE is stopped and we do not want to delay marking threads.
GCToOSInterface::YieldThread(0);
}
}
int64_t timeStamp = GCToOSInterface::QueryPerformanceCounter();
@ -470,11 +482,12 @@ bool SatoriRecycler::HelpOnceCore()
m_isBarrierConcurrent = true;
}
if (MarkOwnStackAndDrainQueues(deadline))
if (m_ccStackMarkState != CC_MARK_STATE_DONE)
{
return true;
MarkOwnStackOrDrainQueuesConcurrent(deadline);
}
// if stacks are not marked yet, start suspending EE
if (m_ccStackMarkState == CC_MARK_STATE_NONE)
{
// only one thread will win and drive this stage, others may help.
@ -483,20 +496,24 @@ bool SatoriRecycler::HelpOnceCore()
if (m_ccStackMarkState == CC_MARK_STATE_SUSPENDING_EE && !IsHelperThread())
{
// leave and suspend.
// this is a mutator thread and we are suspending mutators, so leave and suspend.
return true;
}
if (MarkDemotedAndDrainQueuesConcurrent(deadline))
{
return true;
}
if (!m_concurrentHandlesDone)
{
if (MarkHandles(deadline))
{
return true;
}
else
bool moreWork = MarkHandles(deadline);
if (!moreWork)
{
m_concurrentHandlesDone = true;
}
return true;
}
if (!m_concurrentCardsDone)
@ -516,7 +533,7 @@ bool SatoriRecycler::HelpOnceCore()
if (m_ccStackMarkState == CC_MARK_STATE_MARKING)
{
_ASSERTE(IsHelperThread());
// come back and help with stack marking
// do not leave, come back and help
return true;
}
@ -556,14 +573,14 @@ private:
void SatoriRecycler::BlockingMarkForConcurrentHelper()
{
Interlocked::Increment(&m_ccMarkingThreadsNum);
Interlocked::Increment(&m_ccStackMarkingThreadsNum);
// check the state again; it could have changed if there were no marking threads
if (m_ccStackMarkState == CC_MARK_STATE_MARKING)
{
MarkAllStacksFinalizationAndDemotedRoots();
}
Interlocked::Decrement(&m_ccMarkingThreadsNum);
Interlocked::Decrement(&m_ccStackMarkingThreadsNum);
}
/* static */
@ -581,14 +598,14 @@ void SatoriRecycler::ConcurrentPhasePrepFn(gc_alloc_context* gcContext, void* pa
region->StopEscapeTracking();
}
if (region->IsDemoted())
if (region->HasUnmarkedDemotedObjects())
{
recycler->MarkDemoted(region, markContext);
}
}
region = context->LargeRegion();
if (region && region->IsDemoted())
if (region && region->HasUnmarkedDemotedObjects())
{
recycler->MarkDemoted(region, markContext);
}
@ -598,6 +615,7 @@ void SatoriRecycler::BlockingMarkForConcurrent()
{
if (Interlocked::CompareExchange(&m_ccStackMarkState, CC_MARK_STATE_SUSPENDING_EE, CC_MARK_STATE_NONE) == CC_MARK_STATE_NONE)
{
size_t blockingStart = GCToOSInterface::QueryPerformanceCounter();
GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP);
// swap reusable and alternate so that we could filter through reusables.
@ -623,7 +641,7 @@ void SatoriRecycler::BlockingMarkForConcurrent()
// done, wait for marking to finish and restart EE
Interlocked::Exchange(&m_ccStackMarkState, CC_MARK_STATE_DONE);
while (m_ccMarkingThreadsNum)
while (m_ccStackMarkingThreadsNum)
{
// since we are waiting anyways, try helping
if (!HelpOnceCore())
@ -632,6 +650,9 @@ void SatoriRecycler::BlockingMarkForConcurrent()
}
}
size_t blockingDuration = (GCToOSInterface::QueryPerformanceCounter() - blockingStart);
m_CurrentGcInfo->m_pauseDurations[1] = blockingDuration / m_perfCounterTicksPerMicro;
GCToEEInterface::RestartEE(false);
}
}
@ -695,10 +716,7 @@ int SatoriRecycler::MaxHelpers()
int cpuCount = GCToOSInterface::GetTotalProcessorCount();
// TUNING: should this be more dynamic? check CPU load and such.
// cpuCount is too aggressive?
helperCount = IsLowLatencyMode() ?
max(1, cpuCount / 2) : // leave some space for the mutator
cpuCount;
helperCount = cpuCount - 1;
}
return helperCount;
@ -889,11 +907,57 @@ void SatoriRecycler::AdjustHeuristics()
void SatoriRecycler::BlockingCollect()
{
if (m_condemnedGeneration == 2)
{
BlockingCollect2();
}
else
{
BlockingCollect1();
}
}
NOINLINE
void SatoriRecycler::BlockingCollect1()
{
size_t blockingStart = GCToOSInterface::QueryPerformanceCounter();
// stop other threads.
GCToEEInterface::SuspendEE(SUSPEND_FOR_GC);
FIRE_EVENT(GCStart_V2, (int)m_gcCount[0], m_condemnedGeneration, reason_empty, gc_etw_type_ngc);
BlockingCollectImpl();
size_t blockingDuration = (GCToOSInterface::QueryPerformanceCounter() - blockingStart);
m_CurrentGcInfo->m_pauseDurations[0] = blockingDuration / m_perfCounterTicksPerMicro;
m_gcDurationMillis[1] = blockingDuration / m_perfCounterTicksPerMicro;
m_CurrentGcInfo = nullptr;
// restart VM
GCToEEInterface::RestartEE(true);
}
NOINLINE
void SatoriRecycler::BlockingCollect2()
{
size_t blockingStart = GCToOSInterface::QueryPerformanceCounter();
// stop other threads.
GCToEEInterface::SuspendEE(SUSPEND_FOR_GC);
BlockingCollectImpl();
size_t blockingDuration = (GCToOSInterface::QueryPerformanceCounter() - blockingStart);
m_CurrentGcInfo->m_pauseDurations[0] = blockingDuration / m_perfCounterTicksPerMicro;
m_gcDurationMillis[2] = blockingDuration / m_perfCounterTicksPerMicro;
m_CurrentGcInfo = nullptr;
// restart VM
GCToEEInterface::RestartEE(true);
}
void SatoriRecycler::BlockingCollectImpl()
{
FIRE_EVENT(GCStart_V2, (uint32_t)GlobalGcIndex() + 1, (uint32_t)m_condemnedGeneration, (uint32_t)reason_empty, (uint32_t)gc_etw_type_ngc);
m_gcStartMillis[m_condemnedGeneration] = GetNowMillis();
#ifdef TIMED
@ -937,18 +1001,6 @@ void SatoriRecycler::BlockingCollect()
_ASSERTE(m_deferredSweepRegions->IsEmpty());
FIRE_EVENT(GCHeapStats_V2,
m_occupancy[0], 0,
m_occupancy[1], 0,
m_occupancy[2], 0,
0, 0,
0, 0,
0,
0,
0,
0,
0);
// now we know survivorship after the last GC
// and we can figure what we want to do in this GC and when we will do the next one
AdjustHeuristics();
@ -982,8 +1034,7 @@ void SatoriRecycler::BlockingCollect()
m_gcCount[2]++;
}
// we are done with gen0 here, update the occupancy
m_occupancy[0] = m_occupancyAcc[0];
m_CurrentGcInfo->m_index = GlobalGcIndex();
// we may still have some deferred sweeping to do, but
// that is unobservable to EE, so tell EE that we are done
@ -998,7 +1049,35 @@ void SatoriRecycler::BlockingCollect()
}
#endif
FIRE_EVENT(GCEnd_V1, int(m_gcCount[0] - 1), m_condemnedGeneration);
FIRE_EVENT(GCEnd_V1, (uint32_t)GlobalGcIndex(), (uint32_t)m_condemnedGeneration);
//void FireGCHeapStats_V2(uint64_t generationSize0,
// uint64_t totalPromotedSize0,
// uint64_t generationSize1,
// uint64_t totalPromotedSize1,
// uint64_t generationSize2,
// uint64_t totalPromotedSize2,
// uint64_t generationSize3,
// uint64_t totalPromotedSize3,
// uint64_t generationSize4,
// uint64_t totalPromotedSize4,
// uint64_t finalizationPromotedSize,
// uint64_t finalizationPromotedCount,
// uint32_t pinnedObjectCount,
// uint32_t sinkBlockCount,
// uint32_t gcHandleCount);
FIRE_EVENT(GCHeapStats_V2,
m_occupancy[0], m_occupancy[0],
m_occupancy[1], m_occupancy[1],
m_occupancy[2], m_occupancy[2],
(size_t)0, (size_t)0,
(size_t)0, (size_t)0,
(size_t)0,
(size_t)m_heap->FinalizationQueue()->Count(),
(uint32_t)0,
(uint32_t)0,
(uint32_t)0);
m_prevCondemnedGeneration = m_condemnedGeneration;
m_condemnedGeneration = 0;
@ -1019,16 +1098,21 @@ void SatoriRecycler::BlockingCollect()
m_occupancy[2] = m_occupancyAcc[2];
}
m_trimmer->SetOkToRun();
// we are done with gen0 here, update the occupancy
m_occupancy[0] = m_occupancyAcc[0];
// restart VM
GCToEEInterface::RestartEE(true);
m_trimmer->SetOkToRun();
}
void SatoriRecycler::RunWithHelp(void(SatoriRecycler::* method)())
{
m_activeHelperFn = method;
(this->*method)();
do
{
(this->*method)();
YieldProcessor();
} while (m_activeHelpers > 0);
m_activeHelperFn = nullptr;
// make sure everyone sees the new Fn before waiting for helpers to drain.
MemoryBarrier();
@ -1037,7 +1121,6 @@ void SatoriRecycler::RunWithHelp(void(SatoriRecycler::* method)())
// TUNING: are we wasting too many cycles here?
// should we find something more useful to do than mmpause,
// or perhaps Sleep(0) after a few spins?
(this->*method)();
YieldProcessor();
}
}
@ -1109,7 +1192,6 @@ void SatoriRecycler::MarkStrongReferencesWorker()
// it is still preferred to look at own stack on the same thread.
// this will also ask for helpers.
MarkOwnStackAndDrainQueues();
MarkHandles();
MarkAllStacksFinalizationAndDemotedRoots();
@ -1378,7 +1460,39 @@ void SatoriRecycler::MarkFnConcurrent(PTR_PTR_Object ppObject, ScanContext* sc,
}
};
bool SatoriRecycler::MarkOwnStackAndDrainQueues(int64_t deadline)
bool SatoriRecycler::MarkDemotedAndDrainQueuesConcurrent(int64_t deadline)
{
_ASSERTE(!IsBlockingPhase());
MarkContext markContext = MarkContext(this);
// in blocking case we go through demoted together with marking all stacks
// in concurrent case we do it here, since going through demoted does not need EE stopped.
SatoriRegion* curRegion = m_ephemeralWithUnmarkedDemoted->TryPop();
if (curRegion)
{
MaybeAskForHelp();
do
{
MarkDemoted(curRegion, &markContext);
PushToEphemeralQueues(curRegion);
if (deadline && ((GCToOSInterface::QueryPerformanceCounter() - deadline) > 0))
{
if (markContext.m_WorkChunk != nullptr)
{
m_workList->Push(markContext.m_WorkChunk);
}
return true;
}
} while ((curRegion = m_ephemeralWithUnmarkedDemoted->TryPop()));
}
return DrainMarkQueuesConcurrent(markContext.m_WorkChunk, deadline);
}
void SatoriRecycler::MarkOwnStackAndDrainQueues()
{
MarkContext markContext = MarkContext(this);
@ -1394,48 +1508,56 @@ bool SatoriRecycler::MarkOwnStackAndDrainQueues(int64_t deadline)
{
MaybeAskForHelp();
MarkOwnStack(aContext, &markContext);
}
}
}
// in blocking case we go through demoted together with marking all stacks
// in concurrent case we do it here, since going through demoted does not need EE stopped.
bool isBlockingPhase = IsBlockingPhase();
if (!isBlockingPhase)
{
SatoriRegion* curRegion = m_demotedRegions->TryPop();
if (curRegion)
{
MaybeAskForHelp();
do
{
MarkDemoted(curRegion, &markContext);
PushToEphemeralQueuesIgnoringDemoted(curRegion);
if (deadline && ((GCToOSInterface::QueryPerformanceCounter() - deadline) > 0))
// in concurrent prep stage we do not drain after self-scanning as we prefer to suspend quickly
if (!IsBlockingPhase())
{
if (markContext.m_WorkChunk != nullptr)
{
m_workList->Push(markContext.m_WorkChunk);
}
return true;
return;
}
} while ((curRegion = m_demotedRegions->TryPop()));
}
}
}
bool revisit = false;
if (isBlockingPhase)
DrainMarkQueues(markContext.m_WorkChunk);
}
void SatoriRecycler::MarkOwnStackOrDrainQueuesConcurrent(int64_t deadline)
{
MarkContext markContext = MarkContext(this);
if (!IsHelperThread())
{
DrainMarkQueues(markContext.m_WorkChunk);
}
else
{
revisit = DrainMarkQueuesConcurrent(markContext.m_WorkChunk, deadline);
gc_alloc_context* aContext = GCToEEInterface::GetAllocContext();
int threadScanTicket = VolatileLoadWithoutBarrier(&aContext->alloc_count);
int currentScanTicket = GetRootScanTicket();
if (threadScanTicket != currentScanTicket)
{
// claim our own stack for scanning
if (Interlocked::CompareExchange(&aContext->alloc_count, currentScanTicket, threadScanTicket) == threadScanTicket)
{
MaybeAskForHelp();
MarkOwnStack(aContext, &markContext);
// in concurrent prep stage we do not drain after self-scanning as we prefer to suspend quickly
if (!IsBlockingPhase())
{
if (markContext.m_WorkChunk != nullptr)
{
m_workList->Push(markContext.m_WorkChunk);
}
return;
}
}
}
}
return revisit;
DrainMarkQueuesConcurrent(markContext.m_WorkChunk, deadline);
}
void SatoriRecycler::MarkOwnStack(gc_alloc_context* aContext, MarkContext* markContext)
@ -1463,6 +1585,7 @@ void SatoriRecycler::MarkOwnStack(gc_alloc_context* aContext, MarkContext* markC
void SatoriRecycler::MarkDemoted(SatoriRegion* curRegion, MarkContext* markContext)
{
_ASSERTE(curRegion->Generation() == 1);
curRegion->HasUnmarkedDemotedObjects() = false;
if (m_condemnedGeneration == 1)
{
@ -1532,15 +1655,15 @@ void SatoriRecycler::MarkAllStacksFinalizationAndDemotedRoots()
// a part of the blocking phase or a part of concurrent GC
if (isBlockingPhase)
{
SatoriRegion* curRegion = m_demotedRegions->TryPop();
SatoriRegion* curRegion = m_ephemeralWithUnmarkedDemoted->TryPop();
if (curRegion)
{
MaybeAskForHelp();
do
{
MarkDemoted(curRegion, &markContext);
PushToEphemeralQueuesIgnoringDemoted(curRegion);
} while ((curRegion = m_demotedRegions->TryPop()));
PushToEphemeralQueues(curRegion);
} while ((curRegion = m_ephemeralWithUnmarkedDemoted->TryPop()));
}
}
else
@ -1555,7 +1678,8 @@ void SatoriRecycler::MarkAllStacksFinalizationAndDemotedRoots()
MaybeAskForHelp();
do
{
if (curRegion->IsDemoted() && curRegion->ReusableFor() != SatoriRegion::ReuseLevel::Gen0)
if (curRegion->HasUnmarkedDemotedObjects() &&
curRegion->ReusableFor() != SatoriRegion::ReuseLevel::Gen0)
{
MarkDemoted(curRegion, &markContext);
}
@ -1625,7 +1749,7 @@ bool SatoriRecycler::DrainMarkQueuesConcurrent(SatoriWorkChunk* srcChunk, int64_
ref = (SatoriObject**)o->Start();
}
parentRegion->ContainingPage()->DirtyCardForAddressUnordered((size_t)ref);
parentRegion->ContainingPage()->DirtyCardForAddressConcurrent((size_t)ref);
}
};
@ -1865,7 +1989,7 @@ size_t ThreadSpecificNumber(int64_t gcIndex)
int64_t SatoriRecycler::GlobalGcIndex()
{
return m_gcCount[1];
return m_gcCount[0];
}
struct ConcurrentCardsRestart
@ -1990,13 +2114,17 @@ bool SatoriRecycler::MarkThroughCardsConcurrent(int64_t deadline)
size_t start = page->LocationForCard(&cards[j]);
do
{
// We do not clean cards in concurrent mode since barrier can dirty cards ahead of writes.
// on x64 that is not the case, but we will keep the same logic.
cards[j] = resetValue;
} while (++j < Satori::CARD_BYTES_IN_CARD_GROUP && cards[j]);
size_t end = page->LocationForCard(&cards[j]);
size_t objLimit = min(end, region->Start() + Satori::REGION_SIZE_GRANULARITY);
SatoriObject* o = region->FindObject(start);
// TODO: VS can do only if we cleaned.
// read marks after cleaning cards
MemoryBarrier();
do
{
o->ForEachObjectRef(
@ -2029,14 +2157,15 @@ bool SatoriRecycler::MarkThroughCardsConcurrent(int64_t deadline)
ref = (SatoriObject**)o->Start();
}
parentRegion->ContainingPage()->DirtyCardForAddressUnordered((size_t)ref);
parentRegion->ContainingPage()->DirtyCardForAddressConcurrent((size_t)ref);
}
}, start, end);
o = o->Next();
} while (o->Start() < objLimit);
}
if (deadline && (GCToOSInterface::QueryPerformanceCounter() - deadline > 0))
_ASSERTE(deadline != 0);
if (GCToOSInterface::QueryPerformanceCounter() - deadline > 0)
{
// timed out, there could be more work
// save where we would restart if we see this page again
@ -2133,6 +2262,8 @@ bool SatoriRecycler::ScanDirtyCardsConcurrent(int64_t deadline)
// we should not be marking when there could be dead objects
_ASSERTE(!region->HasMarksSet());
const int8_t resetValue = region->Generation() >= 2 ? Satori::CardState::REMEMBERED : Satori::CardState::EPHEMERAL;
// allocating region is not parseable.
if (region->MaybeAllocatingAcquire())
{
@ -2159,15 +2290,26 @@ bool SatoriRecycler::ScanDirtyCardsConcurrent(int64_t deadline)
size_t start = page->LocationForCard(&cards[j]);
do
{
cards[j] = resetValue;
} while (++j < Satori::CARD_BYTES_IN_CARD_GROUP &&
cards[j] == Satori::CardState::DIRTY);
size_t end = page->LocationForCard(&cards[j]);
size_t objLimit = min(end, region->Start() + Satori::REGION_SIZE_GRANULARITY);
SatoriObject* o = region->FindObject(start);
// read marks after cleaning cards
MemoryBarrier();
do
{
if (o->IsMarked())
o = region->SkipUnmarked(o, objLimit);
if (o->Start() == objLimit)
{
break;
}
// if (o->IsMarked())
{
o->ForEachObjectRef(
[&](SatoriObject** ref)
@ -2176,7 +2318,6 @@ bool SatoriRecycler::ScanDirtyCardsConcurrent(int64_t deadline)
if (child && !child->IsExternal())
{
SatoriRegion* childRegion = child->ContainingRegion();
// cannot mark stuff in thread local regions. just mark as dirty to visit later.
if (!childRegion->MaybeEscapeTrackingAcquire())
{
if (!child->IsMarkedOrOlderThan(2))
@ -2187,7 +2328,21 @@ bool SatoriRecycler::ScanDirtyCardsConcurrent(int64_t deadline)
this->PushToMarkQueuesSlow(dstChunk, child);
}
}
return;
}
// cannot mark stuff in thread local regions. just mark as dirty to visit later.
// if ref is outside of the containing region, it is a fake ref to collectible allocator.
// dirty the MT location as if it points to the allocator object
// technically it does reference the allocator, by indirection.
SatoriRegion* parentRegion = o->ContainingRegion();
if ((size_t)ref - parentRegion->Start() > parentRegion->Size())
{
ref = (SatoriObject**)o->Start();
}
parentRegion->ContainingPage()->DirtyCardForAddressConcurrent((size_t)ref);
}
}, start, end);
}
@ -2195,7 +2350,8 @@ bool SatoriRecycler::ScanDirtyCardsConcurrent(int64_t deadline)
} while (o->Start() < objLimit);
}
if (deadline && (GCToOSInterface::QueryPerformanceCounter() - deadline > 0))
_ASSERTE(deadline != 0);
if (GCToOSInterface::QueryPerformanceCounter() - deadline > 0)
{
// timed out, there could be more work
revisit = true;
@ -2451,7 +2607,16 @@ void SatoriRecycler::CleanCards()
do
{
if (considerAllMarked || o->IsMarked())
if (!considerAllMarked)
{
o = region->SkipUnmarked(o, objLimit);
if (o->Start() == objLimit)
{
break;
}
}
// if (considerAllMarked || o->IsMarked())
{
o->ForEachObjectRef(
[&](SatoriObject** ref)
@ -2922,6 +3087,36 @@ void SatoriRecycler::PromoteSurvivedHandlesAndFreeRelocatedRegionsWorker()
FreeRelocatedRegionsWorker();
}
void SatoriRecycler::FreeRelocatedRegion(SatoriRegion* curRegion)
{
_ASSERTE(!curRegion->HasPinnedObjects());
curRegion->ClearMarks();
bool isNurseryRegion = curRegion->IsAttachedToAllocatingOwner();
if (isNurseryRegion)
{
curRegion->DetachFromAlocatingOwnerRelease();
}
// return nursery regions eagerly
// there should be a modest number of those, but we may need them soon
// defer blanking of others
if (SatoriUtil::IsConcurrent() && !isNurseryRegion)
{
#if _DEBUG
curRegion->HasMarksSet() = false;
#endif
curRegion->DoNotSweep() = true;
curRegion->SetOccupancy(0, 0);
m_deferredSweepRegions->Enqueue(curRegion);
}
else
{
curRegion->MakeBlank();
m_heap->Allocator()->ReturnRegion(curRegion);
}
}
void SatoriRecycler::FreeRelocatedRegionsWorker()
{
SatoriRegion* curRegion = m_relocatedRegions->TryPop();
@ -2930,32 +3125,7 @@ void SatoriRecycler::FreeRelocatedRegionsWorker()
MaybeAskForHelp();
do
{
_ASSERTE(!curRegion->HasPinnedObjects());
curRegion->ClearMarks();
bool isNurseryRegion = curRegion->IsAttachedToAllocatingOwner();
if (isNurseryRegion)
{
curRegion->DetachFromAlocatingOwnerRelease();
}
// return nursery regions eagerly
// there should be a modest number of those, but we may need them soon
// defer blanking of others
if (SatoriUtil::IsConcurrent() && !isNurseryRegion)
{
#if _DEBUG
curRegion->HasMarksSet() = false;
#endif
curRegion->DoNotSweep() = true;
curRegion->SetOccupancy(0, 0);
m_deferredSweepRegions->Enqueue(curRegion);
}
else
{
curRegion->MakeBlank();
m_heap->Allocator()->ReturnRegion(curRegion);
}
FreeRelocatedRegion(curRegion);
} while ((curRegion = m_relocatedRegions->TryPop()));
}
}
@ -3310,19 +3480,32 @@ void SatoriRecycler::RelocateRegion(SatoriRegion* relocationSource)
// the target may yet have more space and be a target for more relocations.
AddRelocationTarget(relocationTarget);
if (relocationIsPromotion)
if (objectsRelocated > 0)
{
relocationTarget->AcceptedPromotedObjects() = true;
m_relocatedToHigherGenRegions->Push(relocationSource);
if (relocationIsPromotion)
{
relocationTarget->AcceptedPromotedObjects() = true;
m_relocatedToHigherGenRegions->Push(relocationSource);
}
else
{
m_relocatedRegions->Push(relocationSource);
}
}
else
{
m_relocatedRegions->Push(relocationSource);
FreeRelocatedRegion(relocationSource);
}
}
void SatoriRecycler::Update()
{
// if we ended up not moving anything, this is no longer a relocating GC.
m_isRelocating = m_relocatedRegions->Count() > 0 ||
m_relocatedToHigherGenRegions->Count() > 0;
m_CurrentGcInfo->m_compaction = m_isRelocating;
if (m_isRelocating)
{
IncrementRootScanTicket();
@ -3844,6 +4027,11 @@ size_t SatoriRecycler::GetGcStartMillis(int generation)
return m_gcStartMillis[generation];
}
size_t SatoriRecycler::GetGcDurationMillis(int generation)
{
return m_gcDurationMillis[generation];
}
bool& SatoriRecycler::IsLowLatencyMode()
{
return m_isLowLatencyMode;

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -37,6 +37,15 @@ class SatoriTrimmer;
class SatoriRegion;
class MarkContext;
struct LastRecordedGcInfo
{
size_t m_index;
size_t m_pauseDurations[2];
uint8_t m_condemnedGeneration;
bool m_compaction;
bool m_concurrent;
};
class SatoriRecycler
{
friend class MarkContext;
@ -78,6 +87,7 @@ public:
size_t GetTotalOccupancy();
size_t GetOccupancy(int i);
size_t GetGcStartMillis(int generation);
size_t GetGcDurationMillis(int generation);
int64_t GlobalGcIndex();
@ -92,6 +102,23 @@ public:
bool IsReuseCandidate(SatoriRegion* region);
bool IsPromotionCandidate(SatoriRegion* region);
LastRecordedGcInfo* GetLastGcInfo(gc_kind kind)
{
if (kind == gc_kind_ephemeral)
return &m_lastEphemeralGcInfo;
if (kind == gc_kind_full_blocking)
return GetLastGcInfo(gc_kind_any); // no concept of a blocking-only GC; every GC has a blocking part.
if (kind == gc_kind_background)
return GetLastGcInfo(gc_kind_any); // no concept of background GC; can't have 2 GCs at a time.
// if (kind == gc_kind_any)
return m_lastTenuredGcInfo.m_index > m_lastEphemeralGcInfo.m_index ?
&m_lastTenuredGcInfo :
&m_lastEphemeralGcInfo;
};
private:
SatoriHeap* m_heap;
@ -104,6 +131,8 @@ private:
// regions owned by recycler
SatoriRegionQueue* m_ephemeralRegions;
SatoriRegionQueue* m_ephemeralFinalizationTrackingRegions;
SatoriRegionQueue* m_ephemeralWithUnmarkedDemoted;
SatoriRegionQueue* m_tenuredRegions;
SatoriRegionQueue* m_tenuredFinalizationTrackingRegions;
@ -124,8 +153,6 @@ private:
SatoriRegionQueue* m_reusableRegions;
SatoriRegionQueue* m_reusableRegionsAlternate;
SatoriRegionQueue* m_demotedRegions;
static const int GC_STATE_NONE = 0;
static const int GC_STATE_CONCURRENT = 1;
static const int GC_STATE_BLOCKING = 2;
@ -139,7 +166,7 @@ private:
static const int CC_MARK_STATE_DONE = 3;
volatile int m_ccStackMarkState;
volatile int m_ccMarkingThreadsNum;
volatile int m_ccStackMarkingThreadsNum;
int m_syncBlockCacheScanDone;
@ -159,6 +186,7 @@ private:
int64_t m_gcCount[3];
int64_t m_gcStartMillis[3];
int64_t m_gcDurationMillis[3];
size_t m_gen1Budget;
size_t m_totalBudget;
@ -180,7 +208,8 @@ private:
int64_t m_currentAllocBytesDeadThreads;
int64_t m_totalAllocBytes;
int64_t m_perfCounterFrequencyMHz;
int64_t m_perfCounterTicksPerMilli;
int64_t m_perfCounterTicksPerMicro;
GCEvent* m_helpersGate;
volatile int m_gateSignaled;
@ -189,6 +218,10 @@ private:
int64_t m_noWorkSince;
LastRecordedGcInfo m_lastEphemeralGcInfo;
LastRecordedGcInfo m_lastTenuredGcInfo;
LastRecordedGcInfo* m_CurrentGcInfo;
private:
bool IsBlockingPhase();
@ -217,7 +250,6 @@ private:
bool HelpOnceCore();
void PushToEphemeralQueues(SatoriRegion* region);
void PushToEphemeralQueuesIgnoringDemoted(SatoriRegion* region);
void PushToEphemeralQueue(SatoriRegion* region);
void PushToTenuredQueues(SatoriRegion* region);
@ -236,8 +268,10 @@ private:
void PushToMarkQueuesSlow(SatoriWorkChunk*& currentWorkChunk, SatoriObject* o);
void DrainMarkQueues(SatoriWorkChunk* srcChunk = nullptr);
void MarkOwnStackAndDrainQueues();
void MarkOwnStackOrDrainQueuesConcurrent(int64_t deadline);
bool MarkDemotedAndDrainQueuesConcurrent(int64_t deadline);
bool DrainMarkQueuesConcurrent(SatoriWorkChunk* srcChunk = nullptr, int64_t deadline = 0);
bool MarkOwnStackAndDrainQueues(int64_t deadline = 0);
bool HasDirtyCards();
bool ScanDirtyCardsConcurrent(int64_t deadline);
@ -260,6 +294,11 @@ private:
void DependentHandlesRescanWorker();
void BlockingCollect();
// for profiling purposes Gen1 and Gen2 GC have distinct entrypoints, but the same implementation
void BlockingCollect1();
void BlockingCollect2();
void BlockingCollectImpl();
void BlockingMark();
void MarkNewReachable();
void DrainAndCleanWorker();
@ -278,6 +317,7 @@ private:
void Relocate();
void RelocateWorker();
void RelocateRegion(SatoriRegion* region);
void FreeRelocatedRegion(SatoriRegion* curRegion);
void FreeRelocatedRegionsWorker();
void PromoteHandlesAndFreeRelocatedRegions();

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -114,6 +114,7 @@ void SatoriRegion::RearmCardsForTenured()
{
_ASSERTE(Generation() == 2);
m_containingPage->WipeCardsForRange(Start(), End(), /* tenured */ true);
HasUnmarkedDemotedObjects() = false;
FreeDemotedTrackers();
}
@ -169,6 +170,7 @@ void SatoriRegion::MakeBlank()
m_hasMarksSet = false;
m_doNotSweep = false;
m_reusableFor = ReuseLevel::None;
m_hasUnmarkedDemotedObjects = false;
//clear index and free list
ClearFreeLists();
@ -298,7 +300,7 @@ void SatoriRegion::StopAllocating(size_t allocPtr)
_ASSERTE(m_occupancy >= unused);
SetOccupancy(m_occupancy - unused);
SatoriObject* freeObj = SatoriObject::FormatAsFree(allocPtr, unused);
AddFreeSpace(freeObj);
AddFreeSpace(freeObj, unused);
}
m_allocStart = m_allocEnd = 0;
@ -309,11 +311,12 @@ void SatoriRegion::StopAllocating()
StopAllocating(m_allocStart);
}
void SatoriRegion::AddFreeSpace(SatoriObject* freeObj)
void SatoriRegion::AddFreeSpace(SatoriObject* freeObj, size_t size)
{
_ASSERTE(freeObj->Size() == size);
// allocSize is smaller than size to make sure the span can always be made parseable
// after allocating objects in it.
ptrdiff_t allocSize = freeObj->Size() - Satori::MIN_FREE_SIZE;
ptrdiff_t allocSize = size - Satori::MIN_FREE_SIZE;
if (allocSize < Satori::MIN_FREELIST_SIZE)
{
return;
@ -329,6 +332,7 @@ void SatoriRegion::AddFreeSpace(SatoriObject* freeObj)
m_freeLists[bucket] = freeObj;
}
bool SatoriRegion::HasFreeSpaceInTopBucket()
{
return m_freeLists[Satori::FREELIST_COUNT - 1];
@ -949,14 +953,14 @@ bool SatoriRegion::ThreadLocalCollect(size_t allocBytes)
m_allocBytesAtCollect = allocBytes;
size_t count = Recycler()->IncrementGen0Count();
FIRE_EVENT(GCStart_V2, (int)count - 1, 0, gc_reason::reason_alloc_loh, gc_etw_type_ngc);
FIRE_EVENT(GCStart_V2, (int)count, 0, gc_reason::reason_alloc_soh, gc_etw_type_ngc);
ThreadLocalMark();
ThreadLocalPlan();
ThreadLocalUpdatePointers();
ThreadLocalCompact();
FIRE_EVENT(GCEnd_V1, (int)count - 1, 0);
FIRE_EVENT(GCEnd_V1, (int)count, 0);
return true;
}
@ -1406,7 +1410,7 @@ void SatoriRegion::ThreadLocalCompact()
{
size_t freeSpace = d2->Start() - d1->Start();
SatoriObject* freeObj = SatoriObject::FormatAsFree(d1->Start(), freeSpace);
AddFreeSpace(freeObj);
AddFreeSpace(freeObj, freeSpace);
foundFree += freeSpace;
d1 = d2;
@ -1897,6 +1901,7 @@ bool SatoriRegion::TryDemote()
return false;
}
this->HasUnmarkedDemotedObjects() = true;
this->ResetCardsForEphemeral();
this->SetGeneration(1);
return true;
@ -1918,6 +1923,7 @@ bool SatoriRegion::NothingMarked()
void SatoriRegion::ClearMarks()
{
_ASSERTE(this->HasUnmarkedDemotedObjects() == false);
memset(&m_bitmap[BITMAP_START], 0, (BITMAP_LENGTH - BITMAP_START) * sizeof(size_t));
}

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -80,7 +80,7 @@ public:
void StopAllocating();
bool IsAllocating();
void AddFreeSpace(SatoriObject* freeObj);
void AddFreeSpace(SatoriObject* freeObj, size_t size);
bool HasFreeSpaceInTopBucket();
bool HasFreeSpaceInTopNBuckets(int n);
@ -102,6 +102,7 @@ public:
bool TryDemote();
bool IsDemoted();
SatoriWorkChunk* &DemotedObjects();
bool& HasUnmarkedDemotedObjects();
void FreeDemotedTrackers();
int Generation();
@ -203,6 +204,9 @@ public:
void Verify(bool allowMarked = false);
SatoriAllocator* Allocator();
SatoriRecycler* Recycler();
private:
static const int BITMAP_LENGTH = Satori::REGION_SIZE_GRANULARITY / sizeof(size_t) / sizeof(size_t) / 8;
@ -270,6 +274,7 @@ private:
bool m_hasPendingFinalizables;
bool m_acceptedPromotedObjects;
bool m_individuallyPromoted;
bool m_hasUnmarkedDemotedObjects;
// when demoted, we remember our gen2 objects here
SatoriWorkChunk* m_gen2Objects;
@ -303,9 +308,6 @@ private:
NOINLINE void ClearPinned(SatoriObject* o);
void ThreadLocalPendFinalizables();
SatoriAllocator* Allocator();
SatoriRecycler* Recycler();
void PushToMarkStackIfHasPointers(SatoriObject* obj);
SatoriObject* PopFromMarkStack();
SatoriObject* ObjectForMarkBit(size_t bitmapIndex, int offset);

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
@ -161,6 +161,7 @@ inline void SatoriRegion::StopEscapeTracking()
}
}
// Used to simulate writes when the containing region is individually promoted.
inline void SatoriRegion::SetCardsForObject(SatoriObject* o)
{
_ASSERTE(this->Size() == Satori::REGION_SIZE_GRANULARITY);
@ -173,10 +174,11 @@ inline void SatoriRegion::SetCardsForObject(SatoriObject* o)
!child->IsExternal() &&
child->ContainingRegion()->Generation() < 2)
{
// This does not happen concurrently with cleaning, so does not need to be ordered.
// If this does not run concurrently with mutator, we could do SetCardForAddress,
// but this should be relatively rare, so we will just dirty for simplicity.
ContainingPage()->DirtyCardForAddressUnordered((size_t)ppObject);
// This could run concurrently with the mutator or in a blocking stage,
// but not while the blocking stage does cleaning.
// It should also be relatively rare, so for simplicity we just assume
// the mutator is running and call a concurrent helper.
ContainingPage()->DirtyCardForAddressConcurrent((size_t)ppObject);
}
}
);
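
The card-table layout of SatoriPage is not shown in this diff, so the following is only a sketch of how the two helpers plausibly differ, assuming a byte-per-card table and the dirty value 4 that the write barriers compare against: the unordered variant is a plain store, while the concurrent variant uses a release store so the reference write preceding it is visible to any cleaner that observes the dirty card.

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Sketch of the two card-dirtying helpers. The real SatoriPage owns the
    // table and derives the index from its own layout; bounds checks and that
    // layout are omitted here.
    constexpr uint8_t CARD_DIRTY     = 4;
    constexpr size_t  BYTES_PER_CARD = 512;   // coverage per card byte (assumed)

    // Plain store: sufficient when no concurrent card cleaning can be in progress.
    inline void DirtyCardForAddressUnordered(std::atomic<uint8_t>* cards,
                                             size_t pageStart, size_t address)
    {
        cards[(address - pageStart) / BYTES_PER_CARD]
            .store(CARD_DIRTY, std::memory_order_relaxed);
    }

    // Release store: when a cleaner may concurrently reset cards, the reference
    // write that preceded this call must become visible no later than the dirty
    // card, so a cleaner that sees the dirty card also sees the new reference.
    inline void DirtyCardForAddressConcurrent(std::atomic<uint8_t>* cards,
                                              size_t pageStart, size_t address)
    {
        cards[(address - pageStart) / BYTES_PER_CARD]
            .store(CARD_DIRTY, std::memory_order_release);
    }
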
@ -289,7 +291,7 @@ bool SatoriRegion::Sweep()
size_t skipped = o->Start() - lastMarkedEnd;
SatoriObject* free = SatoriObject::FormatAsFree(lastMarkedEnd, skipped);
SetIndicesForObject(free, o->Start());
AddFreeSpace(free);
AddFreeSpace(free, skipped);
if (o->Start() >= objLimit)
{
@ -333,6 +335,7 @@ bool SatoriRegion::Sweep()
this->HasMarksSet() = false;
#endif
this->HasUnmarkedDemotedObjects() = IsDemoted();
this->m_hasFinalizables = hasFinalizables;
this->DoNotSweep() = true;
this->HasPinnedObjects() = false;
@ -478,6 +481,11 @@ inline SatoriWorkChunk* &SatoriRegion::DemotedObjects()
return m_gen2Objects;
}
inline bool& SatoriRegion::HasUnmarkedDemotedObjects()
{
return m_hasUnmarkedDemotedObjects;
}
inline void SatoriRegion::SetMarked(SatoriObject* o)
{
_ASSERTE(o->SameRegion(this));

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -1,4 +1,4 @@
// Copyright (c) 2022 Vladimir Sadov
// Copyright (c) 2024 Vladimir Sadov
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation

View file

@ -245,6 +245,7 @@ The .NET Foundation licenses this file to you under the MIT license.
<LinkerArg Include="-L/usr/local/lib -linotify" Condition="'$(_targetOS)' == 'freebsd'" />
<LinkerArg Include="@(ExtraLinkerArg->'-Wl,%(Identity)')" />
<LinkerArg Include="@(NativeFramework->'-framework %(Identity)')" Condition="'$(_IsApplePlatform)' == 'true'" />
<LinkerArg Include="-ld_classic" Condition="'$(_IsApplePlatform)' == 'true'" />
<LinkerArg Include="-Wl,--eh-frame-hdr" Condition="'$(_IsApplePlatform)' != 'true'" />
<!-- Google requires all the native libraries to be aligned to 16 bytes (for 16k memory page size)

View file

@ -466,8 +466,6 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
// DIRTYING CARD
DirtyCard:
cmp byte ptr [rax + rdx], 4
je Exit
mov byte ptr [rax + rdx], 4
DirtyGroup:
cmp byte ptr [rax + r8 * 2 + 0x80], 4

View file

@ -469,8 +469,6 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
; DIRTYING CARD FOR RCX
DirtyCard:
cmp byte ptr [rax + r8], 4
je Exit
mov byte ptr [rax + r8], 4
DirtyGroup:
cmp byte ptr [rax + rdx * 2 + 80h], 4
@ -651,8 +649,6 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
; DIRTYING CARD FOR RCX
DirtyCard:
cmp byte ptr [r11 + r8], 4
je Exit
mov byte ptr [r11 + r8], 4
DirtyGroup:
cmp byte ptr [r11 + rdx * 2 + 80h], 4
@ -801,8 +797,6 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation
; DIRTYING CARD FOR RCX
DirtyCard:
cmp byte ptr [r11 + r8], 4
je Exit
mov byte ptr [r11 + r8], 4
DirtyGroup:
cmp byte ptr [r11 + rdx * 2 + 80h], 4

View file

@ -441,7 +441,8 @@ LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x12
add x12, x12, x14, lsr #30
ldrb w12, [x12]
cbnz x12, C_FUNC(RhpAssignRefArm64)
cbz x12, LOCAL_LABEL(NotInHeap)
b C_FUNC(RhpAssignRefArm64)
LOCAL_LABEL(NotInHeap):
ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
@ -568,10 +569,10 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation
// DIRTYING CARD FOR X14
LOCAL_LABEL(DirtyCard):
ldrb w3, [x17, x2]
tbnz w3, #2, LOCAL_LABEL(Exit)
mov w16, #4
strb w16, [x17, x2]
add x2, x2, x17
// must be after the field write to allow concurrent clean
stlrb w16, [x2]
LOCAL_LABEL(DirtyGroup):
add x12, x17, #0x80
ldrb w3, [x12, x15]
@ -797,10 +798,10 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
// DIRTYING CARD FOR X14
LOCAL_LABEL(DirtyCard_Cmp_Xchg):
ldrb w3, [x17, x2]
tbnz w3, #2, LOCAL_LABEL(Exit_Cmp_Xchg)
mov w16, #4
strb w16, [x17, x2]
add x2, x2, x17
// must be after the field write to allow concurrent clean
stlrb w16, [x2]
LOCAL_LABEL(DirtyGroup_Cmp_Xchg):
add x12, x17, #0x80
ldrb w3, [x12, x15]
@ -987,10 +988,10 @@ ALTERNATE_ENTRY RhpCheckedXchgAVLocation
// DIRTYING CARD FOR X14
LOCAL_LABEL(DirtyCard_Xchg):
ldrb w3, [x17, x2]
tbnz w3, #2, LOCAL_LABEL(Exit_Xchg)
mov w16, #4
strb w16, [x17, x2]
add x2, x2, x17
// must be after the field write to allow concurrent clean
stlrb w16, [x2]
LOCAL_LABEL(DirtyGroup_Xchg):
add x12, x17, #0x80
ldrb w3, [x12, x15]

View file

@ -558,10 +558,10 @@ CardSet
;; DIRTYING CARD FOR X14
DirtyCard
ldrb w3, [x17, x2]
tbnz w3, #2, Exit
mov w16, #4
strb w16, [x17, x2]
add x2, x2, x17
;; must be after the field write to allow concurrent clean
stlrb w16, [x2]
DirtyGroup
add x12, x17, #0x80
ldrb w3, [x12, x15]
@ -758,10 +758,10 @@ CardSet_Cmp_Xchg
;; DIRTYING CARD FOR X14
DirtyCard_Cmp_Xchg
ldrb w3, [x17, x2]
tbnz w3, #2, Exit_Cmp_Xchg
mov w16, #4
strb w16, [x17, x2]
add x2, x2, x17
;; must be after the field write to allow concurrent clean
stlrb w16, [x2]
DirtyGroup_Cmp_Xchg
add x12, x17, #0x80
ldrb w3, [x12, x15]
@ -932,10 +932,10 @@ CardSet_Xchg
;; DIRTYING CARD FOR X14
DirtyCard_Xchg
ldrb w3, [x17, x2]
tbnz w3, #2, Exit_Xchg
mov w16, #4
strb w16, [x17, x2]
add x2, x2, x17
;; must be after the field write to allow concurrent clean
stlrb w16, [x2]
DirtyGroup_Xchg
add x12, x17, #0x80
ldrb w3, [x12, x15]

View file

@ -449,8 +449,6 @@ endif
; DIRTYING CARD FOR RCX
DirtyCard:
cmp byte ptr [rax + r8], 4
je Exit
mov byte ptr [rax + r8], 4
DirtyGroup:
cmp byte ptr [rax + rdx * 2 + 80h], 4

View file

@ -353,8 +353,6 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
// DIRTYING CARD
DirtyCard:
cmp byte ptr [rax + rdx], 4
je Exit
mov byte ptr [rax + rdx], 4
DirtyGroup:
cmp byte ptr [rax + r8 * 2 + 0x80], 4

View file

@ -1493,7 +1493,7 @@ void ErectWriteBarrier(OBJECTREF *dst, OBJECTREF ref)
return;
}
page->DirtyCardForAddressUnordered((size_t)dst);
page->DirtyCardForAddressConcurrent((size_t)dst);
#else
// if the dst is outside of the heap (unboxed value classes) then we

View file

@ -51,10 +51,10 @@ public class Program
// Just count the number of warnings and errors. There are so many right now that it's not worth enumerating the list
#if DEBUG
const int MinWarnings = 2000;
const int MinWarnings = 1000;
const int MaxWarnings = 4000;
#else
const int MinWarnings = 3000;
const int MinWarnings = 1000;
const int MaxWarnings = 5000;
#endif
int count = 0;

View file

@ -1074,12 +1074,12 @@ class TestSharedCode
Assert.AreEqual(int.MaxValue, GC.GetGeneration(val));
val = typeof(ClassWithTemplate<>).MakeGenericType(GetC4()).GetField("Array").GetValue(null);
Assert.AreEqual(0, GC.GetGeneration(val));
Assert.True(GC.GetGeneration(val) <= 2);
Assert.AreEqual(nameof(C4), val.GetType().GetElementType().Name);
static Type GetC4() => typeof(C4);
val = typeof(TestSharedCode).GetMethod(nameof(AccessArray)).MakeGenericMethod(GetC5()).Invoke(null, Array.Empty<object>());
Assert.AreEqual(0, GC.GetGeneration(val));
Assert.True(GC.GetGeneration(val) <= 2);
Assert.AreEqual(nameof(C5), val.GetType().GetElementType().Name);
static Type GetC5() => typeof(C5);
}