mirror of https://github.com/VSadov/Satori.git (synced 2025-06-09 17:44:48 +09:00)
Extend usage of GC_ALLOC_ALIGN8 (#104781)
In the past we added FEATURE_64BIT_ALIGNMENT to support the 8-byte alignment requirement on ARM, but for performance we also want to 8-byte align double arrays on other architectures. The GC has a GC_ALLOC_ALIGN8 flag for exactly this, but it was only used in the ARM case, and a more complicated workaround was in use elsewhere. This change makes GC_ALLOC_ALIGN8 supported on all architectures and converges all of our aligned double arrays on that approach. Note that GC_ALLOC_ALIGN8 only implies that the initial allocation is 8-byte aligned, not that it will stay aligned after relocation: on ARM it stays aligned because RESPECT_LARGE_ALIGNMENT is defined, but on other 32-bit architectures the alignment is not guaranteed to survive relocation.
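To make the alignment trick concrete, here is a minimal standalone sketch, not the runtime's code: bump_alloc, alloc_align8, and MIN_OBJ are hypothetical stand-ins for the GC allocator and MIN_OBJECT_SIZE. On a heap that only guarantees 4-byte alignment, over-allocate by the minimum object size and either use the start address (if it is already 8-byte aligned) or skip past space for a dummy object to reach an 8-byte boundary. This is the workaround this commit retires in favor of asking the GC for alignment up front via GC_ALLOC_ALIGN8.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical bump allocator that, like the GC on 32-bit platforms, only
// hands out 4-byte aligned addresses.
alignas(8) static uint8_t g_heap[1024];
static size_t g_offset = 0;

static uint8_t* bump_alloc(size_t size)
{
    size = (size + 3) & ~(size_t)3;   // 4-byte allocation granularity
    uint8_t* p = &g_heap[g_offset];
    g_offset += size;
    return p;
}

// Stand-in for MIN_OBJECT_SIZE: 12 % 8 == 4, so adding it flips an address
// that is 4 mod 8 to an 8-byte aligned one.
static const size_t MIN_OBJ = 12;

static uint8_t* alloc_align8(size_t size)
{
    // Over-allocate by MIN_OBJ; the 12 unused bytes hold the dummy object.
    uint8_t* p = bump_alloc(size + MIN_OBJ);
    if ((uintptr_t)p % 8 != 0)
        p += MIN_OBJ;                 // dummy object goes first, array after it
    // else: the dummy object would be placed after the array instead
    assert((uintptr_t)p % 8 == 0);
    return p;
}

int main()
{
    for (int i = 0; i < 4; i++)
        printf("%p\n", (void*)alloc_align8(20)); // every address ends in 0 or 8
    return 0;
}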
This commit is contained in:
parent bcd705a2c9
commit 46338d5a5f

4 changed files with 19 additions and 69 deletions
@@ -49370,7 +49370,6 @@ bool GCHeap::StressHeap(gc_alloc_context * context)
        } \
    } while (false)

#ifdef FEATURE_64BIT_ALIGNMENT
// Allocate small object with an alignment requirement of 8-bytes.
Object* AllocAlign8(alloc_context* acontext, gc_heap* hp, size_t size, uint32_t flags)
{
@@ -49436,7 +49435,6 @@ Object* AllocAlign8(alloc_context* acontext, gc_heap* hp, size_t size, uint32_t flags)

    return newAlloc;
}
#endif // FEATURE_64BIT_ALIGNMENT

Object*
GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_DCL)
@@ -49497,15 +49495,11 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_DCL)
    }
    else
    {
#ifdef FEATURE_64BIT_ALIGNMENT
        if (flags & GC_ALLOC_ALIGN8)
        {
            newAlloc = AllocAlign8 (acontext, hp, size, flags);
        }
        else
#else
        assert ((flags & GC_ALLOC_ALIGN8) == 0);
#endif
        {
            newAlloc = (Object*) hp->allocate (size + ComputeMaxStructAlignPad(requiredAlignment), acontext, flags);
        }
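The preprocessor structure in this excerpt is easy to misread: when FEATURE_64BIT_ALIGNMENT is defined, the final brace block is the else-branch of the if; when it is not defined, the if/else disappears entirely and the brace block is a plain compound statement that always runs. A standalone toy illustration (the AllocAligned/AllocNormal helpers are hypothetical, not runtime code):

#include <cassert>
#include <cstdio>

// Toggle this to see both compilations of the same source.
#define FEATURE_64BIT_ALIGNMENT 1

static int AllocAligned() { return 8; }
static int AllocNormal()  { return 4; }

int Alloc(bool wantAlign8)
{
    int result;
    // Same shape as the GCHeap::Alloc excerpt above: with the feature
    // compiled in, the trailing { } block is the else-branch of the if;
    // with it compiled out, the if/else is gone and the { } block is a
    // bare compound statement that always executes.
#ifdef FEATURE_64BIT_ALIGNMENT
    if (wantAlign8)
    {
        result = AllocAligned();
    }
    else
#else
    assert(!wantAlign8);
#endif
    {
        result = AllocNormal();
    }
    return result;
}

int main()
{
    printf("%d %d\n", Alloc(true), Alloc(false)); // prints "8 4" with the feature defined
    return 0;
}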
@@ -1046,7 +1046,9 @@ enum GC_ALLOC_FLAGS
     GC_ALLOC_FINALIZE = 1,
     GC_ALLOC_CONTAINS_REF = 2,
     GC_ALLOC_ALIGN8_BIAS = 4,
-    GC_ALLOC_ALIGN8 = 8,
+    GC_ALLOC_ALIGN8 = 8, // Only implies the initial allocation is 8 byte aligned.
+                         // Preserving the alignment across relocation depends on
+                         // RESPECT_LARGE_ALIGNMENT also being defined.
     GC_ALLOC_ZEROING_OPTIONAL = 16,
     GC_ALLOC_LARGE_OBJECT_HEAP = 32,
     GC_ALLOC_PINNED_OBJECT_HEAP = 64,
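Since these values are distinct powers of two, callers build an allocation request by OR-ing them into a single flags word and the GC tests each bit individually. A small sketch of that usage; the enum values are copied from the hunk above, while GC_ALLOC_NO_FLAGS as a zero starting value is an assumption:

#include <cstdint>
#include <cstdio>

// Bit flags, so independent requests combine in one uint32_t.
enum GC_ALLOC_FLAGS : uint32_t
{
    GC_ALLOC_NO_FLAGS           = 0,  // assumed zero value for "no request"
    GC_ALLOC_FINALIZE           = 1,
    GC_ALLOC_CONTAINS_REF       = 2,
    GC_ALLOC_ALIGN8_BIAS        = 4,
    GC_ALLOC_ALIGN8             = 8,
    GC_ALLOC_ZEROING_OPTIONAL   = 16,
    GC_ALLOC_LARGE_OBJECT_HEAP  = 32,
    GC_ALLOC_PINNED_OBJECT_HEAP = 64,
};

int main()
{
    // A double-array request: 8-byte aligned start, zeroing not required.
    uint32_t flags = GC_ALLOC_NO_FLAGS;
    flags |= GC_ALLOC_ALIGN8;
    flags |= GC_ALLOC_ZEROING_OPTIONAL;

    if (flags & GC_ALLOC_ALIGN8)
        printf("caller asked for an 8-byte aligned allocation\n");
    if ((flags & GC_ALLOC_PINNED_OBJECT_HEAP) == 0)
        printf("not a pinned-object-heap allocation\n");
    return 0;
}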
@@ -1468,9 +1468,7 @@ class gc_heap
     friend struct ::alloc_context;
     friend void ProfScanRootsHelper(Object** object, ScanContext *pSC, uint32_t dwFlags);
     friend void GCProfileWalkHeapWorker(BOOL fProfilerPinned, BOOL fShouldWalkHeapRootsForEtw, BOOL fShouldWalkHeapObjectsForEtw);
-#ifdef FEATURE_64BIT_ALIGNMENT
     friend Object* AllocAlign8(alloc_context* acontext, gc_heap* hp, size_t size, uint32_t flags);
-#endif //FEATURE_64BIT_ALIGNMENT
     friend class t_join;
     friend class gc_mechanisms;
     friend class seg_free_spaces;
@@ -424,70 +424,26 @@ OBJECTREF AllocateSzArray(MethodTable* pArrayMT, INT32 cElements, GC_ALLOC_FLAGS
     }
     else
     {
-#ifndef FEATURE_64BIT_ALIGNMENT
-        if ((DATA_ALIGNMENT < sizeof(double)) && (pArrayMT->GetArrayElementType() == ELEMENT_TYPE_R8) &&
-            (totalSize < GCHeapUtilities::GetGCHeap()->GetLOHThreshold() - MIN_OBJECT_SIZE))
+#ifdef FEATURE_DOUBLE_ALIGNMENT_HINT
+        if (pArrayMT->GetArrayElementType() == ELEMENT_TYPE_R8)
         {
-            // Creation of an array of doubles, not in the large object heap.
-            // We want to align the doubles to 8 byte boundaries, but the GC gives us pointers aligned
-            // to 4 bytes only (on 32 bit platforms). To align, we ask for 12 bytes more to fill with a
-            // dummy object.
-            // If the GC gives us a 8 byte aligned address, we use it for the array and place the dummy
-            // object after the array, otherwise we put the dummy object first, shifting the base of
-            // the array to an 8 byte aligned address. Also, we need to make sure that the syncblock of the
-            // second object is zeroed. GC won't take care of zeroing it out with GC_ALLOC_ZEROING_OPTIONAL.
-            //
-            // Note: on 64 bit platforms, the GC always returns 8 byte aligned addresses, and we don't
-            // execute this code because DATA_ALIGNMENT < sizeof(double) is false.
-
-            _ASSERTE(DATA_ALIGNMENT == sizeof(double) / 2);
-            _ASSERTE((MIN_OBJECT_SIZE % sizeof(double)) == DATA_ALIGNMENT); // used to change alignment
-            _ASSERTE(pArrayMT->GetComponentSize() == sizeof(double));
-            _ASSERTE(g_pObjectClass->GetBaseSize() == MIN_OBJECT_SIZE);
-            _ASSERTE(totalSize < totalSize + MIN_OBJECT_SIZE);
-            orArray = (ArrayBase*)Alloc(totalSize + MIN_OBJECT_SIZE, flags);
-
-            Object* orDummyObject;
-            if (((size_t)orArray % sizeof(double)) != 0)
-            {
-                orDummyObject = orArray;
-                orArray = (ArrayBase*)((size_t)orArray + MIN_OBJECT_SIZE);
-                if (flags & GC_ALLOC_ZEROING_OPTIONAL)
-                {
-                    // clean the syncblock of the aligned array.
-                    *(((void**)orArray)-1) = 0;
-                }
-            }
-            else
-            {
-                orDummyObject = (Object*)((size_t)orArray + totalSize);
-                if (flags & GC_ALLOC_ZEROING_OPTIONAL)
-                {
-                    // clean the syncblock of the dummy object.
-                    *(((void**)orDummyObject)-1) = 0;
-                }
-            }
-            _ASSERTE(((size_t)orArray % sizeof(double)) == 0);
-            orDummyObject->SetMethodTable(g_pObjectClass);
+            flags |= GC_ALLOC_ALIGN8;
         }
-        else
-#endif // FEATURE_64BIT_ALIGNMENT
-        {
-#ifdef FEATURE_64BIT_ALIGNMENT
-            MethodTable* pElementMT = pArrayMT->GetArrayElementTypeHandle().GetMethodTable();
-            if (pElementMT->RequiresAlign8() && pElementMT->IsValueType())
-            {
-                // This platform requires that certain fields are 8-byte aligned (and the runtime doesn't provide
-                // this guarantee implicitly, e.g. on 32-bit platforms). Since it's the array payload, not the
-                // header that requires alignment we need to be careful. However it just so happens that all the
-                // cases we care about (single and multi-dim arrays of value types) have an even number of DWORDs
-                // in their headers so the alignment requirements for the header and the payload are the same.
-                _ASSERTE(((pArrayMT->GetBaseSize() - SIZEOF_OBJHEADER) & 7) == 0);
-                flags |= GC_ALLOC_ALIGN8;
-            }
-#endif
-            orArray = (ArrayBase*)Alloc(totalSize, flags);
+#endif
+#ifdef FEATURE_64BIT_ALIGNMENT
+        MethodTable* pElementMT = pArrayMT->GetArrayElementTypeHandle().GetMethodTable();
+        if (pElementMT->RequiresAlign8() && pElementMT->IsValueType())
+        {
+            // This platform requires that certain fields are 8-byte aligned (and the runtime doesn't provide
+            // this guarantee implicitly, e.g. on 32-bit platforms). Since it's the array payload, not the
+            // header that requires alignment we need to be careful. However it just so happens that all the
+            // cases we care about (single and multi-dim arrays of value types) have an even number of DWORDs
+            // in their headers so the alignment requirements for the header and the payload are the same.
+            _ASSERTE(((pArrayMT->GetBaseSize() - SIZEOF_OBJHEADER) & 7) == 0);
+            flags |= GC_ALLOC_ALIGN8;
+        }
+#endif
+        orArray = (ArrayBase*)Alloc(totalSize, flags);
         orArray->SetMethodTable(pArrayMT);
     }
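The parity argument in the comment above can be checked with plain arithmetic: starting from an 8-byte aligned object address, the payload lands 8-byte aligned exactly when the header occupies an even number of 4-byte DWORDs. A tiny numeric check, where the concrete sizes are illustrative rather than the runtime's actual header layouts:

#include <cstdint>
#include <cstdio>

int main()
{
    const uintptr_t objStart = 0x20000000;          // assume an 8-byte aligned object start
    const size_t headerSizes[] = { 8, 16, 12, 20 }; // even vs odd numbers of DWORDs
    for (size_t h : headerSizes)
        printf("header %2zu bytes -> payload %% 8 == %zu\n",
               h, (size_t)((objStart + h) % 8));
    // Prints 0 for 8 and 16 but 4 for 12 and 20: only even-DWORD headers keep
    // the payload 8-byte aligned, which is what the _ASSERTE above encodes.
    return 0;
}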