mirror of
https://github.com/VSadov/Satori.git
synced 2025-06-08 03:27:04 +09:00
516 lines
16 KiB
ArmAsm
516 lines
16 KiB
ArmAsm
// Licensed to the .NET Foundation under one or more agreements.
|
|
// The .NET Foundation licenses this file to you under the MIT license.
|
|
|
|
.intel_syntax noprefix
|
|
#include "unixasmmacros.inc"
|
|
#include "asmconstants.h"
|
|
|
|
#ifndef FEATURE_SATORI_GC
|
|
|
|
// Mark start of the code region that we patch at runtime.
// JIT_PatchedCodeStart is a marker symbol only: its body is a single `ret`.
// The matching end marker in this file is JIT_PatchedCodeLast.
|
|
LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
|
|
ret
|
|
LEAF_END JIT_PatchedCodeStart, _TEXT
|
|
|
|
|
|
// There is an even more optimized version of these helpers possible which takes
|
|
// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2
|
|
// that check (this is more significant in the JIT_WriteBarrier case).
|
|
//
|
|
// Additionally we can look into providing helpers which will take the src/dest from
|
|
// specific registers (like x86) which _could_ (??) make for easier register allocation
|
|
// for the JIT64, however it might lead to having to have some nasty code that treats
|
|
// these guys really special like... :(.
|
|
//
|
|
// Version that does the move, checks whether or not it's in the GC and whether or not
|
|
// it needs to have its card updated
|
|
//
|
|
// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
|
|
// Checked write barrier (non-Satori build).
//   rdi - dst (address being written), rsi - src (reference being stored)
// If dst is within [g_lowest_address, g_highest_address) control is transferred
// to the write barrier through the JIT_WriteBarrier_Loc pointer; otherwise the
// reference is stored directly with no further bookkeeping.
// Clobbers rax and flags.
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
|
|
|
|
// When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference
|
|
// but if it isn't then it will just return.
|
|
// NOTE(review): no WRITE_BARRIER_CHECK conditional is visible in this function;
// as written, NotInHeap always stores and returns.
//
|
|
// See if this is in GCHeap
|
|
PREPARE_EXTERNAL_VAR g_lowest_address, rax
|
|
cmp rdi, [rax]
|
|
// Hand-encoded short jump (opcode 0x72 = jb, rel8 = 0x12). The displacement is
// hard-coded and must equal the byte distance to NotInHeap.
// NOTE(review): that distance depends on the expansion of PREPARE_EXTERNAL_VAR - re-verify on any edit.
// jb LOCAL_LABEL(NotInHeap)
|
|
.byte 0x72, 0x12
|
|
PREPARE_EXTERNAL_VAR g_highest_address, rax
|
|
cmp rdi, [rax]
|
|
|
|
// Hand-encoded short jump (opcode 0x73 = jnb/jae, rel8 = 0x06): skips the indirect jmp below.
// jnb LOCAL_LABEL(NotInHeap)
|
|
.byte 0x73, 0x06
|
|
// dst is inside the heap range: indirect tail jump through the pointer stored at
// JIT_WriteBarrier_Loc (presumably the current location of the JIT_WriteBarrier code - confirm).
jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)]
|
|
|
|
LOCAL_LABEL(NotInHeap):
|
|
// See comment above about possible AV
// NOTE(review): the AV explanation lives in JIT_WriteBarrier (the store may fault and
// EH maps the fault back to the managed caller); it is not actually above this line.
|
|
mov [rdi], rsi
|
|
ret
|
|
LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
|
|
|
|
|
|
// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow
|
|
// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_
|
|
// change at runtime as the GC changes. Initially it should simply be a copy of the
|
|
// larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created
|
|
// enough space to copy that code in.
|
|
// JIT_WriteBarrier (non-Satori build)
//   rdi - dst (address being written), rsi - src (reference being stored)
// Stores rsi to [rdi], then conditionally updates GC bookkeeping bytes: the write
// watch table (only under FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP), the card
// table, and the card bundle table (only under FEATURE_MANUALLY_MANAGED_CARD_BUNDLES).
// Every 0xF0F0F0F0F0F0F0F0 immediate is a placeholder whose value is patched at
// runtime; the NOP padding exists to keep each of those immediates 8-byte aligned.
// Conditional jumps are emitted as raw `.byte opcode, rel8` pairs with the intended
// mnemonic given in the adjacent comment.
// NOTE(review): the rel8 displacements are hard-coded; re-derive them if any
// instruction, padding or alignment in this function changes.
.balign 16
|
|
LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|
#ifdef _DEBUG
|
|
// In debug builds, this just contains jump to the debug version of the write barrier by default
|
|
jmp C_FUNC(JIT_WriteBarrier_Debug)
|
|
#endif
|
|
|
|
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
|
|
// JIT_WriteBarrier_WriteWatch_PostGrow64
|
|
|
|
// Regarding patchable constants:
|
|
// - 64-bit constants have to be loaded into a register
|
|
// - The constants have to be aligned to 8 bytes so that they can be patched easily
|
|
// - The constant loads have been located to minimize NOP padding required to align the constants
|
|
// - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
|
|
// non-volatile calling convention, this should be changed to use just one register.
|
|
|
|
// Do the move into the GC . It is correct to take an AV here, the EH code
|
|
// figures out that this came from a WriteBarrier and correctly maps it back
|
|
// to the managed method which called the WriteBarrier (see setup in
|
|
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
|
|
mov [rdi], rsi
|
|
|
|
// Update the write watch table if necessary
// rax = dst >> 0xC is the byte index; r10 (patched) is added as the table base.
|
|
mov rax, rdi
|
|
movabs r10, 0xF0F0F0F0F0F0F0F0
|
|
shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
|
|
NOP_2_BYTE // padding for alignment of constant
|
|
// r11 (patched) is loaded early and consumed at CheckCardTable as the lower bound.
movabs r11, 0xF0F0F0F0F0F0F0F0
|
|
add rax, r10
|
|
cmp byte ptr [rax], 0x0
|
|
.byte 0x75, 0x06
|
|
// jne LOCAL_LABEL(CheckCardTable)
|
|
// entry was zero: mark it
mov byte ptr [rax], 0xFF
|
|
|
|
NOP_3_BYTE // padding for alignment of constant
|
|
|
|
// Check the lower and upper ephemeral region bounds
|
|
LOCAL_LABEL(CheckCardTable):
|
|
cmp rsi, r11
|
|
.byte 0x72,0x3D
|
|
// jb LOCAL_LABEL(Exit)
|
|
|
|
NOP_3_BYTE // padding for alignment of constant
|
|
|
|
// r10 is reused: now the (patched) upper bound compared against src.
movabs r10, 0xF0F0F0F0F0F0F0F0
|
|
|
|
cmp rsi, r10
|
|
.byte 0x73,0x2B
|
|
// jae LOCAL_LABEL(Exit)
|
|
|
|
nop // padding for alignment of constant
|
|
|
|
// rax = (patched) base that the card byte index in rdi is added to.
movabs rax, 0xF0F0F0F0F0F0F0F0
|
|
|
|
// Touch the card table entry, if not already dirty.
|
|
shr rdi, 0x0B
|
|
cmp byte ptr [rdi + rax], 0xFF
|
|
.byte 0x75, 0x02
|
|
// jne LOCAL_LABEL(UpdateCardTable)
|
|
REPRET
|
|
|
|
LOCAL_LABEL(UpdateCardTable):
|
|
mov byte ptr [rdi + rax], 0xFF
|
|
|
|
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
|
|
NOP_2_BYTE // padding for alignment of constant
|
|
// rdi was already shifted by 0x0B above; this shifts it by a further 0x0A.
shr rdi, 0x0A
|
|
|
|
movabs rax, 0xF0F0F0F0F0F0F0F0
|
|
cmp byte ptr [rdi + rax], 0xFF
|
|
|
|
.byte 0x75, 0x02
|
|
// jne LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64)
|
|
REPRET
|
|
|
|
LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64):
|
|
mov byte ptr [rdi + rax], 0xFF
|
|
#endif
|
|
|
|
ret
|
|
|
|
.balign 16
|
|
LOCAL_LABEL(Exit):
|
|
REPRET
|
|
|
|
// Unreachable filler after the return.
// NOTE(review): presumably reserves space so this template is at least as large as
// any barrier variant copied over it - confirm against the patching code.
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
NOP_3_BYTE
|
|
|
|
#else
|
|
// JIT_WriteBarrier_PostGrow64
|
|
|
|
// Do the move into the GC . It is correct to take an AV here, the EH code
|
|
// figures out that this came from a WriteBarrier and correctly maps it back
|
|
// to the managed method which called the WriteBarrier (see setup in
|
|
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
|
|
mov [rdi], rsi
|
|
|
|
NOP_3_BYTE // padding for alignment of constant
|
|
|
|
// Can't compare a 64 bit immediate, so we have to move them into a
|
|
// register. Values of these immediates will be patched at runtime.
|
|
// By using two registers we can pipeline better. Should we decide to use
|
|
// a special non-volatile calling convention, this should be changed to
|
|
// just one.
|
|
|
|
// rax = (patched) lower bound compared against src
movabs rax, 0xF0F0F0F0F0F0F0F0
|
|
|
|
// Check the lower and upper ephemeral region bounds
|
|
cmp rsi, rax
|
|
// jb LOCAL_LABEL(Exit)
|
|
.byte 0x72, 0x36
|
|
|
|
nop // padding for alignment of constant
|
|
|
|
// r8 = (patched) upper bound compared against src
movabs r8, 0xF0F0F0F0F0F0F0F0
|
|
|
|
cmp rsi, r8
|
|
// jae LOCAL_LABEL(Exit)
|
|
.byte 0x73, 0x26
|
|
|
|
nop // padding for alignment of constant
|
|
|
|
// rax = (patched) base that the card byte index in rdi is added to
movabs rax, 0xF0F0F0F0F0F0F0F0
|
|
|
|
// Touch the card table entry, if not already dirty.
|
|
shr rdi, 0Bh
|
|
cmp byte ptr [rdi + rax], 0FFh
|
|
.byte 0x75, 0x02
|
|
// jne LOCAL_LABEL(UpdateCardTable)
|
|
REPRET
|
|
|
|
LOCAL_LABEL(UpdateCardTable):
|
|
mov byte ptr [rdi + rax], 0FFh
|
|
|
|
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
|
|
NOP_6_BYTE // padding for alignment of constant
|
|
|
|
movabs rax, 0xF0F0F0F0F0F0F0F0
|
|
|
|
// Touch the card bundle, if not already dirty.
|
|
// rdi is already shifted by 0xB, so shift by 0xA more
|
|
shr rdi, 0x0A
|
|
cmp byte ptr [rdi + rax], 0FFh
|
|
|
|
.byte 0x75, 0x02
|
|
// jne LOCAL_LABEL(UpdateCardBundle)
|
|
REPRET
|
|
|
|
LOCAL_LABEL(UpdateCardBundle):
|
|
mov byte ptr [rdi + rax], 0FFh
|
|
#endif
|
|
|
|
ret
|
|
|
|
.balign 16
|
|
LOCAL_LABEL(Exit):
|
|
REPRET
|
|
#endif
|
|
|
|
// make sure this is bigger than any of the others
|
|
.balign 16
|
|
nop
|
|
LEAF_END_MARKED JIT_WriteBarrier, _TEXT
|
|
|
|
// Mark the end of the code region that we patch at runtime.
// JIT_PatchedCodeLast is a marker symbol only (a single `ret`); it is the
// counterpart of JIT_PatchedCodeStart and is placed after the barrier helpers.
|
|
LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
|
|
ret
|
|
LEAF_END JIT_PatchedCodeLast, _TEXT
|
|
|
|
#else //FEATURE_SATORI_GC ##############################################################################
|
|
|
|
// ALTERNATE_ENTRY Name
// Defines an additional global label C_FUNC(Name) at the current location, so
// that code outside the enclosing function can jump to or call into its middle.
// Emits no instructions.
.macro ALTERNATE_ENTRY Name
|
|
.global C_FUNC(\Name)
|
|
C_FUNC(\Name):
|
|
.endm
|
|
|
|
// Mark start of the code region that we patch at runtime.
// JIT_PatchedCodeStart is a marker symbol only: its body is a single `ret`.
// The matching end marker in this file is JIT_PatchedCodeLast.
|
|
LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
|
|
ret
|
|
LEAF_END JIT_PatchedCodeStart, _TEXT
|
|
|
|
|
|
// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
//   rdi - dst, rsi - src
// Looks up dst in the byte map pointed to by g_card_bundle_table (one byte per
// 2^30-byte page, index = dst >> 30). A nonzero byte means dst is in the GC heap
// and control transfers to CheckedEntry inside JIT_WriteBarrier; otherwise the
// reference is stored directly. Clobbers rax, r8 and flags.
|
|
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
|
|
|
|
// See if dst is in GCHeap
|
|
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
|
|
// dereference: rax now holds the map pointer itself
mov rax, [rax]
|
|
mov r8, rdi
|
|
shr r8, 30 // dst page index
|
|
cmp byte ptr [rax + r8], 0
|
|
// rax must still hold the map pointer here: under FEATURE_SATORI_EXTERNAL_OBJECTS
// the code at CheckedEntry indexes [rax + (src >> 30)] without reloading it.
jne C_FUNC(CheckedEntry)
|
|
|
|
NotInHeap:
|
|
// See comment above about possible AV
// NOTE(review): no such comment exists in this (FEATURE_SATORI_GC) branch of the file.
|
|
mov [rdi], rsi
|
|
ret
|
|
LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
|
|
|
|
|
|
//
|
|
// JIT_WriteBarrier (Satori build)
// rdi - dest address
|
|
// rsi - object
|
|
//
// Stores rsi to [rdi] and then, depending on region ownership and the barrier
// state read from g_sw_ww_table, records an escape and/or sets or dirties the
// card, card group and page bytes for the destination.
// A second entry point, CheckedEntry, is used by JIT_CheckedWriteBarrier and
// JIT_ByRefWriteBarrier; under FEATURE_SATORI_EXTERNAL_OBJECTS it expects rax to
// already hold the page byte map pointer.
// On every return path rdi has been advanced by 8 and rsi has been overwritten
// with r10 (the pre-adjusted source pointer when entered from JIT_ByRefWriteBarrier).
// Registers written: rax, rcx, rdx, r8, r9, r11, flags.
//
|
|
.balign 16
|
|
LEAF_ENTRY JIT_WriteBarrier, _TEXT
|
|
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
|
|
// check if src is in heap
|
|
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
|
|
mov rax, [rax]
|
|
ALTERNATE_ENTRY CheckedEntry
|
|
mov r8, rsi
|
|
shr r8, 30 // src page index
|
|
cmp byte ptr [rax + r8], 0
|
|
je JustAssign // src not in heap
|
|
#else
|
|
ALTERNATE_ENTRY CheckedEntry
|
|
#endif
|
|
|
|
// check for escaping assignment
|
|
// 1) check if we own the source region
|
|
mov rdx, rsi
|
|
and rdx, 0xFFFFFFFFFFE00000 // source region
|
|
#ifndef FEATURE_SATORI_EXTERNAL_OBJECTS
|
|
// ZF comes from the `and` above: src below 2 MB masks to zero
jz JustAssign // assigning null
|
|
#endif
|
|
#ifdef TARGET_OSX
|
|
mov rax, gs:[0] // thread tag
|
|
#else
|
|
mov rax, fs:[0] // thread tag
|
|
#endif
|
|
// the first qword of the region is compared with this thread's tag
cmp qword ptr [rdx], rax
|
|
jne AssignAndMarkCards // not local to this thread
|
|
|
|
// 2) check if the src and dst are from the same region
|
|
mov rax, rdi
|
|
and rax, 0xFFFFFFFFFFE00000 // target aligned to region
|
|
cmp rax, rdx
|
|
jnz RecordEscape // cross region assignment. definitely escaping
|
|
|
|
// 3) check if the target is exposed
// rax = (dst offset within region) / 8, used as a bit index into the bit
// string that starts at the region base.
|
|
mov rax, rdi
|
|
and rax, 0x1FFFFF
|
|
shr rax, 3
|
|
bt qword ptr [rdx], rax
|
|
jb RecordEscape // target is exposed. record an escape.
|
|
|
|
JustAssign:
|
|
mov [rdi], rsi // no card marking, src is not a heap object
|
|
|
|
// set rdi, rsi per contract with JIT_ByRefWriteBarrier
|
|
add rdi, 8
|
|
mov rsi, r10
|
|
ret
|
|
|
|
AssignAndMarkCards:
|
|
mov [rdi], rsi
|
|
|
|
// TUNING: barriers in different modes could be separate pieces of code, but barrier switch
|
|
// needs to suspend EE, not sure if skipping mode check would worth that much.
|
|
// rcx keeps the address of g_sw_ww_table (re-read at CardSet); r11 = its current value (barrier state)
PREPARE_EXTERNAL_VAR g_sw_ww_table, rcx
|
|
mov r11, [rcx]
|
|
|
|
// set rdi per contract with JIT_ByRefWriteBarrier
// rax keeps the original dst for the card computations below
|
|
mov rax, rdi
|
|
add rdi, 8
|
|
|
|
// check the barrier state. this must be done after the assignment (in program order)
|
|
// if state == 2 we do not set or dirty cards.
|
|
cmp r11, 2
|
|
jne DoCards
|
|
// set rsi per contract with JIT_ByRefWriteBarrier
|
|
mov rsi, r10
|
|
Exit:
|
|
ret
|
|
|
|
DoCards:
|
|
// if same region, just check if barrier is not concurrent
// (src ^ dst) >> 21 is zero exactly when both lie in the same 2 MB region
|
|
xor rsi, rax
|
|
shr rsi, 21
|
|
// set rsi per contract with JIT_ByRefWriteBarrier
// (mov does not modify flags, so the jz below still tests the shr result)
|
|
mov rsi, r10
|
|
jz CheckConcurrent // same region, just check if barrier is not concurrent
|
|
|
|
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
// rdx still holds the source region base; the dword at +16 is compared signed against 2
|
|
cmp dword ptr [rdx + 16], 2
|
|
jl MarkCards
|
|
|
|
CheckConcurrent:
|
|
// if concurrent, load card location
|
|
cmp r11, 0
|
|
je Exit
|
|
|
|
MarkCards:
|
|
// fetch card location for rax (saved rdi)
// results: rax = page, rdx = card offset within page, r8 = address of the group byte
|
|
PREPARE_EXTERNAL_VAR g_card_table, r9
|
|
mov r9, [r9] // fetch the page map
|
|
mov rdx, rax
|
|
shr rax, 30
|
|
mov rax, qword ptr [r9 + rax * 8] // page
|
|
sub rdx, rax // offset in page
|
|
mov r8, rdx
|
|
shr rdx, 9 // card offset
|
|
shr r8, 20 // group index
|
|
lea r8, [rax + r8 * 2 + 0x80] // group offset
|
|
|
|
// check if concurrent marking is in progress
|
|
cmp r11, 0
|
|
jne DirtyCard
|
|
|
|
// SETTING CARD
// writes the value 1 to the card, group and page bytes, stopping at the first nonzero one
|
|
SetCard:
|
|
cmp byte ptr [rax + rdx], 0
|
|
jne Exit
|
|
mov byte ptr [rax + rdx], 1
|
|
SetGroup:
|
|
cmp byte ptr [r8], 0
|
|
jne CardSet
|
|
mov byte ptr [r8], 1
|
|
SetPage:
|
|
cmp byte ptr [rax], 0
|
|
jne CardSet
|
|
mov byte ptr [rax], 1
|
|
|
|
CardSet:
|
|
// check if concurrent marking is still not in progress
// re-reads the state from memory through rcx rather than using the r11 snapshot
|
|
cmp qword ptr [rcx], 0
|
|
jne DirtyCard
|
|
ret
|
|
|
|
// DIRTYING CARD
// writes the value 4; the card byte is written unconditionally, group and page only if not already 4
|
|
DirtyCard:
|
|
mov byte ptr [rax + rdx], 4
|
|
DirtyGroup:
|
|
cmp byte ptr [r8], 4
|
|
je Exit
|
|
mov byte ptr [r8], 4
|
|
DirtyPage:
|
|
cmp byte ptr [rax], 4
|
|
je Exit
|
|
mov byte ptr [rax], 4
|
|
ret
|
|
|
|
// this is expected to be rare.
|
|
RecordEscape:
|
|
|
|
// 4) check if the source is escaped
// same bit-string lookup as step 3, for the address src + 8
|
|
mov rax, rsi
|
|
add rax, 8 // escape bit is MT + 1
|
|
and rax, 0x1FFFFF
|
|
shr rax, 3
|
|
bt qword ptr [rdx], rax
|
|
jb AssignAndMarkCards // source is already escaped.
|
|
|
|
// Align rsp
// After `and`, rsp is 16-byte aligned. The 8-byte pad + five 8-byte pushes +
// 16 bytes for xmm0 total 64 bytes, so rsp is 16-byte aligned again at the call.
|
|
mov r9, rsp
|
|
and rsp, -16
|
|
sub rsp, 8
|
|
|
|
// save rsp, rdi, rsi, rdx and r10 (possibly preadjusted rsi)
|
|
push r9
|
|
push rdi
|
|
push rsi
|
|
push rdx
|
|
push r10
|
|
|
|
// also save xmm0, in case it is used for stack clearing, as JIT_ByRefWriteBarrier should not trash xmm0
|
|
// Hopefully EscapeFn cannot corrupt other xmm regs, since there is no float math or vectorizable code in there.
|
|
sub rsp, 16
|
|
movdqu [rsp], xmm0
|
|
|
|
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
// arguments are already in place: rdi = dst, rsi = src, rdx = region;
// the function pointer is read from [region + 8]
|
|
call qword ptr [rdx + 8]
|
|
|
|
movdqu xmm0, [rsp]
|
|
add rsp, 16
|
|
|
|
pop r10
|
|
pop rdx
|
|
pop rsi
|
|
pop rdi
|
|
// restores the pre-alignment stack pointer that was saved via r9
pop rsp
|
|
jmp AssignAndMarkCards
|
|
LEAF_END_MARKED JIT_WriteBarrier, _TEXT
|
|
|
|
|
|
// JIT_ByRefWriteBarrier has unusual semantics, see usage in StubLinkerX86.cpp
//
// Copies the object reference at [RSI] into [RDI] and advances both pointers.
// When the destination page is marked in the page byte map, the store is
// performed by the write barrier (entered at CheckedEntry inside
// JIT_WriteBarrier); otherwise it is a plain copy.
//
// Entry:
//   RDI - address of ref-field (assigned to)
//   RSI - address of the data (source)
// Exit:
//   RCX is trashed
//   RAX is trashed
//   RDI, RSI are incremented by SIZEOF(LPVOID)
LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
    // See if dst is in GCHeap
        PREPARE_EXTERNAL_VAR g_card_bundle_table, rax
        // Dereference the variable to get the page byte map pointer, exactly as
        // JIT_CheckedWriteBarrier and JIT_WriteBarrier do after the same macro.
        // Without this load the lookup below indexes from the wrong base, and
        // CheckedEntry (which reads the map through rax when
        // FEATURE_SATORI_EXTERNAL_OBJECTS is defined) receives the wrong pointer.
        mov     rax, [rax]                  // fetch the page byte map
        mov     rcx, rdi
        shr     rcx, 30                     // dst page index
        cmp     byte ptr [rax + rcx], 0
        jne     InHeap

    // dst is not in the heap: plain copy, then advance both pointers
        mov     rcx, [rsi]
        mov     [rdi], rcx
        add     rdi, 8
        add     rsi, 8
        ret

    InHeap:
    // JIT_WriteBarrier may trash these registers
        push    rdx
        push    r8
        push    r9
        push    r10
        push    r11

    // save preadjusted rsi; the barrier copies r10 back into rsi before returning
        lea     r10, [rsi + 8]
        mov     rsi, [rsi]                  // rsi = the reference being stored

    // rax still holds the page byte map pointer at this point.
    // The symbol is referenced through C_FUNC() because that is how
    // ALTERNATE_ENTRY defines it.
        call    C_FUNC(CheckedEntry)

        pop     r11
        pop     r10
        pop     r9
        pop     r8
        pop     rdx
        ret
LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT
|
|
|
|
// Mark the end of the code region that we patch at runtime.
// JIT_PatchedCodeLast is a marker symbol only (a single `ret`); it is the
// counterpart of JIT_PatchedCodeStart and is placed after the barrier helpers.
|
|
LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
|
|
ret
|
|
LEAF_END JIT_PatchedCodeLast, _TEXT
|
|
|
|
#endif // FEATURE_SATORI_GC
|