// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
.intel_syntax noprefix
#include "unixasmmacros.inc"
#include "asmconstants.h"
#ifndef FEATURE_SATORI_GC
// Mark start of the code region that we patch at runtime
LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
ret
LEAF_END JIT_PatchedCodeStart, _TEXT
// An even more optimized version of these helpers is possible: one that exploits
// knowledge of which way the ephemeral heap grows to do only half of the bounds
// check (this is more significant in the JIT_WriteBarrier case).
//
// Additionally, we could provide helpers that take the src/dest in specific
// registers (as on x86), which could make register allocation easier for JIT64,
// though it might require some nasty special-casing of these helpers.
//
// Version that does the move, checks whether the destination is in the GC heap,
// and whether its card needs to be updated
//
// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
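//
// Roughly, in C (a sketch of the logic below, not the exact emitted code;
// g_lowest_address and g_highest_address bracket the GC heap):
//
//   void JIT_CheckedWriteBarrier(Object** dst, Object* src)
//   {
//       if ((uint8_t*)dst < g_lowest_address || (uint8_t*)dst >= g_highest_address)
//           *dst = src;                  // destination is not in the GC heap
//       else
//           JIT_WriteBarrier(dst, src);  // tail-jump through JIT_WriteBarrier_Loc
//   }
//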
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
// When WRITE_BARRIER_CHECK is defined, _NotInHeap will write the reference;
// if it is not defined, it will just return.
//
// See if this is in GCHeap
PREPARE_EXTERNAL_VAR g_lowest_address, rax
cmp rdi, [rax]
// jb LOCAL_LABEL(NotInHeap)
.byte 0x72, 0x12
PREPARE_EXTERNAL_VAR g_highest_address, rax
cmp rdi, [rax]
// jnb LOCAL_LABEL(NotInHeap)
.byte 0x73, 0x06
jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)]
LOCAL_LABEL(NotInHeap):
// See the comment in JIT_WriteBarrier below about the possible AV
mov [rdi], rsi
ret
LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow
// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_
// change at runtime as the GC changes. Initially it should simply be a copy of the
// larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created
// enough space to copy that code in.
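// (Conceptually, and hedging on the details: on a GC state change the runtime
// suspends the EE, copies the body of the appropriate barrier flavor over this
// symbol, and patches the 8-byte 0xF0F0F0F0F0F0F0F0 placeholders below with the
// current table addresses and bounds; see WriteBarrierManager in
// vm/amd64/jitinterfaceamd64.cpp.)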
.balign 16
LEAF_ENTRY JIT_WriteBarrier, _TEXT
#ifdef _DEBUG
// In debug builds, this just contains a jump to the debug version of the write barrier by default
jmp C_FUNC(JIT_WriteBarrier_Debug)
#endif
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
// JIT_WriteBarrier_WriteWatch_PostGrow64
// Regarding patchable constants:
// - 64-bit constants have to be loaded into a register
// - The constants have to be aligned to 8 bytes so that they can be patched easily
// - The constant loads have been located to minimize NOP padding required to align the constants
// - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special
// non-volatile calling convention, this should be changed to use just one register.
// Do the move into the GC heap. It is correct to take an AV here; the EH code
// figures out that this came from a WriteBarrier and correctly maps it back
// to the managed method which called the WriteBarrier (see setup in
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
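//
// A C sketch of this write-watch flavor (the 0xF0F0F0F0F0F0F0F0 placeholders
// stand for g_sw_ww_table, the ephemeral bounds, and g_card_table, patched in
// at runtime; names here follow the usual coreclr globals):
//
//   *dst = src;
//   uint8_t* ww = g_sw_ww_table + ((size_t)dst >> 0xC); // AddressToTableByteIndexShift
//   if (*ww == 0) *ww = 0xFF;                           // record the write
//   if (src < g_ephemeral_low || src >= g_ephemeral_high) return;
//   uint8_t* card = g_card_table + ((size_t)dst >> 0xB);
//   if (*card != 0xFF) *card = 0xFF;                    // dirty the card
//   // and, with FEATURE_MANUALLY_MANAGED_CARD_BUNDLES, the same for
//   // g_card_bundle_table + ((size_t)dst >> 0x15)
//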
mov [rdi], rsi
// Update the write watch table if necessary
mov rax, rdi
movabs r10, 0xF0F0F0F0F0F0F0F0
shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
NOP_2_BYTE // padding for alignment of constant
movabs r11, 0xF0F0F0F0F0F0F0F0
add rax, r10
cmp byte ptr [rax], 0x0
.byte 0x75, 0x06
// jne LOCAL_LABEL(CheckCardTable)
mov byte ptr [rax], 0xFF
NOP_3_BYTE // padding for alignment of constant
// Check the lower and upper ephemeral region bounds
LOCAL_LABEL(CheckCardTable):
cmp rsi, r11
.byte 0x72,0x3D
// jb LOCAL_LABEL(Exit)
NOP_3_BYTE // padding for alignment of constant
movabs r10, 0xF0F0F0F0F0F0F0F0
cmp rsi, r10
.byte 0x73,0x2B
// jae LOCAL_LABEL(Exit)
nop // padding for alignment of constant
movabs rax, 0xF0F0F0F0F0F0F0F0
// Touch the card table entry, if not already dirty.
shr rdi, 0x0B
cmp byte ptr [rdi + rax], 0xFF
.byte 0x75, 0x02
// jne LOCAL_LABEL(UpdateCardTable)
REPRET
LOCAL_LABEL(UpdateCardTable):
mov byte ptr [rdi + rax], 0xFF
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
NOP_2_BYTE // padding for alignment of constant
shr rdi, 0x0A
movabs rax, 0xF0F0F0F0F0F0F0F0
cmp byte ptr [rdi + rax], 0xFF
.byte 0x75, 0x02
// jne LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64)
REPRET
LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64):
mov byte ptr [rdi + rax], 0xFF
#endif
ret
.balign 16
LOCAL_LABEL(Exit):
REPRET
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
#else
// JIT_WriteBarrier_PostGrow64
// Do the move into the GC heap. It is correct to take an AV here; the EH code
// figures out that this came from a WriteBarrier and correctly maps it back
// to the managed method which called the WriteBarrier (see setup in
// InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rdi], rsi
NOP_3_BYTE // padding for alignment of constant
// We can't compare against a 64-bit immediate, so we have to load the bounds
// into registers. The values of these immediates are patched at runtime.
// Using two registers lets the loads pipeline better. Should we decide to use
// a special non-volatile calling convention, this should be changed to
// just one.
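//
// In C, this flavor is roughly (a sketch; the placeholder constants are the
// ephemeral bounds and the card table address, patched in at runtime):
//
//   *dst = src;
//   if (src < g_ephemeral_low || src >= g_ephemeral_high) return;
//   uint8_t* card = g_card_table + ((size_t)dst >> 0xB);
//   if (*card != 0xFF) *card = 0xFF;
//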
movabs rax, 0xF0F0F0F0F0F0F0F0
// Check the lower and upper ephemeral region bounds
cmp rsi, rax
// jb LOCAL_LABEL(Exit)
.byte 0x72, 0x36
nop // padding for alignment of constant
movabs r8, 0xF0F0F0F0F0F0F0F0
cmp rsi, r8
// jae LOCAL_LABEL(Exit)
.byte 0x73, 0x26
nop // padding for alignment of constant
movabs rax, 0xF0F0F0F0F0F0F0F0
// Touch the card table entry, if not already dirty.
shr rdi, 0x0B
cmp byte ptr [rdi + rax], 0xFF
.byte 0x75, 0x02
// jne LOCAL_LABEL(UpdateCardTable)
REPRET
LOCAL_LABEL(UpdateCardTable):
mov byte ptr [rdi + rax], 0xFF
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
NOP_6_BYTE // padding for alignment of constant
movabs rax, 0xF0F0F0F0F0F0F0F0
// Touch the card bundle, if not already dirty.
// rdi is already shifted by 0xB, so shift by 0xA more
shr rdi, 0x0A
cmp byte ptr [rdi + rax], 0xFF
.byte 0x75, 0x02
// jne LOCAL_LABEL(UpdateCardBundle)
REPRET
LOCAL_LABEL(UpdateCardBundle):
mov byte ptr [rdi + rax], 0xFF
#endif
ret
.balign 16
LOCAL_LABEL(Exit):
REPRET
#endif
// make sure this is bigger than any of the others
.balign 16
nop
LEAF_END_MARKED JIT_WriteBarrier, _TEXT
// Mark end of the code region that we patch at runtime
LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
ret
LEAF_END JIT_PatchedCodeLast, _TEXT
#else //FEATURE_SATORI_GC ##############################################################################
.macro ALTERNATE_ENTRY Name
.global C_FUNC(\Name)
C_FUNC(\Name):
.endm
// Mark start of the code region that we patch at runtime
LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
ret
LEAF_END JIT_PatchedCodeStart, _TEXT
// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
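//
// Roughly, in C (a sketch; Satori appears to keep one byte per 1GB page in
// the map named g_card_bundle_table here):
//
//   void JIT_CheckedWriteBarrier(Object** dst, Object* src)
//   {
//       if (g_card_bundle_table[(size_t)dst >> 30] == 0)
//           *dst = src;                  // dst is not in the GC heap
//       else
//           JIT_WriteBarrier(dst, src);  // enter at CheckedEntry
//   }
//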
LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT
// See if dst is in GCHeap
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
mov rax, [rax]
mov r8, rdi
shr r8, 30 // dst page index
cmp byte ptr [rax + r8], 0
jne C_FUNC(CheckedEntry)
NotInHeap:
// See comment above about possible AV
mov [rdi], rsi
ret
LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT
//
// rdi - dest address
// rsi - object
//
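// An overview of the fast path below, in C-like pseudocode (a sketch, reading
// the 2MB (1 << 21) region size and header layout off the masks and offsets
// used in the code; field names here are illustrative, not Satori's):
//
//   SatoriRegion* srcRegion = (SatoriRegion*)((size_t)src & ~(size_t)0x1FFFFF);
//   if (srcRegion == NULL)                 goto JustAssign;    // null (or external) src
//   if (srcRegion->ownerTag != threadTag)  goto AssignAndMarkCards;
//   if (srcRegion != dstRegion)            goto RecordEscape;  // cross-region
//   if (escape bit for dst is set)         goto RecordEscape;  // dst already exposed
//   goto JustAssign;   // thread-local, same-region, unexposed: plain store, no cards
//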
.balign 16
LEAF_ENTRY JIT_WriteBarrier, _TEXT
#ifdef FEATURE_SATORI_EXTERNAL_OBJECTS
// check if src is in heap
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
mov rax, [rax]
ALTERNATE_ENTRY CheckedEntry
mov r8, rsi
shr r8, 30 // src page index
cmp byte ptr [rax + r8], 0
je JustAssign // src not in heap
#else
ALTERNATE_ENTRY CheckedEntry
#endif
// check for escaping assignment
// 1) check if we own the source region
mov rdx, rsi
and rdx, 0xFFFFFFFFFFE00000 // source region
#ifndef FEATURE_SATORI_EXTERNAL_OBJECTS
jz JustAssign // assigning null
#endif
#ifdef TARGET_OSX
mov rax, gs:[0] // thread tag
#else
mov rax, fs:[0] // thread tag
#endif
cmp qword ptr [rdx], rax
jne AssignAndMarkCards // not local to this thread
// 2) check if the src and dst are from the same region
mov rax, rdi
and rax, 0xFFFFFFFFFFE00000 // target aligned to region
cmp rax, rdx
jnz RecordEscape // cross region assignment. definitely escaping
// 3) check if the target is exposed
mov rax, rdi
and rax, 0x1FFFFF
shr rax, 3
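// rax = (dst offset within the region) / 8; apparently one bit per 8-byte
// slot in an escape bitmap based at the region start (see the bt below)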
bt qword ptr [rdx], rax
jb RecordEscape // target is exposed. record an escape.
JustAssign:
mov [rdi], rsi // no card marking, src is not a heap object
// set rdi, rsi per contract with JIT_ByRefWriteBarrier
add rdi, 8
mov rsi, r10
ret
AssignAndMarkCards:
mov [rdi], rsi
// TUNING: barriers in different modes could be separate pieces of code, but a barrier
// switch needs to suspend the EE; not sure whether skipping the mode check would be worth that much.
PREPARE_EXTERNAL_VAR g_sw_ww_table, rcx
mov r11, [rcx]
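// r11 = barrier state (read from g_sw_ww_table); judging by the checks below:
// 2 means skip all card work, 0 means the default non-concurrent barrier
// (cards are set), and any other value means concurrent marking is in
// progress (cards are dirtied)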
// set rdi per contract with JIT_ByRefWriteBarrier
mov rax, rdi
add rdi, 8
// check the barrier state. this must be done after the assignment (in program order)
// if state == 2 we do not set or dirty cards.
cmp r11, 2
jne DoCards
// set rsi per contract with JIT_ByRefWriteBarrier
mov rsi, r10
Exit:
ret
DoCards:
// if same region, just check if barrier is not concurrent
xor rsi, rax
shr rsi, 21
// set rsi per contract with JIT_ByRefWriteBarrier
mov rsi, r10
jz CheckConcurrent // same region, just check if barrier is not concurrent
// if src is in gen2/3 and the barrier is not concurrent we do not need to mark cards
cmp dword ptr [rdx + 16], 2
jl MarkCards
CheckConcurrent:
// if concurrent, load card location
cmp r11, 0
je Exit
MarkCards:
// fetch card location for rax (saved rdi)
PREPARE_EXTERNAL_VAR g_card_table, r9
mov r9, [r9] // fetch the page map
mov rdx, rax
shr rax, 30
mov rax, qword ptr [r9 + rax * 8] // page
sub rdx, rax // offset in page
mov r8, rdx
shr rdx, 9 // card offset
shr r8, 20 // group index
lea r8, [rax + r8 * 2 + 0x80] // group offset
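// in C terms, the addressing above is roughly (a sketch):
//   uint8_t* page  = (uint8_t*)g_card_table[(size_t)dst >> 30]; // 1GB pages
//   size_t   off   = (size_t)dst - (size_t)page;                // offset in page
//   uint8_t* card  = page + (off >> 9);             // one card byte per 512 bytes
//   uint8_t* group = page + 0x80 + (off >> 20) * 2; // one group per 1MB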
// check if concurrent marking is in progress
cmp r11, 0
jne DirtyCard
// SETTING CARD
SetCard:
cmp byte ptr [rax + rdx], 0
jne Exit
mov byte ptr [rax + rdx], 1
SetGroup:
cmp byte ptr [r8], 0
jne CardSet
mov byte ptr [r8], 1
SetPage:
cmp byte ptr [rax], 0
jne CardSet
mov byte ptr [rax], 1
CardSet:
// check if concurrent marking is still not in progress
cmp qword ptr [rcx], 0
jne DirtyCard
ret
// DIRTYING CARD
DirtyCard:
mov byte ptr [rax + rdx], 4
DirtyGroup:
cmp byte ptr [r8], 4
je Exit
mov byte ptr [r8], 4
DirtyPage:
cmp byte ptr [rax], 4
je Exit
mov byte ptr [rax], 4
ret
// this is expected to be rare.
RecordEscape:
// 4) check if the source is escaped
mov rax, rsi
add rax, 8 // escape bit is MT + 1
and rax, 0x1FFFFF
shr rax, 3
bt qword ptr [rdx], rax
jb AssignAndMarkCards // source is already escaped.
// Align rsp
mov r9, rsp
and rsp, -16
sub rsp, 8
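// (the call below must see a 16-byte aligned rsp per the SysV ABI; the
// original rsp is restored later from the saved r9)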
// save rsp, rdi, rsi, rdx and r10 (possibly preadjusted rsi)
push r9
push rdi
push rsi
push rdx
push r10
// also save xmm0, in case it is used for stack clearing, as JIT_ByRefWriteBarrier should not trash xmm0
// Hopefully EscapeFn cannot corrupt other xmm regs, since there is no float math or vectorizable code in there.
sub rsp, 16
movdqu [rsp], xmm0
// void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region)
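// (the EscapeFn pointer apparently lives at offset 8 in the region header;
// rdx still holds the source region)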
call qword ptr [rdx + 8]
movdqu xmm0, [rsp]
add rsp, 16
pop r10
pop rdx
pop rsi
pop rdi
pop rsp
jmp AssignAndMarkCards
LEAF_END_MARKED JIT_WriteBarrier, _TEXT
// JIT_ByRefWriteBarrier has weird semantics; see usage in StubLinkerX86.cpp
//
// Entry:
// RDI - address of ref-field (assigned to)
// RSI - address of the data (source)
// Exit:
// RCX is trashed
// RAX is trashed
// RDI, RSI are incremented by SIZEOF(LPVOID)
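//
// In pseudocode (a sketch of the code below):
//
//   val = *src;                                // load the value being copied
//   if (g_card_bundle_table[dst >> 30] == 0)   // dst not in the GC heap
//       { *dst = val; dst += 8; src += 8; return; }
//   r10 = src + 8;                             // preadjusted src for the callee
//   CheckedEntry(dst, val);                    // advances rdi by 8, sets rsi = r10
//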
LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
// See if dst is in GCHeap
PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map
mov rcx, rdi
shr rcx, 30 // dst page index
cmp byte ptr [rax + rcx], 0
jne InHeap
mov rcx, [rsi]
mov [rdi], rcx
add rdi, 8
add rsi, 8
ret
InHeap:
// JIT_WriteBarrier may trash these registers
push rdx
push r8
push r9
push r10
push r11
// save preadjusted rsi
lea r10, [rsi + 8]
mov rsi, [rsi]
call CheckedEntry
pop r11
pop r10
pop r9
pop r8
pop rdx
ret
LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT
// Mark end of the code region that we patch at runtime
LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
ret
LEAF_END JIT_PatchedCodeLast, _TEXT
#endif // FEATURE_SATORI_GC