diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 7277d1af376..7fc3cc544cb 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -204,12 +204,11 @@ // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC - // TODO: Satori make more precise? - // Registers killed by CORINFO_HELP_ASSIGN_BYREF. - #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC) - // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. - #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF (RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI)) + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF (RBM_RAX | RBM_RCX) + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_RSI | RBM_RDI | RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF) // We have two register classifications // * callee trash: aka volatile or caller saved diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S index caa00127b7f..822fbb56510 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S @@ -400,7 +400,7 @@ LEAF_ENTRY RhpAssignRef, _TEXT JustAssign: ALTERNATE_ENTRY RhpAssignRefAVLocationNotHeap - mov [rdi], rsi // no card marking, src is not a heap object + mov [rdi], rsi // no card marking, src is not a heap object // set rdi, rsi per contract with JIT_ByRefWriteBarrier add rdi, 8 mov rsi, r10 @@ -515,9 +515,17 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation push rdx push r10 + // also save xmm0, in case it is used for stack clearing, as JIT_ByRefWriteBarrier should not trash xmm0 + // Hopefully EscapeFn cannot corrupt other xmm regs, since there is no float math or vectorizable code in there. 
+ sub rsp, 16 + movdqu [rsp], xmm0 + // void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region) call qword ptr [rdx + 8] + movdqu xmm0, [rsp] + add rsp, 16 + pop r10 pop rdx pop rsi @@ -529,31 +537,48 @@ LEAF_END RhpAssignRef, _TEXT // // RhpByRefAssignRef simulates movs instruction for object references. // -// On entry: -// rdi: address of ref-field (assigned to) -// rsi: address of the data (source) -// -// On exit: -// rdi, rsi are incremented by 8, -// rdi, rdx, r9, r10, r11: trashed -// +// Entry: +// RDI - address of ref-field (assigned to) +// RSI - address of the data (source) +// Exit: +// RCX is trashed +// RAX is trashed +// RDI, RSI are incremented by SIZEOF(LPVOID) LEAF_ENTRY RhpByRefAssignRef, _TEXT - lea r10, [rsi + 8] + // See if dst is in GCHeap + mov rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map + mov rcx, rdi + shr rcx, 30 // dst page index + cmp byte ptr [rax + rcx], 0 + jne InHeap + ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 + mov rcx, [rsi] +ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2 + mov [rdi], rcx + add rdi, 8 + add rsi, 8 + ret + + InHeap: + // JIT_WriteBarrier may trash these registers + push rdx + push r8 + push r9 + push r10 + push r11 + + // save preadjusted rsi + lea r10, [rsi + 8] mov rsi, [rsi] - // See if dst is in GCHeap - mov rax, [C_VAR(g_card_bundle_table)] // fetch the page byte map - mov r8, rdi - shr r8, 30 // dst page index - cmp byte ptr [rax + r8], 0 - jne C_FUNC(CheckedEntry) + call CheckedEntry - NotInHeap_RhpByRefAssignRef: -ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2 - mov [rdi], rsi - add rdi, 8 - mov rsi, r10 + pop r11 + pop r10 + pop r9 + pop r8 + pop rdx ret LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm index fe4bee34dfe..fc51b226356 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm +++ 
b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm @@ -508,17 +508,28 @@ ALTERNATE_ENTRY RhpAssignRefAVLocation mov r9, rsp and rsp, -16 - ; save rsp, rcx, rdx, r8 and have enough stack for the callee + ; save rsp, rcx, rdx, r8 push r9 push rcx push rdx push r8 + + ; also save xmm0, in case it is used for stack clearing, as JIT_ByRefWriteBarrier should not trash xmm0 + ; Hopefully EscapeFn cannot corrupt other xmm regs, since there is no float math or vectorizable code in there. + sub rsp, 16 + movdqu [rsp], xmm0 + + ; shadow space sub rsp, 20h ; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region) call qword ptr [r8 + 8] add rsp, 20h + + movdqu xmm0, [rsp] + add rsp, 16 + pop r8 pop rdx pop rcx @@ -528,33 +539,51 @@ LEAF_END RhpAssignRef, _TEXT ;; ;; RhpByRefAssignRef simulates movs instruction for object references. -;; -;; On entry: -;; rdi: address of ref-field (assigned to) -;; rsi: address of the data (source) -;; -;; On exit: -;; rdi, rsi are incremented by 8, -;; rcx, r8, r9, r11: trashed -;; +;; Entry: +;; RDI - address of ref-field (assigned to) +;; RSI - address of the data (source) +;; Exit: +;; RCX is trashed +;; RAX is trashed +;; RDI, RSI are incremented by SIZEOF(LPVOID) LEAF_ENTRY RhpByRefAssignRef, _TEXT - mov rcx, rdi -ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - mov rdx, [rsi] - add rdi, 8h - add rsi, 8h - ; See if dst is in GCHeap - mov rax, [g_card_bundle_table] ; fetch the page byte map - mov r8, rcx - shr r8, 30 ; dst page index - cmp byte ptr [rax + r8], 0 - jne CheckedEntry + mov rax, [g_card_bundle_table] ; fetch the page byte map + mov rcx, rdi + shr rcx, 30 ; dst page index + cmp byte ptr [rax + rcx], 0 + jne InHeap - NotInHeap: +ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 + mov rcx, [rsi] ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2 - mov [rcx], rdx - ret + mov [rdi], rcx + add rdi, 8h + add rsi, 8h + ret + + InHeap: + + ; JIT_WriteBarrier may trash these registers + push rdx + push r8 + push 
r9 + push r10 + push r11 + + mov rcx, rdi + mov rdx, [rsi] + add rdi, 8h + add rsi, 8h + + call CheckedEntry + + pop r11 + pop r10 + pop r9 + pop r8 + pop rdx + ret LEAF_END RhpByRefAssignRef, _TEXT LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT diff --git a/src/coreclr/vm/amd64/patchedcode.S b/src/coreclr/vm/amd64/patchedcode.S index 167b5caf7d8..570fe335fa6 100644 --- a/src/coreclr/vm/amd64/patchedcode.S +++ b/src/coreclr/vm/amd64/patchedcode.S @@ -443,9 +443,17 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT push rdx push r10 + // also save xmm0, in case it is used for stack clearing, as JIT_ByRefWriteBarrier should not trash xmm0 + // Hopefully EscapeFn cannot corrupt other xmm regs, since there is no float math or vectorizable code in there. + sub rsp, 16 + movdqu [rsp], xmm0 + // void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region) call qword ptr [rdx + 8] + movdqu xmm0, [rsp] + add rsp, 16 + pop r10 pop rdx pop rsi @@ -460,29 +468,44 @@ LEAF_END_MARKED JIT_WriteBarrier, _TEXT // Entry: // RDI - address of ref-field (assigned to) // RSI - address of the data (source) -// Note: RyuJIT assumes that all volatile registers can be trashed by -// the CORINFO_HELP_ASSIGN_BYREF helper (i.e. JIT_ByRefWriteBarrier) -// except RDI and RSI. This helper uses and defines RDI and RSI, so -// they remain as live GC refs or byrefs, and are not killed. 
// Exit: +// RCX is trashed +// RAX is trashed // RDI, RSI are incremented by SIZEOF(LPVOID) LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT + // See if dst is in GCHeap + PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map + mov rax, [rax] // PREPARE_EXTERNAL_VAR only yields the variable's address; load the table pointer itself + mov rcx, rdi + shr rcx, 30 // dst page index + cmp byte ptr [rax + rcx], 0 + jne InHeap + + mov rcx, [rsi] + mov [rdi], rcx + add rdi, 8 + add rsi, 8 + ret + + InHeap: + // JIT_WriteBarrier may trash these registers + push rdx + push r8 + push r9 + push r10 + push r11 + + // save preadjusted rsi lea r10, [rsi + 8] mov rsi, [rsi] - // See if dst is in GCHeap - PREPARE_EXTERNAL_VAR g_card_bundle_table, rax // fetch the page byte map - mov rax, [rax] + call CheckedEntry - mov r8, rdi - shr r8, 30 // dst page index - cmp byte ptr [rax + r8], 0 - jne C_FUNC(CheckedEntry) - - NotInHeap_ByRefWriteBarrier: - mov [rdi], rsi - add rdi, 8 - mov rsi, r10 + pop r11 + pop r10 + pop r9 + pop r8 + pop rdx ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT diff --git a/src/coreclr/vm/amd64/patchedcode.asm b/src/coreclr/vm/amd64/patchedcode.asm index 9ad4727a47b..bfba575b29c 100644 --- a/src/coreclr/vm/amd64/patchedcode.asm +++ b/src/coreclr/vm/amd64/patchedcode.asm @@ -376,17 +376,28 @@ endif mov r9, rsp and rsp, -16 - ; save rsp, rcx, rdx, r8 and have enough stack for the callee + ; save rsp, rcx, rdx, r8 push r9 push rcx push rdx push r8 + + ; also save xmm0, in case it is used for stack clearing, as JIT_ByRefWriteBarrier should not trash xmm0 + ; Hopefully EscapeFn cannot corrupt other xmm regs, since there is no float math or vectorizable code in there. 
+ sub rsp, 16 + movdqu [rsp], xmm0 + + ; shadow space sub rsp, 20h ; void SatoriRegion::EscapeFn(SatoriObject** dst, SatoriObject* src, SatoriRegion* region) call qword ptr [r8 + 8] add rsp, 20h + + movdqu xmm0, [rsp] + add rsp, 16 + pop r8 pop rdx pop rcx @@ -399,27 +410,45 @@ LEAF_END_MARKED JIT_WriteBarrier, _TEXT ; Entry: ; RDI - address of ref-field (assigned to) ; RSI - address of the data (source) -; Note: RyuJIT assumes that all volatile registers can be trashed by -; the CORINFO_HELP_ASSIGN_BYREF helper (i.e. JIT_ByRefWriteBarrier) -; except RDI and RSI. This helper uses and defines RDI and RSI, so -; they remain as live GC refs or byrefs, and are not killed. ; Exit: +; RCX is trashed +; RAX is trashed ; RDI, RSI are incremented by SIZEOF(LPVOID) LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT + ; See if dst is in GCHeap + mov rax, [g_card_bundle_table] ; fetch the page byte map + mov rcx, rdi + shr rcx, 30 ; dst page index + cmp byte ptr [rax + rcx], 0 + jne InHeap + + mov rcx, [rsi] + mov [rdi], rcx + add rdi, 8h + add rsi, 8h + ret + + InHeap: + + ; JIT_WriteBarrier may trash these registers + push rdx + push r8 + push r9 + push r10 + push r11 + mov rcx, rdi mov rdx, [rsi] add rdi, 8h add rsi, 8h - ; See if dst is in GCHeap - mov rax, [g_card_bundle_table] ; fetch the page byte map - mov r8, rcx - shr r8, 30 ; dst page index - cmp byte ptr [rax + r8], 0 - jne CheckedEntry + call CheckedEntry - NotInHeap: - mov [rcx], rdx + pop r11 + pop r10 + pop r9 + pop r8 + pop rdx ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT