From 3ada93c33d88d0106d3d9b4240c896b378bd8776 Mon Sep 17 00:00:00 2001 From: SwapnilGaikwad Date: Fri, 31 Mar 2023 18:26:28 +0100 Subject: [PATCH] Use ldp/stp with SIMD registers on Arm64 (#84135) * Use ldp/stp with SIMD registers on Arm64 Use pairwise load/stores for 1. the instructions using SIMD registers ``` ldr q1, [x0, #0x20] ldr q2, [x0, #0x30] => ldp q1, q2, [x0, #0x20] ``` 2. the instructions using base and base plus immediate offset format ``` ldr w1, [x20] ldr w2, [x20, #0x04] => ldp w1, w2, [x20] ldr q1, [x0] ldr q2, [x0, #0x10] => ldp q1, q2, [x0] ``` * Incorporate review comments --- src/coreclr/jit/emitarm64.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index c88f854d095..a780dfa96a5 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16248,6 +16248,12 @@ bool emitter::ReplaceLdrStrWithPairInstr( // ldr w1, [x20, #0x14] // ldr w2, [x20, #0x10] => ldp w2, w1, [x20, #0x10] // +// ldr w1, [x20] +// ldr w2, [x20, #0x04] => ldp w1, w2, [x20] +// +// ldr q1, [x0, #0x20] +// ldr q2, [x0, #0x30] => ldp q1, q2, [x0, #0x20] +// // Arguments: // ins - The instruction code // reg1 - Register 1 number @@ -16291,16 +16297,21 @@ emitter::RegisterOrder emitter::IsOptimizableLdrStrWithPair( return eRO_none; } - if ((!isGeneralRegisterOrZR(reg1)) || (!isGeneralRegisterOrZR(prevReg1))) + if ((reg1 == REG_SP) || (prevReg1 == REG_SP) || (isGeneralRegisterOrZR(reg1) != isGeneralRegisterOrZR(prevReg1))) { - // Either register 1 is not a general register or previous register 1 is not a general register - // or the zero register, so we cannot optimise. + // We cannot optimise when one of the following conditions are met + // 1. reg1 or prevReg1 is SP + // 2. both reg1 and prevReg1 are not of the same type (SIMD or non-SIMD) return eRO_none; } - if (lastInsFmt != fmt) + const bool compatibleFmt = (lastInsFmt == fmt) || (lastInsFmt == IF_LS_2B && fmt == IF_LS_2A) || + (lastInsFmt == IF_LS_2A && fmt == IF_LS_2B); + if (!compatibleFmt) { - // The formats of the two instructions differ. + // We cannot optimise when all of the following conditions are met + // 1. instruction formats differ + // 2. instructions are not using "base" or "base plus immediate offset" addressing modes return eRO_none; }