From 60d00ec126cce384ba6d374a5d2fa5e28d7c667f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 19 Jun 2023 09:40:16 -0700 Subject: [PATCH] LSRA-throughput: Iterate over the regMaskTP instead all registers (#87424) * replace for-loop with regMaspTP iterator * jit format * REVERT * fix a bug * address review feedback * Add genFirstRegNumFromMaskAndToggle and genFirstRegNumFromMask * Use actualRegistersMask * jit format * review feedback * Inline BitScanForward * fix build error * remove commented code --- src/coreclr/jit/codegenarm64.cpp | 9 +- src/coreclr/jit/compiler.hpp | 44 +++++++++ src/coreclr/jit/gentree.cpp | 6 +- src/coreclr/jit/lsra.cpp | 155 ++++++++++++++++++------------- src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/lsrabuild.cpp | 55 ++++++----- src/coreclr/jit/utils.cpp | 60 ------------ src/coreclr/jit/utils.h | 60 +++++++++++- 8 files changed, 234 insertions(+), 159 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8e0faf1f05e..955cba0b42a 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -708,16 +708,13 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* reg while (regsMask != RBM_NONE) { - regMaskTP reg1Mask = genFindLowestBit(regsMask); - regNumber reg1 = genRegNumFromMask(reg1Mask); - regsMask &= ~reg1Mask; + regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask); regsCount -= 1; bool isPairSave = false; if (regsCount > 0) { - regMaskTP reg2Mask = genFindLowestBit(regsMask); - regNumber reg2 = genRegNumFromMask(reg2Mask); + regNumber reg2 = genFirstRegNumFromMask(regsMask); if (reg2 == REG_NEXT(reg1)) { // The JIT doesn't allow saving pair (R28,FP), even though the @@ -733,7 +730,7 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* reg { isPairSave = true; - regsMask &= ~reg2Mask; + regsMask ^= genRegMask(reg2); regsCount -= 1; regStack->Push(RegPair(reg1, reg2)); diff --git a/src/coreclr/jit/compiler.hpp 
b/src/coreclr/jit/compiler.hpp index c79c5edbeea..2283fdb389d 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -706,6 +706,50 @@ inline regNumber genRegNumFromMask(regMaskTP mask) return regNum; } +//------------------------------------------------------------------------------ +// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a +// register number and also toggle the bit in the `mask`. +// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the first register contained in the mask and updates the `mask` to toggle +// the bit. +// + +inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) +{ + assert(mask != 0); // Must have one bit set, so can't have a mask of zero + + /* Convert the mask to a register number */ + + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + mask ^= genRegMask(regNum); + + return regNum; +} + +//------------------------------------------------------------------------------ +// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number. +// +// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the first register contained in the mask. +// + +inline regNumber genFirstRegNumFromMask(regMaskTP mask) +{ + assert(mask != 0); // Must have one bit set, so can't have a mask of zero + + /* Convert the mask to a register number */ + + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + + return regNum; +} + /***************************************************************************** * * Return the size in bytes of the given type. 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 30037e83245..17b2026fa64 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25840,9 +25840,9 @@ regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */) { regMaskTP availableSet = gtRsvdRegs & mask; assert(genCountBits(availableSet) >= 1); - regMaskTP tempRegMask = genFindLowestBit(availableSet); - gtRsvdRegs &= ~tempRegMask; - return genRegNumFromMask(tempRegMask); + regNumber tempReg = genFirstRegNumFromMask(availableSet); + gtRsvdRegs ^= genRegMask(tempReg); + return tempReg; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c5ea9443b22..d6f15f2f98b 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -297,9 +297,10 @@ regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInte regMaskTP result = RBM_NONE; while (candidates != RBM_NONE) { - regMaskTP candidateBit = genFindLowestBit(candidates); - candidates &= ~candidateBit; - regNumber regNum = genRegNumFromMask(candidateBit); + regNumber regNum = genFirstRegNumFromMask(candidates); + regMaskTP candidateBit = genRegMask(regNum); + candidates ^= candidateBit; + RegRecord* physRegRecord = getRegisterRecord(regNum); if (isMatchingConstant(physRegRecord, refPosition)) { @@ -3848,9 +3849,8 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) INDEBUG(bool killedRegs = false); while (candidateRegs != RBM_NONE) { - regMaskTP nextRegBit = genFindLowestBit(candidateRegs); - candidateRegs &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); + RegRecord* regRecord = getRegisterRecord(nextReg); Interval* assignedInterval = regRecord->assignedInterval; if (assignedInterval == nullptr || (assignedInterval->isActive == false)) @@ -3945,9 +3945,7 @@ regNumber 
LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar regNumber newReg = REG_NA; while (candidateRegs != RBM_NONE) { - regMaskTP nextRegBit = genFindLowestBit(candidateRegs); - candidateRegs &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); if (nextReg > targetReg) { newReg = nextReg; @@ -4440,6 +4438,9 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) resetRegState(); setRegsInUse(liveRegs); } + +#ifdef TARGET_ARM + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); @@ -4472,7 +4473,6 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType)); } -#ifdef TARGET_ARM // unassignPhysReg, above, may have restored a 'previousInterval', in which case we need to // get the value of 'physRegRecord->assignedInterval' rather than using 'assignedInterval'. 
if (physRegRecord->assignedInterval != nullptr) @@ -4486,10 +4486,8 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) reg = REG_NEXT(reg); makeRegAvailable(reg, physRegRecord->registerType); } -#endif // TARGET_ARM } } -#ifdef TARGET_ARM else { Interval* assignedInterval = physRegRecord->assignedInterval; @@ -4501,8 +4499,46 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) reg = REG_NEXT(reg); } } -#endif // TARGET_ARM } +#else + regMaskTP deadCandidates = ~liveRegs; + + // Only focus on actual registers present + deadCandidates &= actualRegistersMask; + + while (deadCandidates != RBM_NONE) + { + regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates); + RegRecord* physRegRecord = getRegisterRecord(reg); + + makeRegAvailable(reg, physRegRecord->registerType); + Interval* assignedInterval = physRegRecord->assignedInterval; + + if (assignedInterval != nullptr) + { + assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector()); + + if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord) + { + assignedInterval->isActive = false; + if (assignedInterval->getNextRefPosition() == nullptr) + { + unassignPhysReg(physRegRecord, nullptr); + } + if (!assignedInterval->IsUpperVector()) + { + inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK; + } + } + else + { + // This interval may still be active, but was in another register in an + // intervening block. 
+ clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType)); + } + } + } +#endif // TARGET_ARM } //------------------------------------------------------------------------ @@ -4668,15 +4704,14 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) makeRegsAvailable(regsToFree); while (regsToFree != RBM_NONE) { - regMaskTP nextRegBit = genFindLowestBit(regsToFree); - regsToFree &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree); + RegRecord* regRecord = getRegisterRecord(nextReg); #ifdef TARGET_ARM if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) { assert(genIsValidDoubleReg(nextReg)); - regsToFree &= ~(nextRegBit << 1); + regsToFree ^= genRegMask(regNumber(nextReg + 1)); } #endif freeRegister(regRecord); @@ -4798,9 +4833,7 @@ void LinearScan::allocateRegisters() regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); while (tempRegsToMakeInactive != RBM_NONE) { - regMaskTP nextRegBit = genFindLowestBit(tempRegsToMakeInactive); - tempRegsToMakeInactive &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive); RegRecord* regRecord = getRegisterRecord(nextReg); clearSpillCost(regRecord->regNum, regRecord->registerType); makeRegisterInactive(regRecord); @@ -8841,9 +8874,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, regMaskTP targetCandidates = targetRegsToDo; while (targetCandidates != RBM_NONE) { - regMaskTP targetRegMask = genFindLowestBit(targetCandidates); - targetCandidates &= ~targetRegMask; - regNumber targetReg = genRegNumFromMask(targetRegMask); + regNumber targetReg = genFirstRegNumFromMask(targetCandidates); + regMaskTP targetRegMask = genRegMask(targetReg); + targetCandidates ^= targetRegMask; if (location[targetReg] == REG_NA) { #ifdef TARGET_ARM @@ -8872,10 +8905,10 @@ 
void LinearScan::resolveEdge(BasicBlock* fromBlock, { while (targetRegsReady != RBM_NONE) { - regMaskTP targetRegMask = genFindLowestBit(targetRegsReady); - targetRegsToDo &= ~targetRegMask; - targetRegsReady &= ~targetRegMask; - regNumber targetReg = genRegNumFromMask(targetRegMask); + regNumber targetReg = genFirstRegNumFromMask(targetRegsReady); + regMaskTP targetRegMask = genRegMask(targetReg); + targetRegsToDo ^= targetRegMask; + targetRegsReady ^= targetRegMask; assert(location[targetReg] != targetReg); assert(targetReg < REG_COUNT); regNumber sourceReg = (regNumber)source[targetReg]; @@ -8943,8 +8976,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, } if (targetRegsToDo != RBM_NONE) { - regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo); - regNumber targetReg = genRegNumFromMask(targetRegMask); + regNumber targetReg = genFirstRegNumFromMask(targetRegsToDo); + regMaskTP targetRegMask = genRegMask(targetReg); // Is it already there due to other moves? // If not, move it to the temp reg, OR swap it with another register @@ -9006,9 +9039,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, regMaskTP mask = targetRegsToDo; while (mask != RBM_NONE && otherTargetReg == REG_NA) { - regMaskTP nextRegMask = genFindLowestBit(mask); - regNumber nextReg = genRegNumFromMask(nextRegMask); - mask &= ~nextRegMask; + regNumber nextReg = genFirstRegNumFromMaskAndToggle(mask); if (location[source[nextReg]] == targetReg) { otherTargetReg = nextReg; @@ -9111,9 +9142,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // All the target regs will be empty at this point while (targetRegsFromStack != RBM_NONE) { - regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack); - targetRegsFromStack &= ~targetRegMask; - regNumber targetReg = genRegNumFromMask(targetRegMask); + regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetRegsFromStack); Interval* interval = stackToRegIntervals[targetReg]; assert(interval != nullptr); @@ -11166,9 +11195,8 @@ void 
LinearScan::verifyFinalAllocation() regMaskTP candidateRegs = currentRefPosition.registerAssignment; while (candidateRegs != RBM_NONE) { - regMaskTP nextRegBit = genFindLowestBit(candidateRegs); - candidateRegs &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); + RegRecord* regRecord = getRegisterRecord(nextReg); Interval* assignedInterval = regRecord->assignedInterval; assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType)); @@ -11724,9 +11752,9 @@ void LinearScan::RegisterSelection::try_BEST_FIT() LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) { - regMaskTP bestFitCandidateBit = genFindLowestBit(bestFitCandidates); - bestFitCandidates &= ~bestFitCandidateBit; - regNumber bestFitCandidateRegNum = genRegNumFromMask(bestFitCandidateBit); + regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); + regMaskTP bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); + bestFitCandidates ^= bestFitCandidateBit; // Find the next RefPosition of the register. 
LsraLocation nextIntervalLocation = linearScan->getNextIntervalRef(bestFitCandidateRegNum, regType); @@ -11813,10 +11841,11 @@ void LinearScan::RegisterSelection::try_REG_ORDER() regMaskTP lowestRegOrderBit = RBM_NONE; for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { - regMaskTP regOrderCandidateBit = genFindLowestBit(regOrderCandidates); - regOrderCandidates &= ~regOrderCandidateBit; - regNumber regOrderCandidateRegNum = genRegNumFromMask(regOrderCandidateBit); - unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; + regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); + regMaskTP regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); + regOrderCandidates ^= regOrderCandidateBit; + + unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; if (thisRegOrder < lowestRegOrder) { lowestRegOrder = thisRegOrder; @@ -11848,9 +11877,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST() for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;) { - regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates); - spillCandidates &= ~spillCandidateBit; - regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); + regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); + regMaskTP spillCandidateBit = genRegMask(spillCandidateRegNum); + spillCandidates ^= spillCandidateBit; + RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; Interval* assignedInterval = spillCandidateRegRecord->assignedInterval; RefPosition* recentRefPosition = assignedInterval != nullptr ? 
assignedInterval->recentRefPosition : nullptr; @@ -11956,9 +11986,9 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF() regMaskTP farthestSet = RBM_NONE; for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE;) { - regMaskTP farthestCandidateBit = genFindLowestBit(farthestCandidates); - farthestCandidates &= ~farthestCandidateBit; - regNumber farthestCandidateRegNum = genRegNumFromMask(farthestCandidateBit); + regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); + regMaskTP farthestCandidateBit = genRegMask(farthestCandidateRegNum); + farthestCandidates ^= farthestCandidateBit; // Find the next RefPosition of the register. LsraLocation nextIntervalLocation = @@ -11989,11 +12019,11 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT() regMaskTP prevRegOptSet = RBM_NONE; for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) { - regMaskTP prevRegOptCandidateBit = genFindLowestBit(prevRegOptCandidates); - prevRegOptCandidates &= ~prevRegOptCandidateBit; - regNumber prevRegOptCandidateRegNum = genRegNumFromMask(prevRegOptCandidateBit); - Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; - bool foundPrevRegOptReg = true; + regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); + regMaskTP prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); + prevRegOptCandidates ^= prevRegOptCandidateBit; + Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; + bool foundPrevRegOptReg = true; #ifdef DEBUG bool hasAssignedInterval = false; #endif @@ -12094,9 +12124,9 @@ void LinearScan::RegisterSelection::calculateCoversSets() regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? 
candidates : preferenceSet; for (; coversCandidates != RBM_NONE;) { - regMaskTP coversCandidateBit = genFindLowestBit(coversCandidates); - coversCandidates &= ~coversCandidateBit; - regNumber coversCandidateRegNum = genRegNumFromMask(coversCandidateBit); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); + regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + coversCandidates ^= coversCandidateBit; // If we have a single candidate we don't need to compute the preference-related sets, but we // do need to compute the unassignedSet. @@ -12397,9 +12427,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; while (checkConflictMask != RBM_NONE) { - regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask); - checkConflictMask &= ~checkConflictBit; - regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); + regMaskTP checkConflictBit = genRegMask(checkConflictReg); + checkConflictMask ^= checkConflictBit; + LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; if ((checkConflictLocation == refPosition->nodeLocation) || diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a2ab79f5656..e058a8be852 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1661,8 +1661,8 @@ private: VarToRegMap* outVarToRegMaps; // A temporary VarToRegMap used during the resolution of critical edges. 
- VarToRegMap sharedCriticalVarToRegMap; - + VarToRegMap sharedCriticalVarToRegMap; + PhasedVar<regMaskTP> actualRegistersMask; PhasedVar<regMaskTP> availableIntRegs; PhasedVar<regMaskTP> availableFloatRegs; PhasedVar<regMaskTP> availableDoubleRegs; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index c03c53c9498..6771ae2bdc9 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -727,33 +727,30 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) // void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) { - if (refType == RefTypeKill) - { - // The mask identifies a set of registers that will be used during - // codegen. Mark these as modified here, so when we do final frame - // layout, we'll know about all these registers. This is especially - // important if mask contains callee-saved registers, which affect the - // frame size since we need to save/restore them. In the case where we - // have a copyBlk with GC pointers, can need to call the - // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and - // RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as - // modified until codegen, which is too late. - compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - } + assert(refType == RefTypeKill); - for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) + // The mask identifies a set of registers that will be used during + // codegen. Mark these as modified here, so when we do final frame + // layout, we'll know about all these registers. This is especially + // important if mask contains callee-saved registers, which affect the + // frame size since we need to save/restore them.
In the case where we + // have a copyBlk with GC pointers, can need to call the + // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and + // RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as + // modified until codegen, which is too late. + compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); + + for (regMaskTP candidates = mask; candidates != RBM_NONE;) { - if (mask & 1) + regNumber reg = genFirstRegNumFromMaskAndToggle(candidates); + // This assumes that these are all "special" RefTypes that + // don't need to be recorded on the tree (hence treeNode is nullptr) + RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, + genRegMask(reg)); // This MUST occupy the physical register (obviously) + + if (isLastUse) { - // This assumes that these are all "special" RefTypes that - // don't need to be recorded on the tree (hence treeNode is nullptr) - RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, - genRegMask(reg)); // This MUST occupy the physical register (obviously) - - if (isLastUse) - { - pos->lastUse = true; - } + pos->lastUse = true; } } } @@ -2756,6 +2753,16 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } + if (availableRegCount < (sizeof(regMaskTP) * 8)) + { + // Mask out the bits that are between 64 ~ availableRegCount + actualRegistersMask = (1ULL << availableRegCount) - 1; + } + else + { + actualRegistersMask = ~RBM_NONE; + } + #ifdef DEBUG // Make sure we don't have any blocks that were not visited for (BasicBlock* const block : compiler->Blocks()) diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 27e5f14ac3f..76fa1ddb101 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -2556,66 +2556,6 @@ double FloatingPointUtils::normalize(double value) #endif } -//------------------------------------------------------------------------ -// BitOperations::BitScanForward: Search the mask data from least significant bit 
(LSB) to the most significant bit -// (MSB) for a set bit (1) -// -// Arguments: -// value - the value -// -// Return Value: -// 0 if the mask is zero; nonzero otherwise. -// -uint32_t BitOperations::BitScanForward(uint32_t value) -{ - assert(value != 0); - -#if defined(_MSC_VER) - unsigned long result; - ::_BitScanForward(&result, value); - return static_cast<uint32_t>(result); -#else - int32_t result = __builtin_ctz(value); - return static_cast<uint32_t>(result); -#endif -} - -//------------------------------------------------------------------------ -// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit -// (MSB) for a set bit (1) -// -// Arguments: -// value - the value -// -// Return Value: -// 0 if the mask is zero; nonzero otherwise. -// -uint32_t BitOperations::BitScanForward(uint64_t value) -{ - assert(value != 0); - -#if defined(_MSC_VER) -#if defined(HOST_64BIT) - unsigned long result; - ::_BitScanForward64(&result, value); - return static_cast<uint32_t>(result); -#else - uint32_t lower = static_cast<uint32_t>(value); - - if (lower == 0) - { - uint32_t upper = static_cast<uint32_t>(value >> 32); - return 32 + BitScanForward(upper); - } - - return BitScanForward(lower); -#endif // HOST_64BIT -#else - int32_t result = __builtin_ctzll(value); - return static_cast<uint32_t>(result); -#endif -} - //------------------------------------------------------------------------ // BitOperations::BitScanReverse: Search the mask data from most significant bit (MSB) to least significant bit // (LSB) for a set bit (1).
diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index c4b2832994a..32afd1a2e8e 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -775,9 +775,65 @@ public: class BitOperations { public: - static uint32_t BitScanForward(uint32_t value); + //------------------------------------------------------------------------ + // BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit + // (MSB) for a set bit (1) + // + // Arguments: + // value - the value + // + // Return Value: + // The zero-based index of the least significant set bit of `value` (which must be nonzero). + // + FORCEINLINE static uint32_t BitScanForward(uint32_t value) + { + assert(value != 0); - static uint32_t BitScanForward(uint64_t value); +#if defined(_MSC_VER) + unsigned long result; + ::_BitScanForward(&result, value); + return static_cast<uint32_t>(result); +#else + int32_t result = __builtin_ctz(value); + return static_cast<uint32_t>(result); +#endif + } + + //------------------------------------------------------------------------ + // BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit + // (MSB) for a set bit (1) + // + // Arguments: + // value - the value + // + // Return Value: + // The zero-based index of the least significant set bit of `value` (which must be nonzero). + // + FORCEINLINE static uint32_t BitScanForward(uint64_t value) + { + assert(value != 0); + +#if defined(_MSC_VER) +#if defined(HOST_64BIT) + unsigned long result; + ::_BitScanForward64(&result, value); + return static_cast<uint32_t>(result); +#else + uint32_t lower = static_cast<uint32_t>(value); + + if (lower == 0) + { + uint32_t upper = static_cast<uint32_t>(value >> 32); + return 32 + BitScanForward(upper); + } + + return BitScanForward(lower); +#endif // HOST_64BIT +#else + int32_t result = __builtin_ctzll(value); + return static_cast<uint32_t>(result); +#endif + } static uint32_t BitScanReverse(uint32_t value);