1
0
Fork 0
mirror of https://github.com/VSadov/Satori.git synced 2025-06-09 09:34:49 +09:00

LSRA-throughput: Iterate over the regMaskTP instead of all registers (#87424)

* replace for-loop with regMaskTP iterator

* jit format

* REVERT

* fix a bug

* address review feedback

* Add genFirstRegNumFromMaskAndToggle and genFirstRegNumFromMask

* Use actualRegistersMask

* jit format

* review feedback

* Inline BitScanForward

* fix build error

* remove commented code
This commit is contained in:
Kunal Pathak 2023-06-19 09:40:16 -07:00 committed by GitHub
parent feff67d5ca
commit 60d00ec126
Signed by: github
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 234 additions and 159 deletions

View file

@ -708,16 +708,13 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* reg
while (regsMask != RBM_NONE)
{
regMaskTP reg1Mask = genFindLowestBit(regsMask);
regNumber reg1 = genRegNumFromMask(reg1Mask);
regsMask &= ~reg1Mask;
regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask);
regsCount -= 1;
bool isPairSave = false;
if (regsCount > 0)
{
regMaskTP reg2Mask = genFindLowestBit(regsMask);
regNumber reg2 = genRegNumFromMask(reg2Mask);
regNumber reg2 = genFirstRegNumFromMask(regsMask);
if (reg2 == REG_NEXT(reg1))
{
// The JIT doesn't allow saving pair (R28,FP), even though the
@ -733,7 +730,7 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* reg
{
isPairSave = true;
regsMask &= ~reg2Mask;
regsMask ^= genRegMask(reg2);
regsCount -= 1;
regStack->Push(RegPair(reg1, reg2));

View file

@ -706,6 +706,50 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
return regNum;
}
//------------------------------------------------------------------------------
// genFirstRegNumFromMaskAndToggle : Finds the lowest set bit of a register mask,
//                                   returns it as a register number, and toggles
//                                   that register's bit in the caller's `mask`.
//
// Arguments:
//    mask - the register mask (in/out); must be non-zero. On return the bit that
//           genRegMask() reports for the returned register has been XOR-ed into it.
//
// Return Value:
//    The register number corresponding to the lowest set bit of the incoming `mask`.
//
inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask)
{
    // An empty mask has no first register to report.
    assert(mask != 0);

    // The index of the lowest set bit is used directly as the register number.
    const regNumber firstReg = static_cast<regNumber>(BitOperations::BitScanForward(mask));

    // Toggle the bit for the register we are about to hand back so that a caller
    // looping on `mask` advances to the next register.
    mask ^= genRegMask(firstReg);

    return firstReg;
}
//------------------------------------------------------------------------------
// genFirstRegNumFromMask : Finds the lowest set bit of a register mask and returns
//                          it as a register number. The mask itself is not modified
//                          (it is taken by value).
//
// Arguments:
//    mask - the register mask; must be non-zero.
//
// Return Value:
//    The register number corresponding to the lowest set bit of `mask`.
//
inline regNumber genFirstRegNumFromMask(regMaskTP mask)
{
    // An empty mask has no first register to report.
    assert(mask != 0);

    // The index of the lowest set bit is used directly as the register number.
    return static_cast<regNumber>(BitOperations::BitScanForward(mask));
}
/*****************************************************************************
*
* Return the size in bytes of the given type.

View file

@ -25840,9 +25840,9 @@ regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */)
{
regMaskTP availableSet = gtRsvdRegs & mask;
assert(genCountBits(availableSet) >= 1);
regMaskTP tempRegMask = genFindLowestBit(availableSet);
gtRsvdRegs &= ~tempRegMask;
return genRegNumFromMask(tempRegMask);
regNumber tempReg = genFirstRegNumFromMask(availableSet);
gtRsvdRegs ^= genRegMask(tempReg);
return tempReg;
}
//------------------------------------------------------------------------

View file

@ -297,9 +297,10 @@ regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInte
regMaskTP result = RBM_NONE;
while (candidates != RBM_NONE)
{
regMaskTP candidateBit = genFindLowestBit(candidates);
candidates &= ~candidateBit;
regNumber regNum = genRegNumFromMask(candidateBit);
regNumber regNum = genFirstRegNumFromMask(candidates);
regMaskTP candidateBit = genRegMask(regNum);
candidates ^= candidateBit;
RegRecord* physRegRecord = getRegisterRecord(regNum);
if (isMatchingConstant(physRegRecord, refPosition))
{
@ -3848,9 +3849,8 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition)
INDEBUG(bool killedRegs = false);
while (candidateRegs != RBM_NONE)
{
regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
candidateRegs &= ~nextRegBit;
regNumber nextReg = genRegNumFromMask(nextRegBit);
regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs);
RegRecord* regRecord = getRegisterRecord(nextReg);
Interval* assignedInterval = regRecord->assignedInterval;
if (assignedInterval == nullptr || (assignedInterval->isActive == false))
@ -3945,9 +3945,7 @@ regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar
regNumber newReg = REG_NA;
while (candidateRegs != RBM_NONE)
{
regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
candidateRegs &= ~nextRegBit;
regNumber nextReg = genRegNumFromMask(nextRegBit);
regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs);
if (nextReg > targetReg)
{
newReg = nextReg;
@ -4440,6 +4438,9 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
resetRegState();
setRegsInUse(liveRegs);
}
#ifdef TARGET_ARM
for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg))
{
RegRecord* physRegRecord = getRegisterRecord(reg);
@ -4472,7 +4473,6 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType));
}
#ifdef TARGET_ARM
// unassignPhysReg, above, may have restored a 'previousInterval', in which case we need to
// get the value of 'physRegRecord->assignedInterval' rather than using 'assignedInterval'.
if (physRegRecord->assignedInterval != nullptr)
@ -4486,10 +4486,8 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
reg = REG_NEXT(reg);
makeRegAvailable(reg, physRegRecord->registerType);
}
#endif // TARGET_ARM
}
}
#ifdef TARGET_ARM
else
{
Interval* assignedInterval = physRegRecord->assignedInterval;
@ -4501,8 +4499,46 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
reg = REG_NEXT(reg);
}
}
#endif // TARGET_ARM
}
#else
regMaskTP deadCandidates = ~liveRegs;
// Only focus on actual registers present
deadCandidates &= actualRegistersMask;
while (deadCandidates != RBM_NONE)
{
regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates);
RegRecord* physRegRecord = getRegisterRecord(reg);
makeRegAvailable(reg, physRegRecord->registerType);
Interval* assignedInterval = physRegRecord->assignedInterval;
if (assignedInterval != nullptr)
{
assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector());
if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
{
assignedInterval->isActive = false;
if (assignedInterval->getNextRefPosition() == nullptr)
{
unassignPhysReg(physRegRecord, nullptr);
}
if (!assignedInterval->IsUpperVector())
{
inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
}
}
else
{
// This interval may still be active, but was in another register in an
// intervening block.
clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType));
}
}
}
#endif // TARGET_ARM
}
//------------------------------------------------------------------------
@ -4668,15 +4704,14 @@ void LinearScan::freeRegisters(regMaskTP regsToFree)
makeRegsAvailable(regsToFree);
while (regsToFree != RBM_NONE)
{
regMaskTP nextRegBit = genFindLowestBit(regsToFree);
regsToFree &= ~nextRegBit;
regNumber nextReg = genRegNumFromMask(nextRegBit);
regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree);
RegRecord* regRecord = getRegisterRecord(nextReg);
#ifdef TARGET_ARM
if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE))
{
assert(genIsValidDoubleReg(nextReg));
regsToFree &= ~(nextRegBit << 1);
regsToFree ^= genRegMask(regNumber(nextReg + 1));
}
#endif
freeRegister(regRecord);
@ -4798,9 +4833,7 @@ void LinearScan::allocateRegisters()
regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive);
while (tempRegsToMakeInactive != RBM_NONE)
{
regMaskTP nextRegBit = genFindLowestBit(tempRegsToMakeInactive);
tempRegsToMakeInactive &= ~nextRegBit;
regNumber nextReg = genRegNumFromMask(nextRegBit);
regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive);
RegRecord* regRecord = getRegisterRecord(nextReg);
clearSpillCost(regRecord->regNum, regRecord->registerType);
makeRegisterInactive(regRecord);
@ -8841,9 +8874,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
regMaskTP targetCandidates = targetRegsToDo;
while (targetCandidates != RBM_NONE)
{
regMaskTP targetRegMask = genFindLowestBit(targetCandidates);
targetCandidates &= ~targetRegMask;
regNumber targetReg = genRegNumFromMask(targetRegMask);
regNumber targetReg = genFirstRegNumFromMask(targetCandidates);
regMaskTP targetRegMask = genRegMask(targetReg);
targetCandidates ^= targetRegMask;
if (location[targetReg] == REG_NA)
{
#ifdef TARGET_ARM
@ -8872,10 +8905,10 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
{
while (targetRegsReady != RBM_NONE)
{
regMaskTP targetRegMask = genFindLowestBit(targetRegsReady);
targetRegsToDo &= ~targetRegMask;
targetRegsReady &= ~targetRegMask;
regNumber targetReg = genRegNumFromMask(targetRegMask);
regNumber targetReg = genFirstRegNumFromMask(targetRegsReady);
regMaskTP targetRegMask = genRegMask(targetReg);
targetRegsToDo ^= targetRegMask;
targetRegsReady ^= targetRegMask;
assert(location[targetReg] != targetReg);
assert(targetReg < REG_COUNT);
regNumber sourceReg = (regNumber)source[targetReg];
@ -8943,8 +8976,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
}
if (targetRegsToDo != RBM_NONE)
{
regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo);
regNumber targetReg = genRegNumFromMask(targetRegMask);
regNumber targetReg = genFirstRegNumFromMask(targetRegsToDo);
regMaskTP targetRegMask = genRegMask(targetReg);
// Is it already there due to other moves?
// If not, move it to the temp reg, OR swap it with another register
@ -9006,9 +9039,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
regMaskTP mask = targetRegsToDo;
while (mask != RBM_NONE && otherTargetReg == REG_NA)
{
regMaskTP nextRegMask = genFindLowestBit(mask);
regNumber nextReg = genRegNumFromMask(nextRegMask);
mask &= ~nextRegMask;
regNumber nextReg = genFirstRegNumFromMaskAndToggle(mask);
if (location[source[nextReg]] == targetReg)
{
otherTargetReg = nextReg;
@ -9111,9 +9142,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// All the target regs will be empty at this point
while (targetRegsFromStack != RBM_NONE)
{
regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
targetRegsFromStack &= ~targetRegMask;
regNumber targetReg = genRegNumFromMask(targetRegMask);
regNumber targetReg = genFirstRegNumFromMaskAndToggle(targetRegsFromStack);
Interval* interval = stackToRegIntervals[targetReg];
assert(interval != nullptr);
@ -11166,9 +11195,8 @@ void LinearScan::verifyFinalAllocation()
regMaskTP candidateRegs = currentRefPosition.registerAssignment;
while (candidateRegs != RBM_NONE)
{
regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
candidateRegs &= ~nextRegBit;
regNumber nextReg = genRegNumFromMask(nextRegBit);
regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs);
RegRecord* regRecord = getRegisterRecord(nextReg);
Interval* assignedInterval = regRecord->assignedInterval;
assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType));
@ -11724,9 +11752,9 @@ void LinearScan::RegisterSelection::try_BEST_FIT()
LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation;
for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;)
{
regMaskTP bestFitCandidateBit = genFindLowestBit(bestFitCandidates);
bestFitCandidates &= ~bestFitCandidateBit;
regNumber bestFitCandidateRegNum = genRegNumFromMask(bestFitCandidateBit);
regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates);
regMaskTP bestFitCandidateBit = genRegMask(bestFitCandidateRegNum);
bestFitCandidates ^= bestFitCandidateBit;
// Find the next RefPosition of the register.
LsraLocation nextIntervalLocation = linearScan->getNextIntervalRef(bestFitCandidateRegNum, regType);
@ -11813,10 +11841,11 @@ void LinearScan::RegisterSelection::try_REG_ORDER()
regMaskTP lowestRegOrderBit = RBM_NONE;
for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;)
{
regMaskTP regOrderCandidateBit = genFindLowestBit(regOrderCandidates);
regOrderCandidates &= ~regOrderCandidateBit;
regNumber regOrderCandidateRegNum = genRegNumFromMask(regOrderCandidateBit);
unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder;
regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates);
regMaskTP regOrderCandidateBit = genRegMask(regOrderCandidateRegNum);
regOrderCandidates ^= regOrderCandidateBit;
unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder;
if (thisRegOrder < lowestRegOrder)
{
lowestRegOrder = thisRegOrder;
@ -11848,9 +11877,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;)
{
regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates);
spillCandidates &= ~spillCandidateBit;
regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit);
regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates);
regMaskTP spillCandidateBit = genRegMask(spillCandidateRegNum);
spillCandidates ^= spillCandidateBit;
RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum];
Interval* assignedInterval = spillCandidateRegRecord->assignedInterval;
RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr;
@ -11956,9 +11986,9 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF()
regMaskTP farthestSet = RBM_NONE;
for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE;)
{
regMaskTP farthestCandidateBit = genFindLowestBit(farthestCandidates);
farthestCandidates &= ~farthestCandidateBit;
regNumber farthestCandidateRegNum = genRegNumFromMask(farthestCandidateBit);
regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates);
regMaskTP farthestCandidateBit = genRegMask(farthestCandidateRegNum);
farthestCandidates ^= farthestCandidateBit;
// Find the next RefPosition of the register.
LsraLocation nextIntervalLocation =
@ -11989,11 +12019,11 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
regMaskTP prevRegOptSet = RBM_NONE;
for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;)
{
regMaskTP prevRegOptCandidateBit = genFindLowestBit(prevRegOptCandidates);
prevRegOptCandidates &= ~prevRegOptCandidateBit;
regNumber prevRegOptCandidateRegNum = genRegNumFromMask(prevRegOptCandidateBit);
Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval;
bool foundPrevRegOptReg = true;
regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates);
regMaskTP prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum);
prevRegOptCandidates ^= prevRegOptCandidateBit;
Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval;
bool foundPrevRegOptReg = true;
#ifdef DEBUG
bool hasAssignedInterval = false;
#endif
@ -12094,9 +12124,9 @@ void LinearScan::RegisterSelection::calculateCoversSets()
regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet;
for (; coversCandidates != RBM_NONE;)
{
regMaskTP coversCandidateBit = genFindLowestBit(coversCandidates);
coversCandidates &= ~coversCandidateBit;
regNumber coversCandidateRegNum = genRegNumFromMask(coversCandidateBit);
regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates);
regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum);
coversCandidates ^= coversCandidateBit;
// If we have a single candidate we don't need to compute the preference-related sets, but we
// do need to compute the unassignedSet.
@ -12397,9 +12427,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
regMaskTP checkConflictMask = candidates & linearScan->fixedRegs;
while (checkConflictMask != RBM_NONE)
{
regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask);
checkConflictMask &= ~checkConflictBit;
regNumber checkConflictReg = genRegNumFromMask(checkConflictBit);
regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask);
regMaskTP checkConflictBit = genRegMask(checkConflictReg);
checkConflictMask ^= checkConflictBit;
LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg];
if ((checkConflictLocation == refPosition->nodeLocation) ||

View file

@ -1661,8 +1661,8 @@ private:
VarToRegMap* outVarToRegMaps;
// A temporary VarToRegMap used during the resolution of critical edges.
VarToRegMap sharedCriticalVarToRegMap;
VarToRegMap sharedCriticalVarToRegMap;
PhasedVar<regMaskTP> actualRegistersMask;
PhasedVar<regMaskTP> availableIntRegs;
PhasedVar<regMaskTP> availableFloatRegs;
PhasedVar<regMaskTP> availableDoubleRegs;

View file

@ -727,33 +727,30 @@ bool LinearScan::isContainableMemoryOp(GenTree* node)
//
void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
{
if (refType == RefTypeKill)
{
// The mask identifies a set of registers that will be used during
// codegen. Mark these as modified here, so when we do final frame
// layout, we'll know about all these registers. This is especially
// important if mask contains callee-saved registers, which affect the
// frame size since we need to save/restore them. In the case where we
// have a copyBlk with GC pointers, can need to call the
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and
// RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as
// modified until codegen, which is too late.
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true));
}
assert(refType == RefTypeKill);
for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
// The mask identifies a set of registers that will be used during
// codegen. Mark these as modified here, so when we do final frame
// layout, we'll know about all these registers. This is especially
// important if mask contains callee-saved registers, which affect the
// frame size since we need to save/restore them. In the case where we
// have a copyBlk with GC pointers, can need to call the
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and
// RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as
// modified until codegen, which is too late.
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true));
for (regMaskTP candidates = mask; candidates != RBM_NONE;)
{
if (mask & 1)
regNumber reg = genFirstRegNumFromMaskAndToggle(candidates);
// This assumes that these are all "special" RefTypes that
// don't need to be recorded on the tree (hence treeNode is nullptr)
RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
genRegMask(reg)); // This MUST occupy the physical register (obviously)
if (isLastUse)
{
// This assumes that these are all "special" RefTypes that
// don't need to be recorded on the tree (hence treeNode is nullptr)
RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
genRegMask(reg)); // This MUST occupy the physical register (obviously)
if (isLastUse)
{
pos->lastUse = true;
}
pos->lastUse = true;
}
}
}
@ -2756,6 +2753,16 @@ void LinearScan::buildIntervals()
availableRegCount = REG_INT_COUNT;
}
if (availableRegCount < (sizeof(regMaskTP) * 8))
{
// Mask out the bits that are between 64 ~ availableRegCount
actualRegistersMask = (1ULL << availableRegCount) - 1;
}
else
{
actualRegistersMask = ~RBM_NONE;
}
#ifdef DEBUG
// Make sure we don't have any blocks that were not visited
for (BasicBlock* const block : compiler->Blocks())

View file

@ -2556,66 +2556,6 @@ double FloatingPointUtils::normalize(double value)
#endif
}
//------------------------------------------------------------------------
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
// (MSB) for a set bit (1)
//
// Arguments:
//    value - the value to scan; must be non-zero (asserted)
//
// Return Value:
//    The zero-based index of the least significant set bit in `value`.
//
uint32_t BitOperations::BitScanForward(uint32_t value)
{
    // Neither the MSVC intrinsic nor the GCC/Clang builtin gives a usable index for zero.
    assert(value != 0);

#if defined(_MSC_VER)
    unsigned long index;
    ::_BitScanForward(&index, value);
    return static_cast<uint32_t>(index);
#else
    // The count of trailing zero bits is the index of the lowest set bit.
    return static_cast<uint32_t>(__builtin_ctz(value));
#endif
}
//------------------------------------------------------------------------
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
// (MSB) for a set bit (1)
//
// Arguments:
//    value - the value to scan; must be non-zero (asserted)
//
// Return Value:
//    The zero-based index (0..63) of the least significant set bit in `value`.
//
uint32_t BitOperations::BitScanForward(uint64_t value)
{
    // Neither the MSVC intrinsics nor the GCC/Clang builtin give a usable index for zero.
    assert(value != 0);

#if defined(_MSC_VER)
#if defined(HOST_64BIT)
    unsigned long index;
    ::_BitScanForward64(&index, value);
    return static_cast<uint32_t>(index);
#else
    // 32-bit host: scan the low half with the 32-bit overload first, and only when it
    // is empty scan the high half, offsetting the result by 32.
    const uint32_t lower = static_cast<uint32_t>(value);
    if (lower != 0)
    {
        return BitScanForward(lower);
    }

    const uint32_t upper = static_cast<uint32_t>(value >> 32);
    return 32 + BitScanForward(upper);
#endif // HOST_64BIT
#else
    // The count of trailing zero bits is the index of the lowest set bit.
    return static_cast<uint32_t>(__builtin_ctzll(value));
#endif
}
//------------------------------------------------------------------------
// BitOperations::BitScanReverse: Search the mask data from most significant bit (MSB) to least significant bit
// (LSB) for a set bit (1).

View file

@ -775,9 +775,65 @@ public:
class BitOperations
{
public:
static uint32_t BitScanForward(uint32_t value);
//------------------------------------------------------------------------
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
// (MSB) for a set bit (1)
//
// Arguments:
// value - the value to scan; must be non-zero (asserted below)
//
// Return Value:
// The zero-based index of the least significant set bit in `value`.
//
FORCEINLINE static uint32_t BitScanForward(uint32_t value)
{
assert(value != 0);
// NOTE(review): the declaration on the next line looks like the old out-of-line 64-bit
// prototype left over from the change that inlined these functions, not part of this
// function body -- confirm against the real header before relying on it.
static uint32_t BitScanForward(uint64_t value);
#if defined(_MSC_VER)
unsigned long result;
::_BitScanForward(&result, value);
return static_cast<uint32_t>(result);
#else
// The count of trailing zero bits is the index of the lowest set bit.
int32_t result = __builtin_ctz(value);
return static_cast<uint32_t>(result);
#endif
}
//------------------------------------------------------------------------
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
// (MSB) for a set bit (1)
//
// Arguments:
//    value - the value to scan; must be non-zero (asserted)
//
// Return Value:
//    The zero-based index (0..63) of the least significant set bit in `value`.
//
FORCEINLINE static uint32_t BitScanForward(uint64_t value)
{
    // Neither the MSVC intrinsics nor the GCC/Clang builtin give a usable index for zero.
    assert(value != 0);

#if defined(_MSC_VER)
#if defined(HOST_64BIT)
    unsigned long index;
    ::_BitScanForward64(&index, value);
    return static_cast<uint32_t>(index);
#else
    // 32-bit host: scan the low half with the 32-bit overload first, and only when it
    // is empty scan the high half, offsetting the result by 32.
    const uint32_t lower = static_cast<uint32_t>(value);
    if (lower != 0)
    {
        return BitScanForward(lower);
    }

    const uint32_t upper = static_cast<uint32_t>(value >> 32);
    return 32 + BitScanForward(upper);
#endif // HOST_64BIT
#else
    // The count of trailing zero bits is the index of the lowest set bit.
    return static_cast<uint32_t>(__builtin_ctzll(value));
#endif
}
static uint32_t BitScanReverse(uint32_t value);