mirror of
https://github.com/anyproto/any-sync.git
synced 2025-06-08 05:57:03 +09:00
223 lines
5 KiB
Go
223 lines
5 KiB
Go
package olddiff
|
|
|
|
import (
|
|
"math"
|
|
|
|
"github.com/huandu/skiplist"
|
|
"github.com/zeebo/blake3"
|
|
"golang.org/x/exp/slices"
|
|
)
|
|
|
|
type hashRange struct {
|
|
from, to uint64
|
|
parent *hashRange
|
|
isDivided bool
|
|
elements int
|
|
level int
|
|
hash []byte
|
|
}
|
|
|
|
type rangeTuple struct {
|
|
from, to uint64
|
|
}
|
|
|
|
type hashRanges struct {
|
|
ranges map[rangeTuple]*hashRange
|
|
topRange *hashRange
|
|
sl *skiplist.SkipList
|
|
dirty map[*hashRange]struct{}
|
|
divideFactor int
|
|
compareThreshold int
|
|
}
|
|
|
|
func newHashRanges(divideFactor, compareThreshold int, sl *skiplist.SkipList) *hashRanges {
|
|
h := &hashRanges{
|
|
ranges: make(map[rangeTuple]*hashRange),
|
|
dirty: make(map[*hashRange]struct{}),
|
|
divideFactor: divideFactor,
|
|
compareThreshold: compareThreshold,
|
|
sl: sl,
|
|
}
|
|
h.topRange = &hashRange{
|
|
from: 0,
|
|
to: math.MaxUint64,
|
|
isDivided: true,
|
|
level: 0,
|
|
}
|
|
h.ranges[rangeTuple{from: 0, to: math.MaxUint64}] = h.topRange
|
|
h.makeBottomRanges(h.topRange)
|
|
return h
|
|
}
|
|
|
|
func (h *hashRanges) hash() []byte {
|
|
return h.topRange.hash
|
|
}
|
|
|
|
func (h *hashRanges) addElement(elHash uint64) {
|
|
rng := h.topRange
|
|
rng.elements++
|
|
for rng.isDivided {
|
|
rng = h.getBottomRange(rng, elHash)
|
|
rng.elements++
|
|
}
|
|
h.dirty[rng] = struct{}{}
|
|
if rng.elements > h.compareThreshold {
|
|
rng.isDivided = true
|
|
h.makeBottomRanges(rng)
|
|
}
|
|
if rng.parent != nil {
|
|
if _, ok := h.dirty[rng.parent]; ok {
|
|
delete(h.dirty, rng.parent)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (h *hashRanges) removeElement(elHash uint64) {
|
|
rng := h.topRange
|
|
rng.elements--
|
|
for rng.isDivided {
|
|
rng = h.getBottomRange(rng, elHash)
|
|
rng.elements--
|
|
}
|
|
parent := rng.parent
|
|
if parent.elements <= h.compareThreshold && parent != h.topRange {
|
|
ranges := genTupleRanges(parent.from, parent.to, h.divideFactor)
|
|
for _, tuple := range ranges {
|
|
child := h.ranges[tuple]
|
|
delete(h.ranges, tuple)
|
|
delete(h.dirty, child)
|
|
}
|
|
parent.isDivided = false
|
|
h.dirty[parent] = struct{}{}
|
|
} else {
|
|
h.dirty[rng] = struct{}{}
|
|
}
|
|
}
|
|
|
|
func (h *hashRanges) recalculateHashes() {
|
|
for len(h.dirty) > 0 {
|
|
var slDirty []*hashRange
|
|
for rng := range h.dirty {
|
|
slDirty = append(slDirty, rng)
|
|
}
|
|
slices.SortFunc(slDirty, func(a, b *hashRange) int {
|
|
if a.level < b.level {
|
|
return -1
|
|
} else if a.level > b.level {
|
|
return 1
|
|
} else {
|
|
return 0
|
|
}
|
|
})
|
|
for _, rng := range slDirty {
|
|
if rng.isDivided {
|
|
rng.hash = h.calcDividedHash(rng)
|
|
} else {
|
|
rng.hash, rng.elements = h.calcElementsHash(rng.from, rng.to)
|
|
}
|
|
delete(h.dirty, rng)
|
|
if rng.parent != nil {
|
|
h.dirty[rng.parent] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (h *hashRanges) getRange(from, to uint64) *hashRange {
|
|
return h.ranges[rangeTuple{from: from, to: to}]
|
|
}
|
|
|
|
func (h *hashRanges) getBottomRange(rng *hashRange, elHash uint64) *hashRange {
|
|
df := uint64(h.divideFactor)
|
|
perRange := (rng.to - rng.from) / df
|
|
align := ((rng.to-rng.from)%df + 1) % df
|
|
if align == 0 {
|
|
perRange++
|
|
}
|
|
bucket := (elHash - rng.from) / perRange
|
|
tuple := rangeTuple{from: rng.from + bucket*perRange, to: rng.from - 1 + (bucket+1)*perRange}
|
|
if bucket == df-1 {
|
|
tuple.to += align
|
|
}
|
|
return h.ranges[tuple]
|
|
}
|
|
|
|
func (h *hashRanges) makeBottomRanges(rng *hashRange) {
|
|
ranges := genTupleRanges(rng.from, rng.to, h.divideFactor)
|
|
for _, tuple := range ranges {
|
|
newRange := h.makeRange(tuple, rng)
|
|
h.ranges[tuple] = newRange
|
|
if newRange.elements > h.compareThreshold {
|
|
if _, ok := h.dirty[rng]; ok {
|
|
delete(h.dirty, rng)
|
|
}
|
|
h.dirty[newRange] = struct{}{}
|
|
newRange.isDivided = true
|
|
h.makeBottomRanges(newRange)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (h *hashRanges) makeRange(tuple rangeTuple, parent *hashRange) *hashRange {
|
|
newRange := &hashRange{
|
|
from: tuple.from,
|
|
to: tuple.to,
|
|
parent: parent,
|
|
}
|
|
hash, els := h.calcElementsHash(tuple.from, tuple.to)
|
|
newRange.hash = hash
|
|
newRange.level = parent.level + 1
|
|
newRange.elements = els
|
|
return newRange
|
|
}
|
|
|
|
func (h *hashRanges) calcDividedHash(rng *hashRange) (hash []byte) {
|
|
hasher := hashersPool.Get().(*blake3.Hasher)
|
|
defer hashersPool.Put(hasher)
|
|
hasher.Reset()
|
|
ranges := genTupleRanges(rng.from, rng.to, h.divideFactor)
|
|
for _, tuple := range ranges {
|
|
child := h.ranges[tuple]
|
|
hasher.Write(child.hash)
|
|
}
|
|
hash = hasher.Sum(nil)
|
|
return
|
|
}
|
|
|
|
func genTupleRanges(from, to uint64, divideFactor int) (prepare []rangeTuple) {
|
|
df := uint64(divideFactor)
|
|
perRange := (to - from) / df
|
|
align := ((to-from)%df + 1) % df
|
|
if align == 0 {
|
|
perRange++
|
|
}
|
|
var j = from
|
|
for i := 0; i < divideFactor; i++ {
|
|
if i == divideFactor-1 {
|
|
perRange += align
|
|
}
|
|
prepare = append(prepare, rangeTuple{from: j, to: j + perRange - 1})
|
|
j += perRange
|
|
}
|
|
return
|
|
}
|
|
|
|
func (h *hashRanges) calcElementsHash(from, to uint64) (hash []byte, els int) {
|
|
hasher := hashersPool.Get().(*blake3.Hasher)
|
|
defer hashersPool.Put(hasher)
|
|
hasher.Reset()
|
|
|
|
el := h.sl.Find(&element{hash: from})
|
|
for el != nil && el.Key().(*element).hash <= to {
|
|
elem := el.Key().(*element).Element
|
|
el = el.Next()
|
|
|
|
hasher.WriteString(elem.Id)
|
|
hasher.WriteString(elem.Head)
|
|
els++
|
|
}
|
|
if els != 0 {
|
|
hash = hasher.Sum(nil)
|
|
}
|
|
return
|
|
}
|