-
Notifications
You must be signed in to change notification settings - Fork 0
/
robin_hood.go
294 lines (249 loc) · 7.61 KB
/
robin_hood.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
package hashmaps
import (
"fmt"
)
const (
emptyBucket = -1
)
type bucket[K comparable, V any] struct {
key K
// psl is the probe sequence length (PSL), which is the distance value from
// the optimum insertion. -1 or `emptyBucket` signals a free slot.
// inspired from:
// - https://programming.guide/robin-hood-hashing.html
// - https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
psl int8
value V
}
// RobinHood is a hash map that uses linear probing in combination with
// robin hood hashing as collision strategy. The map tracks the distance
// from the optimum bucket and minimized the variance over all buckets.
// The expected max PSL for a full robin hood hash map is O(ln(n)).
// The max load factor can be changed with `MaxLoad()`.
// The result is a good trade off between performance and memory consumption.
// Note that the performance for a open addressing hash map depends
// also on the key and value size. For higher storage sizes for the
// keys and values use a hashmap that uses another strategy
// like the golang std map or the Unordered map.
type RobinHood[K comparable, V any] struct {
buckets []bucket[K, V]
hasher HashFn[K]
// length stores the current inserted elements
length uintptr
// capMinus1 is used for a bitwise AND on the hash value,
// because the size of the underlying array is a power of two value
capMinus1 uintptr
nextResize uintptr
maxLoad float32
}
// go:inline
func newBucketArray[K comparable, V any](capacity uintptr) []bucket[K, V] {
buckets := make([]bucket[K, V], capacity)
for i := range buckets {
buckets[i].psl = emptyBucket
}
return buckets
}
// NewRobinHood creates a ready to use `RobinHood` hash map with default settings.
func NewRobinHood[K comparable, V any]() *RobinHood[K, V] {
return NewRobinHoodWithHasher[K, V](GetHasher[K]())
}
// NewRobinHoodWithHasher same as `NewRobinHood` but with a given hash function.
func NewRobinHoodWithHasher[K comparable, V any](hasher HashFn[K]) *RobinHood[K, V] {
capacity := uintptr(4)
return &RobinHood[K, V]{
buckets: newBucketArray[K, V](capacity),
capMinus1: capacity - 1,
hasher: hasher,
nextResize: 2,
maxLoad: DefaultMaxLoad,
}
}
// Get returns the value stored for this key, or false if there is no such value.
//
// Note:
// - There exists also other search strategies like organ-pipe search
// or smart search, where searching starts around the mean value
// (mean, mean − 1, mean + 1, mean − 2, mean + 2, ...)
// - Here it is used the simplest technic, which is more cache friendly and
// does not track other metic values.
func (m *RobinHood[K, V]) Get(key K) (V, bool) {
var (
idx = m.hasher(key) & m.capMinus1
v V
)
for psl := int8(0); psl <= m.buckets[idx].psl; psl++ {
if m.buckets[idx].key == key {
return m.buckets[idx].value, true
}
// next index
idx = (idx + 1) & m.capMinus1
}
return v, false
}
// Reserve sets the number of buckets to the most appropriate to contain at least n elements.
// If n is lower than that, the function may have no effect.
func (m *RobinHood[K, V]) Reserve(n uintptr) {
var (
needed = uintptr(float32(n) / m.maxLoad)
newCap = uintptr(NextPowerOf2(uint64(needed)))
)
if uintptr(cap(m.buckets)) < newCap {
m.resize(newCap)
}
}
func (m *RobinHood[K, V]) resize(n uintptr) {
newm := RobinHood[K, V]{
capMinus1: n - 1,
length: m.length,
buckets: newBucketArray[K, V](n),
hasher: m.hasher,
maxLoad: m.maxLoad,
nextResize: uintptr(float32(n) * m.maxLoad),
}
for i := range m.buckets {
if m.buckets[i].psl != emptyBucket {
idx := newm.hasher(m.buckets[i].key) & newm.capMinus1
m.buckets[i].psl = 0
newm.emplaceNew(&m.buckets[i], idx)
}
}
m.nextResize = newm.nextResize
m.capMinus1 = newm.capMinus1
m.buckets = newm.buckets
}
// Put maps the given key to the given value. If the key already exists its
// value will be overwritten with the new value.
// Returns true, if the element is a new item in the hash map.
func (m *RobinHood[K, V]) Put(key K, val V) bool {
if m.length >= m.nextResize {
m.resize(uintptr(cap(m.buckets)) * 2)
}
var (
idx = m.hasher(key) & m.capMinus1
psl = int8(0)
)
// search for the key
for ; psl <= m.buckets[idx].psl; psl++ {
if m.buckets[idx].key == key {
m.buckets[idx].value = val
return false // update already existing value
}
// next index
idx = (idx + 1) & m.capMinus1
}
m.length++
newBucket := bucket[K, V]{key: key, value: val, psl: psl}
m.emplaceNew(&newBucket, idx)
return true
}
// emplaceNew applies the Robin Hood creed to all following buckets until a empty is found.
// Robin Hood creed: "takes from the rich and gives to the poor".
// rich means, low psl
// poor means, higher psl
//
// The result is a better distribution of the PSL values,
// where the expected length of the longest PSL is O(log(n)).
//
// go:inline
func (m *RobinHood[K, V]) emplaceNew(current *bucket[K, V], idx uintptr) {
for ; ; current.psl++ {
if m.buckets[idx].psl == emptyBucket {
// emplace the element, a valid bucket was found
m.buckets[idx] = *current
return
}
if current.psl > m.buckets[idx].psl {
// swap values, apply the Robin Hood creed
*current, m.buckets[idx] = m.buckets[idx], *current
}
// next index
idx = (idx + 1) & m.capMinus1
}
}
// Remove removes the specified key-value pair from the map.
// Returns true, if the element was in the hash map.
func (m *RobinHood[K, V]) Remove(key K) bool {
var (
idx = m.hasher(key) & m.capMinus1
current *bucket[K, V]
)
// search for the key
for psl := int8(0); psl <= m.buckets[idx].psl; psl++ {
if m.buckets[idx].key == key {
current = &m.buckets[idx]
break
}
// next index
idx = (idx + 1) & m.capMinus1
}
if current == nil {
return false
}
// remove the key
m.length--
// mark as empty, because we want to remove it
current.psl = emptyBucket
idx = (idx + 1) & m.capMinus1
next := &m.buckets[idx]
// now, back shift all buckets until we found a optimum or empty one
for next.psl > 0 {
next.psl--
*current, *next = *next, *current // swap values
current = next
idx = (idx + 1) & m.capMinus1
next = &m.buckets[idx]
}
return true
}
// Clear removes all key-value pairs from the map.
func (m *RobinHood[K, V]) Clear() {
for i := range m.buckets {
m.buckets[i].psl = emptyBucket
}
m.length = 0
}
// Load return the current load of the hash map.
func (m *RobinHood[K, V]) Load() float32 {
return float32(m.length) / float32(cap(m.buckets))
}
// MaxLoad forces resizing if the ratio is reached.
// Useful values are in range [0.5-0.9].
// Returns ErrOutOfRange if `lf` is not in the open range (0.0,1.0).
func (m *RobinHood[K, V]) MaxLoad(lf float32) error {
if lf <= 0.0 || lf >= 1.0 {
return fmt.Errorf("%f: %w", lf, ErrOutOfRange)
}
m.maxLoad = lf
m.nextResize = uintptr(float32(cap(m.buckets)) * lf)
return nil
}
// Size returns the number of items in the map.
func (m *RobinHood[K, V]) Size() int {
return int(m.length)
}
// Copy returns a copy of this map.
func (m *RobinHood[K, V]) Copy() *RobinHood[K, V] {
newM := &RobinHood[K, V]{
buckets: make([]bucket[K, V], cap(m.buckets)),
capMinus1: m.capMinus1,
length: m.length,
hasher: m.hasher,
maxLoad: m.maxLoad,
nextResize: m.nextResize,
}
copy(newM.buckets, m.buckets)
return newM
}
// Each calls 'fn' on every key-value pair in the hash map in no particular order.
// If 'fn' returns true, the iteration stops.
func (m *RobinHood[K, V]) Each(fn func(key K, val V) bool) {
for i := range m.buckets {
if m.buckets[i].psl != emptyBucket {
if stop := fn(m.buckets[i].key, m.buckets[i].value); stop {
// stop iteration
return
}
}
}
}