Implementing a Cuckoo Filter in Go

  • They support adding / removing items dynamically
  • Higher lookup performance, even when 95% space is utilized
  • Easier to implement than alternatives
  • Less space used than bloom filters in *most* cases

Setting up the Cuckoo Filter

// Cuckoo filter based on https://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf
type Cuckoo struct {
buckets []bucket
m uint // buckets
b uint // entries per bucket
f uint // fingerprint length
n uint // filter capacity (rename cap?)
}
type bucket []fingerprint
type fingerprint []byte
var hasher = sha1.New()
// n = len(items), fp = false positive rate
func NewCuckoo(n uint, fp float64) *Cuckoo {
b := uint(4)
f := fingerprintLength(b, fp)
m := nextPower(n / f * 8)
buckets := make([]bucket, m)
for i := uint(0); i < m; i++ {
buckets[i] = make(bucket, b)
}
return &Cuckoo{
buckets: buckets,
m: m,
b: b,
f: f,
n: n,
}
}

Inserting values

func (c *Cuckoo) hashes(data string) (uint, uint, fingerprint) {
h := hash([]byte(data))
f := h[0:c.f]
i1 := uint(binary.BigEndian.Uint32(h))
i2 := i1 ^ uint(binary.BigEndian.Uint32(hash(f)))
return i1, i2, fingerprint(f)
}

func hash(data []byte) []byte {
hasher.Write([]byte(data))
hash := hasher.Sum(nil)
hasher.Reset()
return hash
}
// try to store in bucket 1
// if success-> done
// if not -> try to store in bucket 2
// if success -> done
// if not ->
// while retry < retryLimit
// pick random entry (r) from bucket 1
// move entry (r) to alternate location
// try store in new bucket
// if success -> done
(source: https://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf)
i1, i2, f := c.hashes(input)
// first try bucket one
b1 := c.buckets[i1%c.m]
if i, err := b1.nextIndex(); err == nil {
b1[i] = f
return
}

b2 := c.buckets[i2%c.m]
if i, err := b2.nextIndex(); err == nil {
b2[i] = f
return
}
// start relocating items
i := i1
for r := 0; r < retries; r++ {
index := i % c.m
entryIndex := rand.Intn(int(c.b))
// swap
f, c.buckets[index][entryIndex] = c.buckets[index][entryIndex], f
i = i ^ uint(binary.BigEndian.Uint32(hash(f)))
b := c.buckets[i%c.m]
if idx, err := b.nextIndex(); err == nil {
b[idx] = f
c.count++
return
}
}
panic("cuckoo filter full")

Membership test

// lookup needle in the cuckoo nest :)
func (c *Cuckoo) lookup(needle string) bool {
i1, i2, f := c.hashes(needle)
return c.buckets[i1%c.m].contains(f) || c.buckets[i2%c.m].contains(f)
}
// returns the index and successful lookup
func (b bucket) contains(f fingerprint) (int, bool) {
for i, x := range b {
if bytes.Equal(x, f) {
return i, true
}
}
return -1, false
}

Deletion

// delete the fingerprint from the cuckoo filter
func (c *Cuckoo) delete(needle string) {
i1, i2, f := c.hashes(needle)
// try to remove from f1
b1 := c.buckets[i1%c.m]
if ind, ok := b1.contains(f); ok {
b1[ind] = nil
return
}

b2 := c.buckets[i2%c.m]
if ind, ok := b2.contains(f); ok {
b2[ind] = nil
return
}
}

Resources

  • Following me here, on Medium
  • Or twitter Twitter
  • Or check out workwithgo.com to find cool Go jobs around the world. 😄

--

--

--

github.com/DylanMeeus | Software Engineer @ Amazon

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

pygraphviz or other Python3 packages Missing Error on Ubuntu when building ns-3 with Bake

Sprint Planning in Agile/Scrum Velocity vs Capacity

Code Snippets (Russian)

Dave Elkan on Dev Speed

A Challenge for the Immutable Law of On Call

Design Principles: Big Data Transfer in Python

[Performance Speedup] Customizing GraphQL Tracing on NewRelic

Java access modifiers: which to choose when, and why

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Dylan Meeus

Dylan Meeus

github.com/DylanMeeus | Software Engineer @ Amazon

More from Medium

Golang Intellij Idea setup

Go: my first impressions

Use pprof to view go program stack traces

Go Context for a layman