Replace sharded hash function/misc mods

Patrick Mylund Nielsen 2014-12-22 10:37:59 -05:00
parent 8a2f4f19ef
commit 03284ca422
3 changed files with 138 additions and 37 deletions

cache_test.go

@@ -1472,7 +1472,8 @@ func BenchmarkRWMutexMapGetConcurrent(b *testing.B) {
func BenchmarkCacheGetManyConcurrent(b *testing.B) {
	// This is the same as BenchmarkCacheGetConcurrent, but its result
-	// can be compared against BenchmarkShardedCacheGetManyConcurrent.
+	// can be compared against BenchmarkShardedCacheGetManyConcurrent
+	// in sharded_test.go.
	b.StopTimer()
	n := 10000
	tc := New(DefaultExpiration, 0)
@@ -1497,31 +1498,6 @@ func BenchmarkCacheGetManyConcurrent(b *testing.B) {
	wg.Wait()
}

-func BenchmarkShardedCacheGetManyConcurrent(b *testing.B) {
-	b.StopTimer()
-	n := 10000
-	tsc := unexportedNewSharded(20, DefaultExpiration, 0)
-	keys := make([]string, n)
-	for i := 0; i < n; i++ {
-		k := "foo" + strconv.Itoa(n)
-		keys[i] = k
-		tsc.Set(k, "bar", DefaultExpiration)
-	}
-	each := b.N / n
-	wg := new(sync.WaitGroup)
-	wg.Add(n)
-	for _, v := range keys {
-		go func() {
-			for j := 0; j < each; j++ {
-				tsc.Get(v)
-			}
-			wg.Done()
-		}()
-	}
-	b.StartTimer()
-	wg.Wait()
-}
func BenchmarkCacheSet(b *testing.B) {
	b.StopTimer()
	tc := New(DefaultExpiration, 0)

sharded.go

@@ -1,8 +1,11 @@
package cache

import (
-	"encoding/binary"
-	"hash/fnv"
+	"crypto/rand"
+	"math"
+	"math/big"
+	insecurerand "math/rand"
+	"os"
	"runtime"
	"time"
)
@@ -10,8 +13,9 @@ import (
// This is an experimental and unexported (for now) attempt at making a cache
// with better algorithmic complexity than the standard one, namely by
// preventing write locks of the entire cache when an item is added. As of the
-// time of writing, the overhead of selecting buckets is much too great to be
-// worth it except perhaps with extremely large caches.
+// time of writing, the overhead of selecting buckets results in cache
+// operations being about twice as slow as for the standard cache with small
+// total cache sizes, and faster for larger ones.
//
// See cache_test.go for a few benchmarks.
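The comment above describes the core idea: split the cache into independently locked shards so that a write locks only the shard that owns the key instead of the whole map. For readers skimming the diff, a minimal standalone sketch of that idea follows. None of this is go-cache's code: the shardedMap and shard types, the trivial multiply-by-33 hash in pick, and all names are illustrative; the real implementation uses full *cache shards and the seeded djb33 hash introduced in the next hunk.

package main

import (
	"fmt"
	"sync"
)

// shard is one independently locked segment. Illustrative only; go-cache's
// real shards are *cache values with expiration handling and a janitor.
type shard struct {
	mu    sync.RWMutex
	items map[string]interface{}
}

// shardedMap spreads keys over a fixed number of shards so that a write
// locks only the shard that owns the key, never the whole structure.
type shardedMap struct {
	shards []*shard
}

func newShardedMap(n int) *shardedMap {
	sm := &shardedMap{shards: make([]*shard, n)}
	for i := range sm.shards {
		sm.shards[i] = &shard{items: make(map[string]interface{})}
	}
	return sm
}

// pick maps a key to its shard. Any reasonably uniform string hash works;
// go-cache uses the seeded djb33 defined in the next hunk.
func (sm *shardedMap) pick(k string) *shard {
	var h uint32
	for i := 0; i < len(k); i++ {
		h = h*33 + uint32(k[i])
	}
	return sm.shards[h%uint32(len(sm.shards))]
}

func (sm *shardedMap) Set(k string, v interface{}) {
	s := sm.pick(k)
	s.mu.Lock() // blocks readers and writers of this shard only
	s.items[k] = v
	s.mu.Unlock()
}

func (sm *shardedMap) Get(k string) (interface{}, bool) {
	s := sm.pick(k)
	s.mu.RLock()
	v, found := s.items[k]
	s.mu.RUnlock()
	return v, found
}

func main() {
	sm := newShardedMap(8)
	sm.Set("foo", "bar")
	if v, found := sm.Get("foo"); found {
		fmt.Println(v) // bar
	}
}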
@@ -20,16 +24,46 @@ type unexportedShardedCache struct {
}

type shardedCache struct {
+	seed    uint32
	m       uint32
	cs      []*cache
	janitor *shardedJanitor
}

+// djb2 with better shuffling. 5x faster than FNV with the hash.Hash overhead.
+func djb33(seed uint32, k string) uint32 {
+	var (
+		l = uint32(len(k))
+		d = 5381 + seed + l
+		i = uint32(0)
+	)
+	// Why is all this 5x faster than a for loop?
+	if l >= 4 {
+		for i < l-4 {
+			d = (d * 33) ^ uint32(k[i])
+			d = (d * 33) ^ uint32(k[i+1])
+			d = (d * 33) ^ uint32(k[i+2])
+			d = (d * 33) ^ uint32(k[i+3])
+			i += 4
+		}
+	}
+	switch l - i {
+	case 1:
+	case 2:
+		d = (d * 33) ^ uint32(k[i])
+	case 3:
+		d = (d * 33) ^ uint32(k[i])
+		d = (d * 33) ^ uint32(k[i+1])
+	case 4:
+		d = (d * 33) ^ uint32(k[i])
+		d = (d * 33) ^ uint32(k[i+1])
+		d = (d * 33) ^ uint32(k[i+2])
+	}
+	return d ^ (d >> 16)
+}

func (sc *shardedCache) bucket(k string) *cache {
-	h := fnv.New32()
-	h.Write([]byte(k))
-	n := binary.BigEndian.Uint32(h.Sum(nil))
-	return sc.cs[n%sc.m]
+	return sc.cs[djb33(sc.seed, k)%sc.m]
}

func (sc *shardedCache) Set(k string, x interface{}, d time.Duration) {
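The comment on djb33 claims it is about 5x faster than FNV once the hash.Hash allocation and Sum call are counted. A rough way to check that on a given machine is a pair of micro-benchmarks like the sketch below. This is not part of the commit: the benchmark names are illustrative, the fixed seed 42 is arbitrary, and it assumes the code sits in a _test.go file in the cache package so the new djb33 is in scope.

package cache

import (
	"encoding/binary"
	"hash/fnv"
	"testing"
)

// BenchmarkDjb33 hashes one key per iteration with the new seeded hash.
func BenchmarkDjb33(b *testing.B) {
	for i := 0; i < b.N; i++ {
		djb33(42, "foobarbazquux")
	}
}

// BenchmarkFNV32 reproduces the old bucket-selection cost: allocate a
// hash.Hash32, write the key, and read back the sum.
func BenchmarkFNV32(b *testing.B) {
	for i := 0; i < b.N; i++ {
		h := fnv.New32()
		h.Write([]byte("foobarbazquux"))
		binary.BigEndian.Uint32(h.Sum(nil))
	}
}

Running go test -bench 'Djb33|FNV32' -run NONE then gives a direct comparison of the two approaches.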
@@ -70,6 +104,19 @@ func (sc *shardedCache) DeleteExpired() {
	}
}

+// Returns the items in the cache. This may include items that have expired,
+// but have not yet been cleaned up. If this is significant, the Expiration
+// fields of the items should be checked. Note that explicit synchronization
+// is needed to use a cache and its corresponding Items() return values at
+// the same time, as the maps are shared.
+func (sc *shardedCache) Items() []map[string]*Item {
+	res := make([]map[string]*Item, len(sc.cs))
+	for i, v := range sc.cs {
+		res[i] = v.Items()
+	}
+	return res
+}

func (sc *shardedCache) Flush() {
	for _, v := range sc.cs {
		v.Flush()
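Because Items() hands back the live per-shard maps rather than copies, callers have to provide their own synchronization, as the new doc comment says. A short illustrative helper, not part of the commit, assuming it lives in the cache package and that Item keeps the Expired() helper from cache.go:

// countLive walks the per-shard maps returned by Items() and skips entries
// that have expired but have not been cleaned up yet. Because the maps are
// shared with the running cache, call this only while nothing else is
// writing to sc.
func countLive(sc *shardedCache) int {
	n := 0
	for _, shard := range sc.Items() {
		for _, item := range shard {
			if !item.Expired() {
				n++
			}
		}
	}
	return n
}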
@@ -107,9 +154,19 @@ func runShardedJanitor(sc *shardedCache, ci time.Duration) {
}

func newShardedCache(n int, de time.Duration) *shardedCache {
+	max := big.NewInt(0).SetUint64(uint64(math.MaxUint32))
+	rnd, err := rand.Int(rand.Reader, max)
+	var seed uint32
+	if err != nil {
+		os.Stderr.Write([]byte("WARNING: go-cache's newShardedCache failed to read from the system CSPRNG (/dev/urandom or equivalent.) Your system's security may be compromised. Continuing with an insecure seed.\n"))
+		seed = insecurerand.Uint32()
+	} else {
+		seed = uint32(rnd.Uint64())
+	}
	sc := &shardedCache{
-		m:  uint32(n - 1),
-		cs: make([]*cache, n),
+		seed: seed,
+		m:    uint32(n),
+		cs:   make([]*cache, n),
	}
	for i := 0; i < n; i++ {
		c := &cache{
@@ -121,7 +178,7 @@ func newShardedCache(n int, de time.Duration) *shardedCache {
	return sc
}

-func unexportedNewSharded(shards int, defaultExpiration, cleanupInterval time.Duration) *unexportedShardedCache {
+func unexportedNewSharded(defaultExpiration, cleanupInterval time.Duration, shards int) *unexportedShardedCache {
	if defaultExpiration == 0 {
		defaultExpiration = -1
	}

sharded_test.go (new file, 68 lines)

@@ -0,0 +1,68 @@
package cache

import (
	"strconv"
	"sync"
	"testing"
)

// func TestDjb33(t *testing.T) {
// }

var shardedKeys = []string{
	"f",
	"fo",
	"foo",
	"barf",
	"barfo",
	"foobar",
	"bazbarf",
	"bazbarfo",
	"bazbarfoo",
	"foobarbazq",
	"foobarbazqu",
	"foobarbazquu",
	"foobarbazquux",
}

func TestShardedCache(t *testing.T) {
	tc := unexportedNewSharded(DefaultExpiration, 0, 13)
	for _, v := range shardedKeys {
		tc.Set(v, "value", DefaultExpiration)
	}
}

func BenchmarkShardedCacheGet(b *testing.B) {
	b.StopTimer()
	tc := unexportedNewSharded(DefaultExpiration, 0, 10)
	tc.Set("foobarba", "zquux", DefaultExpiration)
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		tc.Get("foobarba")
	}
}

func BenchmarkShardedCacheGetManyConcurrent(b *testing.B) {
	b.StopTimer()
	n := 10000
	tsc := unexportedNewSharded(DefaultExpiration, 0, 20)
	keys := make([]string, n)
	for i := 0; i < n; i++ {
		k := "foo" + strconv.Itoa(n)
		keys[i] = k
		tsc.Set(k, "bar", DefaultExpiration)
	}
	each := b.N / n
	wg := new(sync.WaitGroup)
	wg.Add(n)
	for _, v := range keys {
		go func() {
			for j := 0; j < each; j++ {
				tsc.Get(v)
			}
			wg.Done()
		}()
	}
	b.StartTimer()
	wg.Wait()
}
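With this move, BenchmarkShardedCacheGetManyConcurrent lives next to the sharded implementation and follows the same shape as BenchmarkCacheGetManyConcurrent in cache_test.go apart from the constructor, so their results can be compared directly, as the comment in cache_test.go notes. One way to run just the pair is go test -bench GetManyConcurrent -run NONE, which matches both benchmarks.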