shard.rs: update hashing

This commit is contained in:
John Spray
2023-11-09 16:38:11 +00:00
parent 93c52b5763
commit 733877a8ff
4 changed files with 37 additions and 25 deletions

7
Cargo.lock generated
View File

@@ -2846,12 +2846,6 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "mur3"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97af489e1e21b68de4c390ecca6703318bc1aa16e9733bcb62c089b73c6fbb1b"
[[package]]
name = "native-tls"
version = "0.2.11"
@@ -3303,7 +3297,6 @@ dependencies = [
"bytes",
"const_format",
"enum-map",
"mur3",
"postgres_ffi",
"serde",
"serde_json",

View File

@@ -94,7 +94,6 @@ jsonwebtoken = "8"
libc = "0.2"
md5 = "0.7.0"
memoffset = "0.8"
mur3 = "0.1.0"
native-tls = "0.2"
nix = "0.26"
notify = "5.0.0"

View File

@@ -17,7 +17,6 @@ postgres_ffi.workspace = true
enum-map.workspace = true
strum.workspace = true
strum_macros.workspace = true
mur3.workspace = true
url.workspace = true
workspace_hack.workspace = true
workspace_hack.workspace = true

View File

@@ -1,7 +1,4 @@
use std::hash::Hasher;
use crate::key::Key;
use mur3;
use serde::{Deserialize, Serialize};
use utils::id::NodeId;
@@ -124,7 +121,7 @@ impl ShardIdentity {
/// Return true if the key should be ingested by this shard
pub fn is_key_local(&self, key: &Key) -> bool {
if self.count < ShardCount(2) || key_is_broadcast(key) {
return true;
true
} else {
key_to_shard_number(self.count, self.stripe_size, key) == self.number
}
@@ -164,26 +161,50 @@ fn key_is_broadcast(key: &Key) -> bool {
!is_rel_block_key(key)
}
/// Provide the same result as the function in postgres `hashfn.h` with the same name.
///
/// The steps are those of the 32-bit MurmurHash3 finalizer: xor-shift, multiply,
/// xor-shift, multiply, xor-shift. An input of `0` yields `0`.
///
/// The C implementation relies on unsigned multiplication wrapping modulo 2^32. A plain
/// `*=` on `u32` panics on overflow when overflow checks are enabled (debug and test
/// builds), so the wrapping is spelled out with `wrapping_mul`.
fn murmurhash32(data: u32) -> u32 {
    let mut h = data;
    h ^= h >> 16;
    h = h.wrapping_mul(0x85eb_ca6b);
    h ^= h >> 13;
    h = h.wrapping_mul(0xc2b2_ae35);
    h ^= h >> 16;
    h
}
/// Provide the same result as the function in postgres `hashfn.h` with the same name.
///
/// Folds hash `b` into the running hash `a` and returns the new running hash:
/// `a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2))`, with every addition wrapping modulo 2^32
/// as unsigned arithmetic does in C. Plain `+` on `u32` panics on overflow when overflow
/// checks are enabled (debug and test builds), hence the explicit `wrapping_add` calls.
fn hash_combine(mut a: u32, b: u32) -> u32 {
    a ^= b
        .wrapping_add(0x9e37_79b9)
        .wrapping_add(a << 6)
        .wrapping_add(a >> 2);
    a
}
/// Where a Key is to be distributed across shards, select the shard.
///
/// Un-sharded tenants (`count < 2`) and broadcast keys are not distributed: for those this
/// function returns shard 0. Callers that need broadcast semantics must check for broadcast
/// keys themselves before comparing shard numbers.
///
/// The hashing in this function must exactly match what we do in postgres smgr
/// code. The resulting distribution of pages is intended to preserve locality within
/// `stripe_size` ranges of contiguous block numbers in the same relation, while otherwise
/// distributing data pseudo-randomly.
///
/// The mapping of key to shard is not stable across changes to ShardCount: this is intentional
/// and will be handled at higher levels when shards are split.
///
/// # Panics
///
/// Panics with a division by zero if `stripe_size.0` is zero and the fast path is not taken.
fn key_to_shard_number(count: ShardCount, stripe_size: ShardStripeSize, key: &Key) -> ShardNumber {
    // Fast path for un-sharded tenants or broadcast keys
    if count < ShardCount(2) || key_is_broadcast(key) {
        return ShardNumber(0);
    }

    // spcNode
    let mut hash = murmurhash32(key.field2);
    // dbNode
    hash = hash_combine(hash, murmurhash32(key.field3));
    // relNode
    hash = hash_combine(hash, murmurhash32(key.field4));
    // blockNum/stripe size: every block in the same stripe feeds the same value into the
    // hash, so a whole stripe maps to one shard.
    hash = hash_combine(hash, murmurhash32(key.field6 / stripe_size.0));

    // `count` is at least 2 here, so the modulus is non-zero.
    // NOTE(review): the narrowing cast assumes `count.0` fits in a u8 — confirm against
    // the ShardCount definition.
    let shard = (hash % (count.0 as u32)) as u8;
    ShardNumber(shard)
}