mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-28 00:23:00 +00:00
R-Tree layer map (#2317)
Replace the layer array and linear search with R-tree So far, the in-memory layer map that holds information about layer files that exist, has used a simple Vec, in no particular order, to hold information about all the layers. That obviously doesn't scale very well; with thousands of layer files the linear search was consuming a lot of CPU. Replace it with a two-dimensional R-tree, with Key and LSN ranges as the dimensions. For the R-tree, use the 'rstar' crate. To be able to use that, we convert the Keys and LSNs into 256-bit integers. 64 bits would be enough to represent LSNs, and 128 bits would be enough to represent Keys. However, we use 256 bits, because rstar internally performs multiplication to calculate the area of rectangles, and the result of multiplying two 128 bit integers doesn't necessarily fit in 128 bits, causing integer overflow and, if overflow-checks are enabled, panic. To avoid that, we use 256 bit integers. Add a performance test that creates a lot of layer files, to demonstrate the benefit.
This commit is contained in:
committed by
GitHub
parent
e9a103c09f
commit
f3073a4db9
222
Cargo.lock
generated
222
Cargo.lock
generated
@@ -37,6 +37,12 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "amplify_num"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f27d3d00d3d115395a7a8a4dc045feb7aa82b641e485f7e15f4e67ac16f4f56d"
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.12.1"
|
||||
@@ -135,6 +141,15 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-polyfill"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c041a8d9751a520ee19656232a18971f18946a7900f1520ee4400002244dd89"
|
||||
dependencies = [
|
||||
"critical-section",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
@@ -212,6 +227,21 @@ dependencies = [
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bare-metal"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5deb64efa5bd81e31fcd1938615a6d98c82eafcbcd787162b6f63b91d6bac5b3"
|
||||
dependencies = [
|
||||
"rustc_version 0.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bare-metal"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.0"
|
||||
@@ -250,6 +280,18 @@ dependencies = [
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit_field"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4"
|
||||
|
||||
[[package]]
|
||||
name = "bitfield"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46afbd2983a5d5a7bd740ccb198caf5b82f45c40c09c0eed36052d91cb92e719"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@@ -528,6 +570,18 @@ version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
|
||||
|
||||
[[package]]
|
||||
name = "cortex-m"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70858629a458fdfd39f9675c4dc309411f2a3f83bede76988d81bf1a0ecee9e0"
|
||||
dependencies = [
|
||||
"bare-metal 0.2.5",
|
||||
"bitfield",
|
||||
"embedded-hal",
|
||||
"volatile-register",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.3.5"
|
||||
@@ -552,7 +606,7 @@ version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dfea2db42e9927a3845fb268a10a72faed6d416065f77873f05e411457c363e"
|
||||
dependencies = [
|
||||
"rustc_version",
|
||||
"rustc_version 0.4.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -600,6 +654,18 @@ dependencies = [
|
||||
"itertools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "critical-section"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95da181745b56d4bd339530ec393508910c909c784e8962d15d722bacf0bcbcd"
|
||||
dependencies = [
|
||||
"bare-metal 1.0.0",
|
||||
"cfg-if",
|
||||
"cortex-m",
|
||||
"riscv",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.6"
|
||||
@@ -844,6 +910,16 @@ version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be"
|
||||
|
||||
[[package]]
|
||||
name = "embedded-hal"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35949884794ad573cf46071e41c9b60efb0cb311e3ca01f7af807af1debc66ff"
|
||||
dependencies = [
|
||||
"nb 0.1.3",
|
||||
"void",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.31"
|
||||
@@ -1165,6 +1241,15 @@ version = "1.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
||||
|
||||
[[package]]
|
||||
name = "hash32"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
@@ -1174,6 +1259,19 @@ dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heapless"
|
||||
version = "0.7.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743"
|
||||
dependencies = [
|
||||
"atomic-polyfill",
|
||||
"hash32",
|
||||
"rustc_version 0.4.0",
|
||||
"spin 0.9.4",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.3"
|
||||
@@ -1491,6 +1589,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.7"
|
||||
@@ -1649,6 +1753,21 @@ dependencies = [
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nb"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "801d31da0513b6ec5214e9bf433a77966320625a37860f910be265be6e18d06f"
|
||||
dependencies = [
|
||||
"nb 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nb"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "546c37ac5d9e56f55e73b677106873d9d9f5190605e41a856503623648488cae"
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.23.1"
|
||||
@@ -1716,6 +1835,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1828,6 +1948,7 @@ checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"
|
||||
name = "pageserver"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"amplify_num",
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -1852,6 +1973,7 @@ dependencies = [
|
||||
"itertools",
|
||||
"metrics",
|
||||
"nix",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"postgres-protocol",
|
||||
@@ -1861,6 +1983,7 @@ dependencies = [
|
||||
"rand",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"rstar",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -2515,12 +2638,33 @@ dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"spin",
|
||||
"spin 0.5.2",
|
||||
"untrusted",
|
||||
"web-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "riscv"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6907ccdd7a31012b70faf2af85cd9e5ba97657cc3987c4f13f8e4d2c2a088aba"
|
||||
dependencies = [
|
||||
"bare-metal 1.0.0",
|
||||
"bit_field",
|
||||
"riscv-target",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "riscv-target"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88aa938cda42a0cf62a20cfe8d139ff1af20c2e681212b5b34adb5a58333f222"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "routerify"
|
||||
version = "3.0.0"
|
||||
@@ -2534,6 +2678,17 @@ dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rstar"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b40f1bfe5acdab44bc63e6699c28b74f75ec43afb59f3eda01e145aff86a25fa"
|
||||
dependencies = [
|
||||
"heapless",
|
||||
"num-traits",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rstest"
|
||||
version = "0.12.0"
|
||||
@@ -2543,7 +2698,7 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustc_version",
|
||||
"rustc_version 0.4.0",
|
||||
"syn",
|
||||
]
|
||||
|
||||
@@ -2565,7 +2720,7 @@ dependencies = [
|
||||
"log",
|
||||
"rusoto_credential",
|
||||
"rusoto_signature",
|
||||
"rustc_version",
|
||||
"rustc_version 0.4.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
@@ -2623,7 +2778,7 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rusoto_credential",
|
||||
"rustc_version",
|
||||
"rustc_version 0.4.0",
|
||||
"serde",
|
||||
"sha2 0.9.9",
|
||||
"tokio",
|
||||
@@ -2641,13 +2796,22 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||
dependencies = [
|
||||
"semver 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
|
||||
dependencies = [
|
||||
"semver",
|
||||
"semver 1.0.13",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2800,12 +2964,27 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
|
||||
dependencies = [
|
||||
"semver-parser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93f6841e709003d68bb2deee8c343572bf446003ec20a583e76f7b15cebf3711"
|
||||
|
||||
[[package]]
|
||||
name = "semver-parser"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.142"
|
||||
@@ -2999,6 +3178,15 @@ version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
@@ -3675,6 +3863,12 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vcell"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
@@ -3687,6 +3881,21 @@ version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "void"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||
|
||||
[[package]]
|
||||
name = "volatile-register"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ee8f19f9d74293faf70901bc20ad067dc1ad390d2cbf1e3f75f721ffee908b6"
|
||||
dependencies = [
|
||||
"vcell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wal_craft"
|
||||
version = "0.1.0"
|
||||
@@ -3952,6 +4161,7 @@ dependencies = [
|
||||
"regex-syntax",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"stable_deref_trait",
|
||||
"syn",
|
||||
"time 0.3.12",
|
||||
"tokio",
|
||||
|
||||
@@ -54,6 +54,9 @@ once_cell = "1.13.0"
|
||||
crossbeam-utils = "0.8.5"
|
||||
fail = "0.5.0"
|
||||
git-version = "0.3.5"
|
||||
rstar = "0.9.3"
|
||||
num-traits = "0.2.15"
|
||||
amplify_num = "0.4.1"
|
||||
|
||||
postgres_ffi = { path = "../libs/postgres_ffi" }
|
||||
etcd_broker = { path = "../libs/etcd_broker" }
|
||||
|
||||
@@ -24,6 +24,19 @@ pub struct Key {
|
||||
pub const KEY_SIZE: usize = 18;
|
||||
|
||||
impl Key {
|
||||
/// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
|
||||
/// As long as Neon does not support tablespace (because of lack of access to local file system),
|
||||
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
|
||||
pub fn to_i128(&self) -> i128 {
|
||||
assert!(self.field2 < 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
|
||||
(((self.field1 & 0xf) as i128) << 120)
|
||||
| (((self.field2 & 0xFFFF) as i128) << 104)
|
||||
| ((self.field3 as i128) << 72)
|
||||
| ((self.field4 as i128) << 40)
|
||||
| ((self.field5 as i128) << 32)
|
||||
| self.field6 as i128
|
||||
}
|
||||
|
||||
pub fn next(&self) -> Key {
|
||||
self.add(1)
|
||||
}
|
||||
|
||||
@@ -713,7 +713,7 @@ impl DeltaLayerWriter {
|
||||
for buf in block_buf.blocks {
|
||||
file.write_all(buf.as_ref())?;
|
||||
}
|
||||
|
||||
assert!(self.lsn_range.start < self.lsn_range.end);
|
||||
// Fill in the summary on blk 0
|
||||
let summary = Summary {
|
||||
magic: DELTA_FILE_MAGIC,
|
||||
|
||||
@@ -15,9 +15,15 @@ use crate::repository::Key;
|
||||
use crate::tenant::inmemory_layer::InMemoryLayer;
|
||||
use crate::tenant::storage_layer::Layer;
|
||||
use crate::tenant::storage_layer::{range_eq, range_overlaps};
|
||||
use amplify_num::i256;
|
||||
use anyhow::Result;
|
||||
use num_traits::identities::{One, Zero};
|
||||
use num_traits::{Bounded, Num, Signed};
|
||||
use rstar::{RTree, RTreeObject, AABB};
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::VecDeque;
|
||||
use std::ops::Range;
|
||||
use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
|
||||
use std::sync::Arc;
|
||||
use tracing::*;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -47,14 +53,163 @@ pub struct LayerMap {
|
||||
pub frozen_layers: VecDeque<Arc<InMemoryLayer>>,
|
||||
|
||||
/// All the historic layers are kept here
|
||||
historic_layers: RTree<LayerRTreeObject>,
|
||||
|
||||
/// TODO: This is a placeholder implementation of a data structure
|
||||
/// to hold information about all the layer files on disk and in
|
||||
/// S3. Currently, it's just a vector and all operations perform a
|
||||
/// linear scan over it. That obviously becomes slow as the
|
||||
/// number of layers grows. I'm imagining that an R-tree or some
|
||||
/// other 2D data structure would be the long-term solution here.
|
||||
historic_layers: Vec<Arc<dyn Layer>>,
|
||||
/// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
|
||||
/// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
|
||||
l0_delta_layers: Vec<Arc<dyn Layer>>,
|
||||
}
|
||||
|
||||
struct LayerRTreeObject {
|
||||
layer: Arc<dyn Layer>,
|
||||
}
|
||||
|
||||
// Representation of Key as numeric type.
|
||||
// We can not use native implementation of i128, because rstar::RTree
|
||||
// doesn't handle properly integer overflow during area calculation: sum(Xi*Yi).
|
||||
// Overflow will cause panic in debug mode and incorrect area calculation in release mode,
|
||||
// which leads to non-optimally balanced R-Tree (but doesn't fit correctness of R-Tree work).
|
||||
// By using i256 as the type, even though all the actual values would fit in i128, we can be
|
||||
// sure that multiplication doesn't overflow.
|
||||
//
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Debug)]
|
||||
struct IntKey(i256);
|
||||
|
||||
impl Copy for IntKey {}
|
||||
|
||||
impl IntKey {
|
||||
fn from(i: i128) -> Self {
|
||||
IntKey(i256::from(i))
|
||||
}
|
||||
}
|
||||
|
||||
impl Bounded for IntKey {
|
||||
fn min_value() -> Self {
|
||||
IntKey(i256::MIN)
|
||||
}
|
||||
fn max_value() -> Self {
|
||||
IntKey(i256::MAX)
|
||||
}
|
||||
}
|
||||
|
||||
impl Signed for IntKey {
|
||||
fn is_positive(&self) -> bool {
|
||||
self.0 > i256::ZERO
|
||||
}
|
||||
fn is_negative(&self) -> bool {
|
||||
self.0 < i256::ZERO
|
||||
}
|
||||
fn signum(&self) -> Self {
|
||||
match self.0.cmp(&i256::ZERO) {
|
||||
Ordering::Greater => IntKey(i256::ONE),
|
||||
Ordering::Less => IntKey(-i256::ONE),
|
||||
Ordering::Equal => IntKey(i256::ZERO),
|
||||
}
|
||||
}
|
||||
fn abs(&self) -> Self {
|
||||
IntKey(self.0.abs())
|
||||
}
|
||||
fn abs_sub(&self, other: &Self) -> Self {
|
||||
if self.0 <= other.0 {
|
||||
IntKey(i256::ZERO)
|
||||
} else {
|
||||
IntKey(self.0 - other.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Neg for IntKey {
|
||||
type Output = Self;
|
||||
fn neg(self) -> Self::Output {
|
||||
IntKey(-self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Rem for IntKey {
|
||||
type Output = Self;
|
||||
fn rem(self, rhs: Self) -> Self::Output {
|
||||
IntKey(self.0 % rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Div for IntKey {
|
||||
type Output = Self;
|
||||
fn div(self, rhs: Self) -> Self::Output {
|
||||
IntKey(self.0 / rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Add for IntKey {
|
||||
type Output = Self;
|
||||
fn add(self, rhs: Self) -> Self::Output {
|
||||
IntKey(self.0 + rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sub for IntKey {
|
||||
type Output = Self;
|
||||
fn sub(self, rhs: Self) -> Self::Output {
|
||||
IntKey(self.0 - rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Mul for IntKey {
|
||||
type Output = Self;
|
||||
fn mul(self, rhs: Self) -> Self::Output {
|
||||
IntKey(self.0 * rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl One for IntKey {
|
||||
fn one() -> Self {
|
||||
IntKey(i256::ONE)
|
||||
}
|
||||
}
|
||||
|
||||
impl Zero for IntKey {
|
||||
fn zero() -> Self {
|
||||
IntKey(i256::ZERO)
|
||||
}
|
||||
fn is_zero(&self) -> bool {
|
||||
self.0 == i256::ZERO
|
||||
}
|
||||
}
|
||||
|
||||
impl Num for IntKey {
|
||||
type FromStrRadixErr = <i128 as Num>::FromStrRadixErr;
|
||||
fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
|
||||
Ok(IntKey(i256::from(i128::from_str_radix(str, radix)?)))
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for LayerRTreeObject {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
// FIXME: ptr_eq might fail to return true for 'dyn'
|
||||
// references. Clippy complains about this. In practice it
|
||||
// seems to work, the assertion below would be triggered
|
||||
// otherwise but this ought to be fixed.
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
Arc::ptr_eq(&self.layer, &other.layer)
|
||||
}
|
||||
}
|
||||
|
||||
impl RTreeObject for LayerRTreeObject {
|
||||
type Envelope = AABB<[IntKey; 2]>;
|
||||
fn envelope(&self) -> Self::Envelope {
|
||||
let key_range = self.layer.get_key_range();
|
||||
let lsn_range = self.layer.get_lsn_range();
|
||||
AABB::from_corners(
|
||||
[
|
||||
IntKey::from(key_range.start.to_i128()),
|
||||
IntKey::from(lsn_range.start.0 as i128),
|
||||
],
|
||||
[
|
||||
IntKey::from(key_range.end.to_i128() - 1),
|
||||
IntKey::from(lsn_range.end.0 as i128 - 1),
|
||||
], // AABB::upper is inclusive, while `key_range.end` and `lsn_range.end` are exclusive
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return value of LayerMap::search
|
||||
@@ -80,19 +235,24 @@ impl LayerMap {
|
||||
// Find the latest image layer that covers the given key
|
||||
let mut latest_img: Option<Arc<dyn Layer>> = None;
|
||||
let mut latest_img_lsn: Option<Lsn> = None;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners(
|
||||
[IntKey::from(key.to_i128()), IntKey::from(0i128)],
|
||||
[
|
||||
IntKey::from(key.to_i128()),
|
||||
IntKey::from(end_lsn.0 as i128 - 1),
|
||||
],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if !l.get_key_range().contains(&key) {
|
||||
continue;
|
||||
}
|
||||
assert!(l.get_key_range().contains(&key));
|
||||
let img_lsn = l.get_lsn_range().start;
|
||||
|
||||
if img_lsn >= end_lsn {
|
||||
// too new
|
||||
continue;
|
||||
}
|
||||
assert!(img_lsn < end_lsn);
|
||||
if Lsn(img_lsn.0 + 1) == end_lsn {
|
||||
// found exact match
|
||||
return Ok(Some(SearchResult {
|
||||
@@ -108,19 +268,24 @@ impl LayerMap {
|
||||
|
||||
// Search the delta layers
|
||||
let mut latest_delta: Option<Arc<dyn Layer>> = None;
|
||||
for l in self.historic_layers.iter() {
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if !l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if !l.get_key_range().contains(&key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(l.get_key_range().contains(&key));
|
||||
if l.get_lsn_range().start >= end_lsn {
|
||||
// too new
|
||||
continue;
|
||||
info!(
|
||||
"Candidate delta layer {}..{} is too new for lsn {}",
|
||||
l.get_lsn_range().start,
|
||||
l.get_lsn_range().end,
|
||||
end_lsn
|
||||
);
|
||||
}
|
||||
|
||||
assert!(l.get_lsn_range().start < end_lsn);
|
||||
if l.get_lsn_range().end >= end_lsn {
|
||||
// this layer contains the requested point in the key/lsn space.
|
||||
// No need to search any further
|
||||
@@ -170,7 +335,10 @@ impl LayerMap {
|
||||
/// Insert an on-disk layer
|
||||
///
|
||||
pub fn insert_historic(&mut self, layer: Arc<dyn Layer>) {
|
||||
self.historic_layers.push(layer);
|
||||
if layer.get_key_range() == (Key::MIN..Key::MAX) {
|
||||
self.l0_delta_layers.push(layer.clone());
|
||||
}
|
||||
self.historic_layers.insert(LayerRTreeObject { layer });
|
||||
NUM_ONDISK_LAYERS.inc();
|
||||
}
|
||||
|
||||
@@ -180,17 +348,22 @@ impl LayerMap {
|
||||
/// This should be called when the corresponding file on disk has been deleted.
|
||||
///
|
||||
pub fn remove_historic(&mut self, layer: Arc<dyn Layer>) {
|
||||
let len_before = self.historic_layers.len();
|
||||
if layer.get_key_range() == (Key::MIN..Key::MAX) {
|
||||
let len_before = self.l0_delta_layers.len();
|
||||
|
||||
// FIXME: ptr_eq might fail to return true for 'dyn'
|
||||
// references. Clippy complains about this. In practice it
|
||||
// seems to work, the assertion below would be triggered
|
||||
// otherwise but this ought to be fixed.
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
self.historic_layers
|
||||
.retain(|other| !Arc::ptr_eq(other, &layer));
|
||||
|
||||
assert_eq!(self.historic_layers.len(), len_before - 1);
|
||||
// FIXME: ptr_eq might fail to return true for 'dyn'
|
||||
// references. Clippy complains about this. In practice it
|
||||
// seems to work, the assertion below would be triggered
|
||||
// otherwise but this ought to be fixed.
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
self.l0_delta_layers
|
||||
.retain(|other| !Arc::ptr_eq(other, &layer));
|
||||
assert_eq!(self.l0_delta_layers.len(), len_before - 1);
|
||||
}
|
||||
assert!(self
|
||||
.historic_layers
|
||||
.remove(&LayerRTreeObject { layer })
|
||||
.is_some());
|
||||
NUM_ONDISK_LAYERS.dec();
|
||||
}
|
||||
|
||||
@@ -207,15 +380,26 @@ impl LayerMap {
|
||||
|
||||
loop {
|
||||
let mut made_progress = false;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners(
|
||||
[
|
||||
IntKey::from(range_remain.start.to_i128()),
|
||||
IntKey::from(lsn_range.start.0 as i128),
|
||||
],
|
||||
[
|
||||
IntKey::from(range_remain.end.to_i128() - 1),
|
||||
IntKey::from(lsn_range.end.0 as i128 - 1),
|
||||
],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
let img_lsn = l.get_lsn_range().start;
|
||||
if !l.is_incremental()
|
||||
&& l.get_key_range().contains(&range_remain.start)
|
||||
&& lsn_range.contains(&img_lsn)
|
||||
{
|
||||
if l.get_key_range().contains(&range_remain.start) && lsn_range.contains(&img_lsn) {
|
||||
made_progress = true;
|
||||
let img_key_end = l.get_key_range().end;
|
||||
|
||||
@@ -232,8 +416,8 @@ impl LayerMap {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter_historic_layers(&self) -> impl Iterator<Item = &Arc<dyn Layer>> {
|
||||
self.historic_layers.iter()
|
||||
pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<dyn Layer>> {
|
||||
self.historic_layers.iter().map(|e| e.layer.clone())
|
||||
}
|
||||
|
||||
/// Find the last image layer that covers 'key', ignoring any image layers
|
||||
@@ -241,19 +425,22 @@ impl LayerMap {
|
||||
fn find_latest_image(&self, key: Key, lsn: Lsn) -> Option<Arc<dyn Layer>> {
|
||||
let mut candidate_lsn = Lsn(0);
|
||||
let mut candidate = None;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners(
|
||||
[IntKey::from(key.to_i128()), IntKey::from(0)],
|
||||
[IntKey::from(key.to_i128()), IntKey::from(lsn.0 as i128)],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !l.get_key_range().contains(&key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(l.get_key_range().contains(&key));
|
||||
let this_lsn = l.get_lsn_range().start;
|
||||
if this_lsn > lsn {
|
||||
continue;
|
||||
}
|
||||
assert!(this_lsn <= lsn);
|
||||
if this_lsn < candidate_lsn {
|
||||
// our previous candidate was better
|
||||
continue;
|
||||
@@ -279,10 +466,19 @@ impl LayerMap {
|
||||
lsn: Lsn,
|
||||
) -> Result<Vec<(Range<Key>, Option<Arc<dyn Layer>>)>> {
|
||||
let mut points = vec![key_range.start];
|
||||
for l in self.historic_layers.iter() {
|
||||
if l.get_lsn_range().start > lsn {
|
||||
continue;
|
||||
}
|
||||
let envelope = AABB::from_corners(
|
||||
[IntKey::from(key_range.start.to_i128()), IntKey::from(0)],
|
||||
[
|
||||
IntKey::from(key_range.end.to_i128()),
|
||||
IntKey::from(lsn.0 as i128),
|
||||
],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
assert!(l.get_lsn_range().start <= lsn);
|
||||
let range = l.get_key_range();
|
||||
if key_range.contains(&range.start) {
|
||||
points.push(l.get_key_range().start);
|
||||
@@ -315,16 +511,29 @@ impl LayerMap {
|
||||
/// given key and LSN range.
|
||||
pub fn count_deltas(&self, key_range: &Range<Key>, lsn_range: &Range<Lsn>) -> Result<usize> {
|
||||
let mut result = 0;
|
||||
for l in self.historic_layers.iter() {
|
||||
if lsn_range.start >= lsn_range.end {
|
||||
return Ok(0);
|
||||
}
|
||||
let envelope = AABB::from_corners(
|
||||
[
|
||||
IntKey::from(key_range.start.to_i128()),
|
||||
IntKey::from(lsn_range.start.0 as i128),
|
||||
],
|
||||
[
|
||||
IntKey::from(key_range.end.to_i128() - 1),
|
||||
IntKey::from(lsn_range.end.0 as i128 - 1),
|
||||
],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if !l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if !range_overlaps(&l.get_lsn_range(), lsn_range) {
|
||||
continue;
|
||||
}
|
||||
if !range_overlaps(&l.get_key_range(), key_range) {
|
||||
continue;
|
||||
}
|
||||
assert!(range_overlaps(&l.get_lsn_range(), lsn_range));
|
||||
assert!(range_overlaps(&l.get_key_range(), key_range));
|
||||
|
||||
// We ignore level0 delta layers. Unless the whole keyspace fits
|
||||
// into one partition
|
||||
@@ -341,17 +550,7 @@ impl LayerMap {
|
||||
|
||||
/// Return all L0 delta layers
|
||||
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<dyn Layer>>> {
|
||||
let mut deltas = Vec::new();
|
||||
for l in self.historic_layers.iter() {
|
||||
if !l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if l.get_key_range() != (Key::MIN..Key::MAX) {
|
||||
continue;
|
||||
}
|
||||
deltas.push(Arc::clone(l));
|
||||
}
|
||||
Ok(deltas)
|
||||
Ok(self.l0_delta_layers.clone())
|
||||
}
|
||||
|
||||
/// debugging function to print out the contents of the layer map
|
||||
@@ -370,8 +569,8 @@ impl LayerMap {
|
||||
}
|
||||
|
||||
println!("historic_layers:");
|
||||
for layer in self.historic_layers.iter() {
|
||||
layer.dump(verbose)?;
|
||||
for e in self.historic_layers.iter() {
|
||||
e.layer.dump(verbose)?;
|
||||
}
|
||||
println!("End dump LayerMap");
|
||||
Ok(())
|
||||
|
||||
@@ -2050,7 +2050,7 @@ impl Timeline {
|
||||
l.filename().display(),
|
||||
l.is_incremental(),
|
||||
);
|
||||
layers_to_remove.push(Arc::clone(l));
|
||||
layers_to_remove.push(Arc::clone(&l));
|
||||
}
|
||||
|
||||
// Actually delete the layers from disk and remove them from the map.
|
||||
|
||||
39
test_runner/performance/test_layer_map.py
Normal file
39
test_runner/performance/test_layer_map.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import time
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
#
|
||||
# Benchmark searching the layer map, when there are a lot of small layer files.
|
||||
#
|
||||
def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
n_iters = 10
|
||||
n_records = 100000
|
||||
|
||||
# We want to have a lot of lot of layer files to exercise the layer map. Make
|
||||
# gc_horizon and checkpoint_distance very small, so that we get a lot of small layer files.
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"gc_period": "100 m",
|
||||
"gc_horizon": "1048576",
|
||||
"checkpoint_distance": "8192",
|
||||
"compaction_period": "1 s",
|
||||
"compaction_threshold": "1",
|
||||
"compaction_target_size": "8192",
|
||||
}
|
||||
)
|
||||
|
||||
env.neon_cli.create_timeline("test_layer_map", tenant_id=tenant)
|
||||
pg = env.postgres.create_start("test_layer_map", tenant_id=tenant)
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute("create table t(x integer)")
|
||||
for i in range(n_iters):
|
||||
cur.execute(f"insert into t values (generate_series(1,{n_records}))")
|
||||
time.sleep(1)
|
||||
|
||||
cur.execute("vacuum t")
|
||||
with zenbenchmark.record_duration("test_query"):
|
||||
cur.execute("SELECT count(*) from t")
|
||||
assert cur.fetchone() == (n_iters * n_records,)
|
||||
@@ -30,7 +30,7 @@ memchr = { version = "2", features = ["std", "use_std"] }
|
||||
nom = { version = "7", features = ["alloc", "std"] }
|
||||
num-bigint = { version = "0.4", features = ["std"] }
|
||||
num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] }
|
||||
num-traits = { version = "0.2", features = ["i128", "std"] }
|
||||
num-traits = { version = "0.2", features = ["i128", "libm", "std"] }
|
||||
prost = { version = "0.10", features = ["prost-derive", "std"] }
|
||||
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
|
||||
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
|
||||
@@ -38,6 +38,7 @@ regex-automata = { version = "0.1", features = ["regex-syntax", "std"] }
|
||||
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
|
||||
scopeguard = { version = "1", features = ["use_std"] }
|
||||
serde = { version = "1", features = ["alloc", "derive", "serde_derive", "std"] }
|
||||
stable_deref_trait = { version = "1", features = ["alloc", "std"] }
|
||||
time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "std", "time-macros"] }
|
||||
tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros"] }
|
||||
tokio-util = { version = "0.7", features = ["codec", "io", "io-util", "tracing"] }
|
||||
|
||||
Reference in New Issue
Block a user