From ddceda4ff7c391bc8d0d06435ff3435213b065c4 Mon Sep 17 00:00:00 2001 From: Rob Meng Date: Tue, 4 Jun 2024 19:03:53 -0400 Subject: [PATCH] feat: add fast path to dataset reload (#1354) most of the time we don't need to reload. Locking the write lock and performing IO is not an ideal pattern. This PR tries to make the critical section of `.write()` happen less frequently. This isn't the most ideal solution. The most ideal solution should not lock until the new dataset has been loaded. But that would require too much refactoring. --- rust/lancedb/src/table/dataset.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/rust/lancedb/src/table/dataset.rs b/rust/lancedb/src/table/dataset.rs index 91c605e0..f1f27a11 100644 --- a/rust/lancedb/src/table/dataset.rs +++ b/rust/lancedb/src/table/dataset.rs @@ -66,6 +66,15 @@ impl DatasetRef { Ok(()) } + async fn need_reload(&self) -> Result { + Ok(match self { + Self::Latest { dataset, .. } => { + dataset.latest_version_id().await? != dataset.version().version + } + Self::TimeTravel { dataset, version } => dataset.version().version != *version, + }) + } + async fn as_latest(&mut self, read_consistency_interval: Option) -> Result<()> { match self { Self::Latest { .. } => Ok(()), @@ -129,7 +138,7 @@ impl DatasetConsistencyWrapper { Self(Arc::new(RwLock::new(DatasetRef::Latest { dataset, read_consistency_interval, - last_consistency_check: None, + last_consistency_check: Some(Instant::now()), }))) } @@ -183,7 +192,18 @@ impl DatasetConsistencyWrapper { } pub async fn reload(&self) -> Result<()> { - self.0.write().await.reload().await + if !self.0.read().await.need_reload().await? { + return Ok(()); + } + + let mut write_guard = self.0.write().await; + // on lock escalation -- check if someone else has already reloaded + if !write_guard.need_reload().await? { + return Ok(()); + } + + // actually need reloading + write_guard.reload().await } /// Returns the version, if in time travel mode, or None otherwise