From ce19fedb08d189952f08b7038d279597e15859a6 Mon Sep 17 00:00:00 2001 From: Rob Meng Date: Sat, 21 Oct 2023 12:21:41 -0400 Subject: [PATCH] feat: include manifest files in mirrow store (#589) --- node/src/integration_test/test.ts | 19 +++++++++++++------ rust/vectordb/src/io/object_store.rs | 13 +++++++++---- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/node/src/integration_test/test.ts b/node/src/integration_test/test.ts index 4828213b..7a09db67 100644 --- a/node/src/integration_test/test.ts +++ b/node/src/integration_test/test.ts @@ -65,8 +65,8 @@ describe('LanceDB Mirrored Store Integration test', function () { const mirroredPath = path.join(dir, `${tableName}.lance`) fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => { if (err != null) throw err - // there should be two dirs - assert.equal(files.length, 2) + // there should be three dirs + assert.equal(files.length, 3) assert.isTrue(files[0].isDirectory()) assert.isTrue(files[1].isDirectory()) @@ -76,6 +76,12 @@ describe('LanceDB Mirrored Store Integration test', function () { assert.isTrue(files[0].name.endsWith('.txn')) }) + fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => { + if (err != null) throw err + assert.equal(files.length, 1) + assert.isTrue(files[0].name.endsWith('.manifest')) + }) + fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => { if (err != null) throw err assert.equal(files.length, 1) @@ -88,8 +94,8 @@ describe('LanceDB Mirrored Store Integration test', function () { fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => { if (err != null) throw err - // there should be two dirs - assert.equal(files.length, 3) + // there should be four dirs + assert.equal(files.length, 4) assert.isTrue(files[0].isDirectory()) assert.isTrue(files[1].isDirectory()) assert.isTrue(files[2].isDirectory()) @@ -128,12 +134,13 @@ describe('LanceDB Mirrored Store Integration test', function () { fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => { if (err != null) throw err - // there should be two dirs - assert.equal(files.length, 4) + // there should be five dirs + assert.equal(files.length, 5) assert.isTrue(files[0].isDirectory()) assert.isTrue(files[1].isDirectory()) assert.isTrue(files[2].isDirectory()) assert.isTrue(files[3].isDirectory()) + assert.isTrue(files[4].isDirectory()) // Three TXs now fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => { diff --git a/rust/vectordb/src/io/object_store.rs b/rust/vectordb/src/io/object_store.rs index 77234b14..1d0fbdc6 100644 --- a/rust/vectordb/src/io/object_store.rs +++ b/rust/vectordb/src/io/object_store.rs @@ -57,7 +57,7 @@ trait PrimaryOnly { impl PrimaryOnly for Path { fn primary_only(&self) -> bool { - self.to_string().contains("manifest") + self.filename().unwrap_or("") == "_latest.manifest" } } @@ -118,8 +118,10 @@ impl ObjectStore for MirroringObjectStore { self.primary.head(location).await } - // garbage collection on secondary will happen async from other means async fn delete(&self, location: &Path) -> Result<()> { + if !location.primary_only() { + self.secondary.delete(location).await?; + } self.primary.delete(location).await } @@ -132,7 +134,7 @@ impl ObjectStore for MirroringObjectStore { } async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - if from.primary_only() { + if to.primary_only() { self.primary.copy(from, to).await } else { self.secondary.copy(from, to).await?; @@ -142,6 +144,9 @@ impl ObjectStore for MirroringObjectStore { } async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + if !to.primary_only() { + self.secondary.copy(from, to).await?; + } self.primary.copy_if_not_exists(from, to).await } } @@ -379,7 +384,7 @@ mod test { let primary_f = primary_elem.unwrap().unwrap(); // hit manifest, skip, _versions contains all the manifest and should not exist on secondary let primary_raw_path = primary_f.file_name().to_str().unwrap(); - if primary_raw_path.contains("manifest") || primary_raw_path.contains("_versions") { + if primary_raw_path.contains("_latest.manifest") { primary_elem = primary_iter.next(); continue; }