From 08f770697351238570ec14c12512ee89d27de2c0 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 9 Jan 2021 10:27:03 +0900 Subject: [PATCH] test store --- Cargo.toml | 1 + src/core/index.rs | 10 ++++++-- src/functional_test.rs | 5 ++-- src/store/index/mod.rs | 2 +- src/store/tests_store.rs | 50 ++++++++++++++++++++++++++++++++++++++++ src/store/writer.rs | 2 +- 6 files changed, 64 insertions(+), 6 deletions(-) create mode 100644 src/store/tests_store.rs diff --git a/Cargo.toml b/Cargo.toml index 6648e0a83..0c71eb858 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ murmurhash32 = "0.2" chrono = "0.4" smallvec = "1" rayon = "1" +env_logger = "0.8" lru = "0.6" [target.'cfg(windows)'.dependencies] diff --git a/src/core/index.rs b/src/core/index.rs index 12ef5e37f..4cda59e44 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -35,12 +35,18 @@ fn load_metas( inventory: &SegmentMetaInventory, ) -> crate::Result { let meta_data = directory.atomic_read(&META_FILEPATH)?; - let meta_string = String::from_utf8_lossy(&meta_data); + let meta_string = String::from_utf8(meta_data) + .map_err(|utf8_err| { + DataCorruption::new( + META_FILEPATH.to_path_buf(), + format!("Meta file is not valid utf-8. {:?}", utf8_err) + ) + })?; IndexMeta::deserialize(&meta_string, &inventory) .map_err(|e| { DataCorruption::new( META_FILEPATH.to_path_buf(), - format!("Meta file cannot be deserialized. {:?}.", e), + format!("Meta file cannot be deserialized. {:?}. content = {}", e, meta_string), ) }) .map_err(From::from) diff --git a/src/functional_test.rs b/src/functional_test.rs index 478a99686..d7ecdad7e 100644 --- a/src/functional_test.rs +++ b/src/functional_test.rs @@ -20,6 +20,7 @@ fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> { #[test] #[ignore] fn test_functional_store() -> crate::Result<()> { + env_logger::init(); let mut schema_builder = Schema::builder(); let id_field = schema_builder.add_u64_field("id", INDEXED | STORED); @@ -35,8 +36,7 @@ fn test_functional_store() -> crate::Result<()> { let mut doc_set: Vec = Vec::new(); let mut doc_id = 0u64; - for iteration in 0..500 { - dbg!(iteration); + for iteration in 0.. { let num_docs: usize = rng.gen_range(0..4); if doc_set.len() >= 1 { let doc_to_remove_id = rng.gen_range(0..doc_set.len()); @@ -51,6 +51,7 @@ fn test_functional_store() -> crate::Result<()> { index_writer.commit()?; reader.reload()?; let searcher = reader.searcher(); + println!("#{} - {}", iteration, searcher.segment_readers().len()); check_index_content(&searcher, &doc_set)?; } Ok(()) diff --git a/src/store/index/mod.rs b/src/store/index/mod.rs index 708b6fbfe..d6c5b60ab 100644 --- a/src/store/index/mod.rs +++ b/src/store/index/mod.rs @@ -1,4 +1,4 @@ -const CHECKPOINT_PERIOD: usize = 8; +const CHECKPOINT_PERIOD: usize = 2; use std::fmt; mod block; diff --git a/src/store/tests_store.rs b/src/store/tests_store.rs new file mode 100644 index 000000000..a59a09b7e --- /dev/null +++ b/src/store/tests_store.rs @@ -0,0 +1,50 @@ +use std::path::Path; + +use crate::HasLen; +use crate::directory::{Directory, ManagedDirectory, MmapDirectory, RAMDirectory}; +use crate::fastfield::DeleteBitSet; + +use super::{StoreReader, StoreWriter}; + +#[test] +fn test_toto2() -> crate::Result<()> { + let directory = ManagedDirectory::wrap(MmapDirectory::open("src/store/broken_seg")?)?; + let path = Path::new("b6029ade1b954ea1acad15b432eaacb9.store"); + assert!(directory.validate_checksum(path)?); + let store_file = directory.open_read(path)?; + let store = StoreReader::open(store_file)?; + let documents = store.documents(); + // for doc in documents { + // println!("{:?}", doc); + // } + let doc= store.get(15_086)?; + Ok(()) +} + +#[test] +fn test_toto() -> crate::Result<()> { + let directory = ManagedDirectory::wrap(MmapDirectory::open("src/store/broken_seg")?)?; + assert!(directory.validate_checksum(Path::new("e6ece22e5bca4e0dbe7ce3e4dcbd5bbf.store"))?); + let store_file = directory.open_read(Path::new("e6ece22e5bca4e0dbe7ce3e4dcbd5bbf.store.patched"))?; + let store = StoreReader::open(store_file)?; + let doc= store.get(53)?; + println!("{:?}", doc); + // let documents = store.documents(); + // let ram_directory = RAMDirectory::create(); + // let path = Path::new("store"); + + // let store_wrt = ram_directory.open_write(path)?; + // let mut store_writer = StoreWriter::new(store_wrt); + // for doc in &documents { + // store_writer.store(doc)?; + // } + // store_writer.close()?; + // let store_data = ram_directory.open_read(path)?; + // let new_store = StoreReader::open(store_data)?; + // for doc in 0..59 { + // println!("{}", doc); + // let doc = new_store.get(doc)?; + // println!("{:?}", doc); + // } + Ok(()) +} diff --git a/src/store/writer.rs b/src/store/writer.rs index 3309f1a64..540c4db95 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -10,7 +10,7 @@ use crate::store::index::Checkpoint; use crate::DocId; use std::io::{self, Write}; -const BLOCK_SIZE: usize = 16_384; +const BLOCK_SIZE: usize = 30; /// Write tantivy's [`Store`](./index.html) ///