mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 00:32:55 +00:00
Compare commits
4 Commits
issue/969-
...
debugging-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
08f7706973 | ||
|
|
bf6e6e8a7c | ||
|
|
203b0256a3 | ||
|
|
caf2a38b7e |
@@ -47,6 +47,7 @@ murmurhash32 = "0.2"
|
||||
chrono = "0.4"
|
||||
smallvec = "1"
|
||||
rayon = "1"
|
||||
env_logger = "0.8"
|
||||
lru = "0.6"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
|
||||
@@ -35,12 +35,18 @@ fn load_metas(
|
||||
inventory: &SegmentMetaInventory,
|
||||
) -> crate::Result<IndexMeta> {
|
||||
let meta_data = directory.atomic_read(&META_FILEPATH)?;
|
||||
let meta_string = String::from_utf8_lossy(&meta_data);
|
||||
let meta_string = String::from_utf8(meta_data)
|
||||
.map_err(|utf8_err| {
|
||||
DataCorruption::new(
|
||||
META_FILEPATH.to_path_buf(),
|
||||
format!("Meta file is not valid utf-8. {:?}", utf8_err)
|
||||
)
|
||||
})?;
|
||||
IndexMeta::deserialize(&meta_string, &inventory)
|
||||
.map_err(|e| {
|
||||
DataCorruption::new(
|
||||
META_FILEPATH.to_path_buf(),
|
||||
format!("Meta file cannot be deserialized. {:?}.", e),
|
||||
format!("Meta file cannot be deserialized. {:?}. content = {}", e, meta_string),
|
||||
)
|
||||
})
|
||||
.map_err(From::from)
|
||||
|
||||
@@ -20,6 +20,7 @@ fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> {
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_functional_store() -> crate::Result<()> {
|
||||
env_logger::init();
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
let id_field = schema_builder.add_u64_field("id", INDEXED | STORED);
|
||||
@@ -35,8 +36,7 @@ fn test_functional_store() -> crate::Result<()> {
|
||||
let mut doc_set: Vec<u64> = Vec::new();
|
||||
|
||||
let mut doc_id = 0u64;
|
||||
for iteration in 0..500 {
|
||||
dbg!(iteration);
|
||||
for iteration in 0.. {
|
||||
let num_docs: usize = rng.gen_range(0..4);
|
||||
if doc_set.len() >= 1 {
|
||||
let doc_to_remove_id = rng.gen_range(0..doc_set.len());
|
||||
@@ -51,6 +51,7 @@ fn test_functional_store() -> crate::Result<()> {
|
||||
index_writer.commit()?;
|
||||
reader.reload()?;
|
||||
let searcher = reader.searcher();
|
||||
println!("#{} - {}", iteration, searcher.segment_readers().len());
|
||||
check_index_content(&searcher, &doc_set)?;
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -43,6 +43,9 @@ impl CheckpointBlock {
|
||||
|
||||
/// Adding another checkpoint in the block.
|
||||
pub fn push(&mut self, checkpoint: Checkpoint) {
|
||||
if let Some(prev_checkpoint) = self.checkpoints.last() {
|
||||
assert!(checkpoint.follows(prev_checkpoint));
|
||||
}
|
||||
self.checkpoints.push(checkpoint);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const CHECKPOINT_PERIOD: usize = 8;
|
||||
const CHECKPOINT_PERIOD: usize = 2;
|
||||
|
||||
use std::fmt;
|
||||
mod block;
|
||||
@@ -26,6 +26,13 @@ pub struct Checkpoint {
|
||||
pub end_offset: u64,
|
||||
}
|
||||
|
||||
impl Checkpoint {
|
||||
pub(crate) fn follows(&self, other: &Checkpoint) -> bool {
|
||||
(self.start_doc == other.end_doc) &&
|
||||
(self.start_offset == other.end_offset)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Checkpoint {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
@@ -137,7 +144,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_store_with_stacking() -> crate::Result<()> {
|
||||
fn test_merge_store_with_stacking_reproducing_issue969() -> crate::Result<()> {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let text = schema_builder.add_text_field("text", STORED | STRING);
|
||||
let body = schema_builder.add_text_field("body", STORED);
|
||||
|
||||
@@ -28,18 +28,20 @@ impl LayerBuilder {
|
||||
///
|
||||
/// If the block was empty to begin with, simply return None.
|
||||
fn flush_block(&mut self) -> Option<Checkpoint> {
|
||||
self.block.doc_interval().map(|(start_doc, end_doc)| {
|
||||
if let Some((start_doc, end_doc)) = self.block.doc_interval() {
|
||||
let start_offset = self.buffer.len() as u64;
|
||||
self.block.serialize(&mut self.buffer);
|
||||
let end_offset = self.buffer.len() as u64;
|
||||
self.block.clear();
|
||||
Checkpoint {
|
||||
Some(Checkpoint {
|
||||
start_doc,
|
||||
end_doc,
|
||||
start_offset,
|
||||
end_offset,
|
||||
}
|
||||
})
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, checkpoint: Checkpoint) {
|
||||
@@ -48,7 +50,7 @@ impl LayerBuilder {
|
||||
|
||||
fn insert(&mut self, checkpoint: Checkpoint) -> Option<Checkpoint> {
|
||||
self.push(checkpoint);
|
||||
let emit_skip_info = (self.block.len() % CHECKPOINT_PERIOD) == 0;
|
||||
let emit_skip_info = self.block.len() >= CHECKPOINT_PERIOD;
|
||||
if emit_skip_info {
|
||||
self.flush_block()
|
||||
} else {
|
||||
|
||||
50
src/store/tests_store.rs
Normal file
50
src/store/tests_store.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use std::path::Path;
|
||||
|
||||
use crate::HasLen;
|
||||
use crate::directory::{Directory, ManagedDirectory, MmapDirectory, RAMDirectory};
|
||||
use crate::fastfield::DeleteBitSet;
|
||||
|
||||
use super::{StoreReader, StoreWriter};
|
||||
|
||||
#[test]
|
||||
fn test_toto2() -> crate::Result<()> {
|
||||
let directory = ManagedDirectory::wrap(MmapDirectory::open("src/store/broken_seg")?)?;
|
||||
let path = Path::new("b6029ade1b954ea1acad15b432eaacb9.store");
|
||||
assert!(directory.validate_checksum(path)?);
|
||||
let store_file = directory.open_read(path)?;
|
||||
let store = StoreReader::open(store_file)?;
|
||||
let documents = store.documents();
|
||||
// for doc in documents {
|
||||
// println!("{:?}", doc);
|
||||
// }
|
||||
let doc= store.get(15_086)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toto() -> crate::Result<()> {
|
||||
let directory = ManagedDirectory::wrap(MmapDirectory::open("src/store/broken_seg")?)?;
|
||||
assert!(directory.validate_checksum(Path::new("e6ece22e5bca4e0dbe7ce3e4dcbd5bbf.store"))?);
|
||||
let store_file = directory.open_read(Path::new("e6ece22e5bca4e0dbe7ce3e4dcbd5bbf.store.patched"))?;
|
||||
let store = StoreReader::open(store_file)?;
|
||||
let doc= store.get(53)?;
|
||||
println!("{:?}", doc);
|
||||
// let documents = store.documents();
|
||||
// let ram_directory = RAMDirectory::create();
|
||||
// let path = Path::new("store");
|
||||
|
||||
// let store_wrt = ram_directory.open_write(path)?;
|
||||
// let mut store_writer = StoreWriter::new(store_wrt);
|
||||
// for doc in &documents {
|
||||
// store_writer.store(doc)?;
|
||||
// }
|
||||
// store_writer.close()?;
|
||||
// let store_data = ram_directory.open_read(path)?;
|
||||
// let new_store = StoreReader::open(store_data)?;
|
||||
// for doc in 0..59 {
|
||||
// println!("{}", doc);
|
||||
// let doc = new_store.get(doc)?;
|
||||
// println!("{:?}", doc);
|
||||
// }
|
||||
Ok(())
|
||||
}
|
||||
@@ -10,7 +10,7 @@ use crate::store::index::Checkpoint;
|
||||
use crate::DocId;
|
||||
use std::io::{self, Write};
|
||||
|
||||
const BLOCK_SIZE: usize = 16_384;
|
||||
const BLOCK_SIZE: usize = 30;
|
||||
|
||||
/// Write tantivy's [`Store`](./index.html)
|
||||
///
|
||||
@@ -72,6 +72,7 @@ impl StoreWriter {
|
||||
if !self.current_block.is_empty() {
|
||||
self.write_and_compress_block()?;
|
||||
}
|
||||
assert_eq!(self.first_doc_in_block, self.doc);
|
||||
let doc_shift = self.doc;
|
||||
let start_shift = self.writer.written_bytes() as u64;
|
||||
|
||||
@@ -86,12 +87,17 @@ impl StoreWriter {
|
||||
checkpoint.end_doc += doc_shift;
|
||||
checkpoint.start_offset += start_shift;
|
||||
checkpoint.end_offset += start_shift;
|
||||
self.offset_index_writer.insert(checkpoint);
|
||||
self.doc = checkpoint.end_doc;
|
||||
self.register_checkpoint(checkpoint);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn register_checkpoint(&mut self, checkpoint: Checkpoint) {
|
||||
self.offset_index_writer.insert(checkpoint);
|
||||
self.first_doc_in_block = checkpoint.end_doc;
|
||||
self.doc = checkpoint.end_doc;
|
||||
}
|
||||
|
||||
fn write_and_compress_block(&mut self) -> io::Result<()> {
|
||||
assert!(self.doc > 0);
|
||||
self.intermediary_buffer.clear();
|
||||
@@ -100,14 +106,13 @@ impl StoreWriter {
|
||||
self.writer.write_all(&self.intermediary_buffer)?;
|
||||
let end_offset = self.writer.written_bytes();
|
||||
let end_doc = self.doc;
|
||||
self.offset_index_writer.insert(Checkpoint {
|
||||
self.register_checkpoint(Checkpoint {
|
||||
start_doc: self.first_doc_in_block,
|
||||
end_doc,
|
||||
start_offset,
|
||||
end_offset,
|
||||
});
|
||||
self.current_block.clear();
|
||||
self.first_doc_in_block = self.doc;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user