mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
issue/43 refactoring segment meta
This commit is contained in:
@@ -41,25 +41,20 @@ impl Segment {
|
||||
/// Returns the schema of the index held by this segment.
///
/// This simply delegates to `self.index.schema()`.
pub fn schema(&self,) -> Schema {
|
||||
self.index.schema()
|
||||
}
|
||||
|
||||
|
||||
/// Returns a shared reference to this segment's `SegmentMeta`.
pub fn meta(&self,) -> &SegmentMeta {
|
||||
&self.meta
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to this segment's `SegmentMeta`,
/// so that the metadata can be updated in place.
pub fn meta_mut(&mut self,) -> &mut SegmentMeta {
|
||||
&mut self.meta
|
||||
}
|
||||
|
||||
/// Returns the segment's id.
///
/// The id is read from the `segment_id` field of the segment's metadata.
|
||||
pub fn id(&self,) -> SegmentId {
|
||||
self.meta.segment_id
|
||||
}
|
||||
|
||||
/// Consumes this segment and returns a new `Segment` with the same
/// index, whose metadata has `delete_opstamp` set to `Some(opstamp)`.
///
/// All other metadata fields are carried over unchanged.
pub fn with_delete_opstamp(self, opstamp: u64) -> Segment {
|
||||
let mut meta = self.meta;
|
||||
meta.delete_opstamp = Some(opstamp);
|
||||
Segment {
|
||||
index: self.index,
|
||||
meta: meta,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the relative path of a component of our segment.
|
||||
///
|
||||
/// It just joins the segment id with the extension
|
||||
@@ -75,7 +70,7 @@ impl Segment {
|
||||
STORE => ".store".to_string(),
|
||||
FASTFIELDS => ".fast".to_string(),
|
||||
FIELDNORMS => ".fieldnorm".to_string(),
|
||||
DELETE => {format!(".{}.del", self.meta.delete_opstamp.unwrap_or(0))},
|
||||
DELETE => {format!(".{}.del", self.meta.delete_opstamp().unwrap_or(0))},
|
||||
});
|
||||
PathBuf::from(path)
|
||||
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
use core::SegmentId;
|
||||
|
||||
|
||||
// TODO Option<DeleteMeta>
|
||||
/// Delete information recorded for a segment.
///
/// Stored inside `SegmentMeta` as an `Option<DeleteMeta>`.
#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
|
||||
struct DeleteMeta {
|
||||
// Number of deleted documents, as passed to `SegmentMeta::set_deletes`.
num_deleted_docs: u32,
|
||||
// Opstamp passed to `SegmentMeta::set_deletes`; returned by
// `SegmentMeta::delete_opstamp`.
opstamp: u64,
|
||||
}
|
||||
|
||||
/// Metadata describing a segment: its id, its number of documents,
/// and its delete information.
#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
|
||||
pub struct SegmentMeta {
|
||||
pub segment_id: SegmentId,
|
||||
pub num_docs: u32,
|
||||
pub num_deleted_docs: u32,
|
||||
pub delete_opstamp: Option<u64>,
|
||||
num_docs: u32,
|
||||
deletes: Option<DeleteMeta>,
|
||||
}
|
||||
|
||||
impl SegmentMeta {
|
||||
@@ -16,8 +19,36 @@ impl SegmentMeta {
|
||||
SegmentMeta {
|
||||
segment_id: segment_id,
|
||||
num_docs: 0,
|
||||
num_deleted_docs: 0,
|
||||
delete_opstamp: None,
|
||||
deletes: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the segment id stored in this metadata.
pub fn id(&self) -> SegmentId {
|
||||
self.segment_id
|
||||
}
|
||||
|
||||
/// Returns the number of documents recorded in this metadata
/// (the value last given to `set_num_docs`, or the initial value).
pub fn num_docs(&self) -> u32 {
|
||||
self.num_docs
|
||||
}
|
||||
|
||||
/// Returns the opstamp of the recorded delete information,
/// or `None` when no delete information is present.
pub fn delete_opstamp(&self) -> Option<u64> {
|
||||
self.deletes
|
||||
.as_ref()
|
||||
.map(|delete_meta| delete_meta.opstamp)
|
||||
}
|
||||
|
||||
/// Returns `true` when delete information is present,
/// i.e. when the `deletes` field is `Some`.
pub fn has_deletes(&self) -> bool {
|
||||
self.deletes.is_some()
|
||||
}
|
||||
|
||||
/// Overwrites the recorded number of documents with `num_docs`.
pub fn set_num_docs(&mut self, num_docs: u32) {
|
||||
self.num_docs = num_docs;
|
||||
}
|
||||
|
||||
/// Records delete information for the segment.
///
/// Stores `num_deleted_docs` and `opstamp` in a fresh `DeleteMeta`,
/// replacing any delete information that was previously recorded.
/// After this call `has_deletes()` returns `true` and
/// `delete_opstamp()` returns `Some(opstamp)`.
pub fn set_deletes(&mut self, num_deleted_docs: u32, opstamp: u64) {
|
||||
self.deletes = Some(DeleteMeta {
|
||||
num_deleted_docs: num_deleted_docs,
|
||||
opstamp: opstamp,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,7 +148,7 @@ impl SegmentReader {
|
||||
|
||||
// TODO 0u64
|
||||
let delete_bitset =
|
||||
if segment.meta().delete_opstamp.is_some() {
|
||||
if segment.meta().has_deletes() {
|
||||
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
|
||||
DeleteBitSet::open(delete_data)
|
||||
}
|
||||
|
||||
@@ -172,34 +172,27 @@ fn index_documents(heap: &mut Heap,
|
||||
}
|
||||
}
|
||||
let num_docs = segment_writer.max_doc();
|
||||
|
||||
// this is ensured by the call to peek before starting
|
||||
// the worker thread.
|
||||
assert!(num_docs > 0);
|
||||
|
||||
|
||||
segment
|
||||
.meta_mut()
|
||||
.set_num_docs(num_docs);
|
||||
|
||||
let last_opstamp = segment_writer.last_opstamp();
|
||||
|
||||
let doc_opstamps: Vec<u64> = segment_writer.finalize()?;
|
||||
|
||||
let segment_meta =
|
||||
if let Some((last_opstamp_after_deletes, deleted_docset)) = advance_deletes(&segment, delete_cursor, DocToOpstampMapping::WithMap(doc_opstamps))? {
|
||||
let mut delete_file = segment.with_delete_opstamp(last_opstamp_after_deletes).open_write(SegmentComponent::DELETE)?;
|
||||
write_delete_bitset(&deleted_docset, &mut delete_file)?;
|
||||
SegmentMeta {
|
||||
segment_id: segment_id,
|
||||
num_docs: num_docs,
|
||||
num_deleted_docs: deleted_docset.len() as DocId,
|
||||
delete_opstamp: Some(last_opstamp_after_deletes),
|
||||
}
|
||||
}
|
||||
else {
|
||||
SegmentMeta {
|
||||
segment_id: segment_id,
|
||||
num_docs: num_docs,
|
||||
num_deleted_docs: 0,
|
||||
delete_opstamp: None,
|
||||
}
|
||||
};
|
||||
if let Some((last_opstamp_after_deletes, deleted_docset)) = advance_deletes(&segment, delete_cursor, DocToOpstampMapping::WithMap(doc_opstamps))? {
|
||||
let num_deleted_docs = deleted_docset.len();
|
||||
segment.meta_mut().set_deletes(num_deleted_docs as u32, last_opstamp_after_deletes);
|
||||
let mut delete_file = segment.open_write(SegmentComponent::DELETE)?;
|
||||
write_delete_bitset(&deleted_docset, &mut delete_file)?;
|
||||
}
|
||||
|
||||
let segment_entry = SegmentEntry::new(segment_meta, delete_cursor.clone());
|
||||
let segment_entry = SegmentEntry::new(segment.meta().clone(), delete_cursor.clone());
|
||||
|
||||
segment_updater
|
||||
.add_segment(generation, segment_entry)
|
||||
|
||||
@@ -53,7 +53,7 @@ impl MergePolicy for LogMergePolicy {
|
||||
}
|
||||
|
||||
let mut size_sorted_tuples = segments.iter()
|
||||
.map(|x| x.num_docs)
|
||||
.map(|x| x.num_docs())
|
||||
.enumerate()
|
||||
.collect::<Vec<(usize, u32)>>();
|
||||
|
||||
@@ -121,12 +121,9 @@ mod tests {
|
||||
}
|
||||
|
||||
// Test helper: builds a `SegmentMeta` with a randomly generated
// segment id, `num_docs` documents, and no delete information.
fn seg_meta(num_docs: u32) -> SegmentMeta {
|
||||
SegmentMeta {
|
||||
segment_id: SegmentId::generate_random(),
|
||||
num_docs: num_docs,
|
||||
num_deleted_docs: 0u32,
|
||||
delete_opstamp: None,
|
||||
}
|
||||
let mut segment_metas = SegmentMeta::new(SegmentId::generate_random());
|
||||
segment_metas.set_num_docs(num_docs);
|
||||
segment_metas
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -183,7 +183,11 @@ impl SegmentUpdater {
|
||||
segment_entry.delete_cursor(),
|
||||
DocToOpstampMapping::None).unwrap()
|
||||
{
|
||||
let mut delete_file = segment.with_delete_opstamp(target_opstamp).open_write(SegmentComponent::DELETE)?;
|
||||
let num_deleted_docs = deleted_docset.len();
|
||||
// TODO previous mask?
|
||||
// TODO save the resulting segment_entry
|
||||
segment.meta_mut().set_deletes(num_deleted_docs as u32, target_opstamp);
|
||||
let mut delete_file = segment.open_write(SegmentComponent::DELETE)?;
|
||||
write_delete_bitset(&deleted_docset, &mut delete_file)?;
|
||||
}
|
||||
}
|
||||
@@ -252,12 +256,8 @@ impl SegmentUpdater {
|
||||
// to merge the two segments.
|
||||
let segment_serializer = SegmentSerializer::for_segment(&mut merged_segment).expect("Creating index serializer failed");
|
||||
let num_docs = merger.write(segment_serializer).expect("Serializing merged index failed");
|
||||
let segment_meta = SegmentMeta {
|
||||
segment_id: merged_segment.id(),
|
||||
num_docs: num_docs,
|
||||
num_deleted_docs: 0u32,
|
||||
delete_opstamp: None, // TODO fix delete_opstamp
|
||||
};
|
||||
let mut segment_meta = SegmentMeta::new(merged_segment.id());
|
||||
segment_meta.set_num_docs(num_docs);
|
||||
|
||||
// TODO fix delete cursor
|
||||
let delete_queue = DeleteQueue::default();
|
||||
|
||||
Reference in New Issue
Block a user