mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
toto
This commit is contained in:
171
src/core/skip.rs
171
src/core/skip.rs
@@ -83,19 +83,21 @@ impl LayerBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
fn insert<S: BinarySerializable>(&mut self, doc_id: DocId, value: &S) -> InsertResult {
|
||||
fn insert<S: BinarySerializable>(&mut self, doc_id: DocId, value: &S) -> Option<(DocId, u32)> {
|
||||
self.remaining -= 1;
|
||||
self.len += 1;
|
||||
let offset = self.written_size(); // TODO not sure if we want after or here
|
||||
self.buffer.write_u32::<BigEndian>(doc_id);
|
||||
value.serialize(&mut self.buffer);
|
||||
let offset = self.written_size() as u32; // TODO not sure if we want after or here
|
||||
let mut res;
|
||||
if self.remaining == 0 {
|
||||
self.remaining = self.period;
|
||||
InsertResult::SkipPointer(offset as u32)
|
||||
res = Some((doc_id, offset));
|
||||
}
|
||||
else {
|
||||
InsertResult::NoNeedForSkip
|
||||
res = None;
|
||||
}
|
||||
self.buffer.write_u32::<BigEndian>(doc_id);
|
||||
value.serialize(&mut self.buffer);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,11 +108,6 @@ pub struct SkipListBuilder {
|
||||
}
|
||||
|
||||
|
||||
enum InsertResult {
|
||||
SkipPointer(u32),
|
||||
NoNeedForSkip,
|
||||
}
|
||||
|
||||
impl SkipListBuilder {
|
||||
|
||||
pub fn new(period: usize) -> SkipListBuilder {
|
||||
@@ -131,25 +128,17 @@ impl SkipListBuilder {
|
||||
|
||||
pub fn insert<S: BinarySerializable>(&mut self, doc_id: DocId, dest: &S) {
|
||||
let mut layer_id = 0;
|
||||
match self.get_layer(0).insert(doc_id, dest) {
|
||||
InsertResult::SkipPointer(mut offset) => {
|
||||
loop {
|
||||
layer_id += 1;
|
||||
let skip_result = self.get_layer(layer_id)
|
||||
.insert(doc_id, &offset);
|
||||
match skip_result {
|
||||
InsertResult::SkipPointer(next_offset) => {
|
||||
offset = next_offset;
|
||||
},
|
||||
InsertResult::NoNeedForSkip => {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
InsertResult::NoNeedForSkip => {
|
||||
return;
|
||||
}
|
||||
let mut skip_pointer = self.get_layer(layer_id).insert(doc_id, dest);
|
||||
loop {
|
||||
layer_id += 1;
|
||||
println!("skip pointer {:?}", skip_pointer);
|
||||
skip_pointer = match skip_pointer {
|
||||
Some((skip_doc_id, skip_offset)) =>
|
||||
self
|
||||
.get_layer(layer_id)
|
||||
.insert(skip_doc_id, &skip_offset),
|
||||
None => { return; }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,18 +204,18 @@ fn test_rebase_cursor() {
|
||||
|
||||
struct Layer<'a, T> {
|
||||
cursor: Cursor<&'a [u8]>,
|
||||
next_id: u32,
|
||||
next_id: DocId,
|
||||
_phantom_: PhantomData<T>,
|
||||
}
|
||||
|
||||
|
||||
|
||||
impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> {
|
||||
|
||||
type Item = (DocId, T);
|
||||
|
||||
fn next(&mut self,)-> Option<(DocId, T)> {
|
||||
println!("next id! {}", self.next_id);
|
||||
println!("datalen{}", self.cursor.get_ref().len());
|
||||
println!("eeeeee");
|
||||
if self.next_id == u32::max_value() {
|
||||
None
|
||||
}
|
||||
@@ -238,7 +227,6 @@ impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> {
|
||||
Ok(val) => val,
|
||||
Err(_) => u32::max_value()
|
||||
};
|
||||
println!("next id==> {}", self.next_id);
|
||||
Some((cur_id, cur_val))
|
||||
}
|
||||
}
|
||||
@@ -255,7 +243,6 @@ impl<'a, T: BinarySerializable> Layer<'a, T> {
|
||||
Ok(val) => val,
|
||||
Err(_) => u32::max_value(),
|
||||
};
|
||||
println!("next_id {:?}", next_id);
|
||||
Layer {
|
||||
cursor: cursor,
|
||||
next_id: next_id,
|
||||
@@ -266,21 +253,57 @@ impl<'a, T: BinarySerializable> Layer<'a, T> {
|
||||
fn empty() -> Layer<'a, T> {
|
||||
Layer {
|
||||
cursor: Cursor::new(&EMPTY),
|
||||
next_id: u32::max_value(),
|
||||
next_id: DocId::max_value(),
|
||||
_phantom_: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn seek(doc_id: DocId) {
|
||||
// while self.next_doc_id < doc_id {
|
||||
// self.next_doc_id = cursor.read_u32::<BigEndian>();
|
||||
// self.cur_val = self.next_val;
|
||||
// self.next_doc_id = bincode::Deserializer::new(self.cursor, 8).read_u32();
|
||||
// self.next_val = bincode::Deserializer::new(self.cursor, 8).read_u32();
|
||||
// }
|
||||
|
||||
fn seek_offset(&mut self, offset: usize) {
|
||||
self.cursor.seek(SeekFrom::Start(offset as u64));
|
||||
self.next_id = match self.cursor.read_u32::<BigEndian>() {
|
||||
Ok(val) => val,
|
||||
Err(_) => u32::max_value(),
|
||||
};
|
||||
}
|
||||
|
||||
// Returns the last element (key, val)
|
||||
// such that (key < doc_id)
|
||||
//
|
||||
// If there is no such element anymore,
|
||||
// returns None.
|
||||
fn seek(&mut self, doc_id: DocId) -> Option<(DocId, T)> {
|
||||
let mut val = None;
|
||||
while self.next_id < doc_id {
|
||||
match self.next() {
|
||||
None => { break; },
|
||||
v => { val = v; }
|
||||
}
|
||||
}
|
||||
val
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) {
|
||||
for it in layer {
|
||||
println!(" - {:?}", it);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn display_skip_list<'a, T: BinarySerializable>(skip_list: &mut SkipList<'a, T>) {
|
||||
let mut i = 0;
|
||||
for mut layer in skip_list.skip_layers.iter_mut() {
|
||||
println!("SkipLayer {}", i);
|
||||
display_layer(&mut layer);
|
||||
i += 1;
|
||||
}
|
||||
println!("DataLayer {}", i);
|
||||
display_layer(&mut skip_list.data_layer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub struct SkipList<'a, T: BinarySerializable> {
|
||||
data_layer: Layer<'a, T>,
|
||||
skip_layers: Vec<Layer<'a, u32>>,
|
||||
@@ -297,48 +320,32 @@ impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> {
|
||||
|
||||
impl<'a, T: BinarySerializable> SkipList<'a, T> {
|
||||
|
||||
pub fn seek(&mut self, doc_id: DocId) {
|
||||
// let mut next_layer_offset: u64 = 0;
|
||||
// for skip_layer_id in 0..self.skip_layers.len() {
|
||||
// println!("LAYER {}", skip_layer_id);
|
||||
// let mut skip_layer: &mut Layer<'a, u32> = &mut self.skip_layers[skip_layer_id];
|
||||
// println!("seek {}", next_layer_offset);
|
||||
// if next_layer_offset > 0 {
|
||||
// skip_layer.cursor.seek(SeekFrom::Start(next_layer_offset));
|
||||
// next_layer_offset = 0;
|
||||
// }
|
||||
// println!("next id {}", skip_layer.next_id);
|
||||
// while skip_layer.next_id < doc_id {
|
||||
// match skip_layer.next() {
|
||||
// Some((_, offset)) => {
|
||||
// println!("bipoffset {}", offset);
|
||||
// next_layer_offset = offset as u64;
|
||||
// },
|
||||
// None => {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// for skip_layer in self.skip_layers.iter() {
|
||||
// println!("{}", skip_layer.len());
|
||||
// }
|
||||
// println!("last seek {}", next_layer_offset);
|
||||
// if next_layer_offset > 0 {
|
||||
// self.data_layer.cursor.seek(SeekFrom::Start(next_layer_offset));
|
||||
// }
|
||||
while self.data_layer.next_id < doc_id {
|
||||
match self.data_layer.next() {
|
||||
None => { break; },
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn seek(&mut self, doc_id: DocId) -> Option<(DocId, T)> {
|
||||
let mut next_layer_skip: Option<(DocId, u32)> = None;
|
||||
for skip_layer_id in 0..self.skip_layers.len() {
|
||||
let mut skip_layer: &mut Layer<'a, u32> = &mut self.skip_layers[skip_layer_id];
|
||||
println!("\n\nLAYER {}", skip_layer_id);
|
||||
println!("nextid before skip {}", skip_layer.next_id);
|
||||
match next_layer_skip {
|
||||
Some((_, offset)) => { skip_layer.seek_offset(offset as usize); },
|
||||
None => {}
|
||||
};
|
||||
println!("nextid after skip {}", skip_layer.next_id);
|
||||
next_layer_skip = skip_layer.seek(doc_id);
|
||||
println!("nextid after seek {}", skip_layer.next_id);
|
||||
println!("--- nextlayerskip {:?}", next_layer_skip);
|
||||
}
|
||||
match next_layer_skip {
|
||||
Some((_, offset)) => { self.data_layer.seek_offset(offset as usize); },
|
||||
None => {}
|
||||
};
|
||||
self.data_layer.seek(doc_id)
|
||||
}
|
||||
|
||||
pub fn read(data: &'a [u8]) -> SkipList<'a, T> {
|
||||
let mut cursor = Cursor::new(data);
|
||||
let offsets: Vec<u32> = Vec::deserialize(&mut cursor).unwrap();
|
||||
println!("offsets {:?}", offsets);
|
||||
let num_layers = offsets.len();
|
||||
println!("{} layers ", num_layers);
|
||||
|
||||
@@ -357,9 +364,7 @@ impl<'a, T: BinarySerializable> SkipList<'a, T> {
|
||||
skip_layers = offsets.iter()
|
||||
.zip(&offsets[1..])
|
||||
.map(|(start, stop)| {
|
||||
println!("start {} stop {}", start, stop);
|
||||
let layer_data: &[u8] = &data[*start as usize..*stop as usize];
|
||||
println!("datalen2 {}", layer_data.len());
|
||||
let layer_data: &[u8] = &layers_data[*start as usize..*stop as usize];
|
||||
let cursor = Cursor::new(layer_data);
|
||||
Layer::read(cursor)
|
||||
})
|
||||
|
||||
@@ -2,7 +2,7 @@ extern crate tantivy;
|
||||
extern crate byteorder;
|
||||
use std::io::{Write, Seek};
|
||||
use std::io::SeekFrom;
|
||||
use tantivy::core::skip::{SkipListBuilder, SkipList};
|
||||
use tantivy::core::skip::*;
|
||||
use std::io::Cursor;
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
|
||||
@@ -91,4 +91,49 @@ fn test_skip_list_reader() {
|
||||
assert_eq!(skip_list.next().unwrap(), (9, ()));
|
||||
assert_eq!(skip_list.next(), None);
|
||||
}
|
||||
{
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3);
|
||||
skip_list_builder.insert(2, &());
|
||||
skip_list_builder.insert(3, &());
|
||||
skip_list_builder.insert(5, &());
|
||||
skip_list_builder.insert(6, &());
|
||||
skip_list_builder.write::<Vec<u8>>(&mut output);
|
||||
let mut skip_list: SkipList<()> = SkipList::read(&mut output);
|
||||
assert_eq!(skip_list.next().unwrap(), (2, ()));
|
||||
skip_list.seek(6);
|
||||
assert_eq!(skip_list.next().unwrap(), (6, ()));
|
||||
assert_eq!(skip_list.next(), None);
|
||||
|
||||
}
|
||||
{
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(2);
|
||||
skip_list_builder.insert(2, &());
|
||||
skip_list_builder.insert(3, &());
|
||||
skip_list_builder.insert(5, &());
|
||||
skip_list_builder.insert(7, &());
|
||||
skip_list_builder.insert(9, &());
|
||||
skip_list_builder.write::<Vec<u8>>(&mut output);
|
||||
let mut skip_list: SkipList<()> = SkipList::read(&mut output);
|
||||
assert_eq!(skip_list.next().unwrap(), (2, ()));
|
||||
skip_list.seek(10);
|
||||
assert_eq!(skip_list.next(), None);
|
||||
}
|
||||
{
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3);
|
||||
for i in (0..1000) {
|
||||
skip_list_builder.insert(i, &());
|
||||
}
|
||||
skip_list_builder.insert(1004, &());
|
||||
skip_list_builder.write::<Vec<u8>>(&mut output);
|
||||
let mut skip_list: SkipList<()> = SkipList::read(&mut output);
|
||||
assert_eq!(skip_list.next().unwrap(), (0, ()));
|
||||
skip_list.seek(431);
|
||||
assert_eq!(skip_list.next().unwrap(), (431,()) );
|
||||
skip_list.seek(1003);
|
||||
assert_eq!(skip_list.next().unwrap(), (1004,()) );
|
||||
assert_eq!(skip_list.next(), None);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user