mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-09 10:32:55 +00:00
babs
This commit is contained in:
@@ -9,3 +9,4 @@ itertools = "0.4.5"
|
||||
memmap = "0.2.3"
|
||||
lazy_static = "0.1.*"
|
||||
regex = "0.1"
|
||||
fst = "0.1.26"
|
||||
|
||||
@@ -81,8 +81,7 @@ impl Segment {
|
||||
// MemoryPointer
|
||||
|
||||
pub trait MemoryPointer {
|
||||
fn len(&self) -> usize;
|
||||
fn ptr(&self) -> *const u8;
|
||||
fn data(&self) -> &[u8];
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
@@ -94,31 +93,25 @@ pub struct ResidentMemoryPointer {
|
||||
}
|
||||
|
||||
impl MemoryPointer for ResidentMemoryPointer {
|
||||
fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
fn ptr(&self) -> *const u8 {
|
||||
&self.data[0]
|
||||
fn data(&self) -> &[u8] {
|
||||
self.data.deref()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// MmapMemory
|
||||
//
|
||||
//
|
||||
|
||||
|
||||
pub struct MmapMemory(Mmap);
|
||||
|
||||
impl MemoryPointer for MmapMemory {
|
||||
fn len(&self) -> usize {
|
||||
fn data(&self) -> &[u8] {
|
||||
let &MmapMemory(ref mmap) = self;
|
||||
mmap.len()
|
||||
}
|
||||
fn ptr(&self) -> *const u8 {
|
||||
let &MmapMemory(ref mmap) = self;
|
||||
mmap.ptr()
|
||||
unsafe {
|
||||
mmap.as_slice()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::io;
|
||||
|
||||
pub type DocId = usize;
|
||||
|
||||
// pub trait SeekableIterator<T>: Iterator<T> {
|
||||
// pub fn seek(&mut self, el: &T) -> bool;
|
||||
// }
|
||||
|
||||
|
||||
/// A value that can write itself out to any `Write` sink.
pub trait Flushable {
    /// Writes `self` into `writer`.
    ///
    /// # Errors
    ///
    /// Implementations report failures as an `io::Error`.
    ///
    /// NOTE(review): the meaning of the returned `usize` is not documented
    /// anywhere in this file (presumably a byte count); confirm with the
    /// implementors before relying on it.
    fn flush<W>(&self, writer: &mut W) -> io::Result<usize>
    where
        W: Write;
}
|
||||
|
||||
@@ -7,6 +7,18 @@ use core::global::DocId;
|
||||
// use std::slice;
|
||||
use std::vec;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
|
||||
|
||||
pub trait PostingsWriter {
|
||||
fn suscribe(&mut self, DocId);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
|
||||
pub trait Postings {
|
||||
type IteratorType: Iterator<Item=DocId>;
|
||||
fn iter(&self) -> Self::IteratorType;
|
||||
|
||||
@@ -6,27 +6,57 @@ use core::directory::Directory;
|
||||
use core::analyzer::tokenize;
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
use core::DocId;
|
||||
use core::postings::PostingsWriter;
|
||||
use core::global::Flushable;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::mem;
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
|
||||
pub struct PostingsWriter {
|
||||
|
||||
pub struct SimplePostingsWriter {
|
||||
doc_ids: Vec<DocId>,
|
||||
}
|
||||
|
||||
impl PostingsWriter {
|
||||
pub fn new()->PostingsWriter {
|
||||
PostingsWriter {
|
||||
impl SimplePostingsWriter {
|
||||
pub fn new() -> SimplePostingsWriter {
|
||||
SimplePostingsWriter {
|
||||
doc_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn suscribe(&mut self, doc_id: DocId) {
|
||||
impl PostingsWriter for SimplePostingsWriter {
|
||||
fn suscribe(&mut self, doc_id: DocId) {
|
||||
self.doc_ids.push(doc_id);
|
||||
}
|
||||
}
|
||||
|
||||
impl Flushable for SimplePostingsWriter {
|
||||
fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
|
||||
let num_docs = self.doc_ids.len() as u64;
|
||||
writer.write_u64::<NativeEndian>(num_docs);
|
||||
for &doc_id in self.doc_ids.iter() {
|
||||
writer.write_u64::<NativeEndian>(doc_id as u64);
|
||||
}
|
||||
Ok(1)
|
||||
}
|
||||
}
|
||||
|
||||
struct FieldWriter {
|
||||
postings: Vec<PostingsWriter>,
|
||||
postings: Vec<SimplePostingsWriter>,
|
||||
term_index: BTreeMap<String, usize>,
|
||||
}
|
||||
//
|
||||
// impl Flushable for FieldWriter {
|
||||
// fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
|
||||
// let num_docs = self.doc_ids.len() as u64;
|
||||
// writer.write_u64::<NativeEndian>(num_docs);
|
||||
// for &doc_id in self.doc_ids.iter() {
|
||||
// writer.write_u64::<NativeEndian>(doc_id as u64);
|
||||
// }
|
||||
// Ok(1)
|
||||
// }
|
||||
// }
|
||||
|
||||
impl FieldWriter {
|
||||
pub fn new() -> FieldWriter {
|
||||
@@ -36,7 +66,7 @@ impl FieldWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_postings_writer(&mut self, term_text: &str) -> &mut PostingsWriter {
|
||||
pub fn get_postings_writer(&mut self, term_text: &str) -> &mut SimplePostingsWriter {
|
||||
match self.term_index.get(term_text) {
|
||||
Some(unord_id) => {
|
||||
return &mut self.postings[*unord_id];
|
||||
@@ -44,7 +74,7 @@ impl FieldWriter {
|
||||
None => {}
|
||||
}
|
||||
let unord_id = self.term_index.len();
|
||||
self.postings.push(PostingsWriter::new());
|
||||
self.postings.push(SimplePostingsWriter::new());
|
||||
self.term_index.insert(String::from(term_text), unord_id.clone());
|
||||
&mut self.postings[unord_id]
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate byteorder;
|
||||
|
||||
pub mod core;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
extern crate tantivy;
|
||||
extern crate itertools;
|
||||
extern crate byteorder;
|
||||
|
||||
use tantivy::core::DocId;
|
||||
use tantivy::core::postings::{VecPostings, intersection};
|
||||
@@ -9,6 +10,11 @@ use tantivy::core::writer::IndexWriter;
|
||||
use tantivy::core::directory::Directory;
|
||||
use tantivy::core::schema::{Field, Document};
|
||||
use tantivy::core::reader::IndexReader;
|
||||
use tantivy::core::writer::SimplePostingsWriter;
|
||||
use tantivy::core::postings::PostingsWriter;
|
||||
use tantivy::core::global::Flushable;
|
||||
use std::io::{ BufWriter, Write };
|
||||
use std::convert::From;
|
||||
|
||||
#[test]
|
||||
fn test_intersection() {
|
||||
@@ -39,3 +45,16 @@ fn test_indexing() {
|
||||
let index_reader = IndexReader::open(&directory);
|
||||
}
|
||||
}
|
||||
|
||||
/// Flushing four subscribed doc ids must emit five `u64` words: the id
/// count followed by the four ids.
#[test]
fn test_postings_writer() {
    let mut postings_writer = SimplePostingsWriter::new();
    postings_writer.suscribe(1);
    postings_writer.suscribe(4);
    postings_writer.suscribe(5);
    postings_writer.suscribe(17);
    let mut buffer: Vec<u8> = Vec::new();
    assert_eq!(buffer.len(), 0);
    // Do not drop the Result: a failed flush should fail the test here,
    // not surface later as a confusing length mismatch.
    postings_writer
        .flush(&mut buffer)
        .expect("flushing into an in-memory Vec<u8> should not fail");
    assert_eq!(buffer.len(), 5 * 8);
}
|
||||
|
||||
Reference in New Issue
Block a user