This commit is contained in:
Paul Masurel
2016-01-13 10:03:58 +09:00
parent f443ec59b3
commit 6519e73d2b
5 changed files with 46 additions and 24 deletions

View File

@@ -10,3 +10,4 @@ memmap = "0.2.3"
lazy_static = "0.1.*"
regex = "0.1"
fst = "0.1.26"
rand = "0.3.13"

View File

@@ -9,9 +9,19 @@ use std::rc::Rc;
use std::ops::Deref;
use std::cell::RefCell;
use std::sync::Arc;
use rand::{thread_rng, Rng};
/// Identifier of a segment: a tuple struct wrapping a single `String`.
#[derive(Clone, Debug)]
pub struct SegmentId(String);
// NOTE(review): this second declaration differs only by exposing the inner
// `String` as `pub`; it presumably supersedes the one above — confirm.
pub struct SegmentId(pub String);
/// Builds a `SegmentId` carrying a freshly generated random name.
///
/// The name is an underscore followed by 8 characters, each one picked
/// from the lowercase ASCII letters and the digits with the thread-local
/// random number generator.
pub fn generate_segment_name() -> SegmentId {
    static CHARS: &'static [u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    let mut rng = thread_rng();
    let mut name = String::from("_");
    for _ in 0..8 {
        // `CHARS` is never empty, so `choose` always yields a byte.
        let byte: u8 = *rng.choose(CHARS).unwrap();
        name.push(byte as char);
    }
    SegmentId(name)
}
pub trait Dir {
fn get_data(&self, segment_id: &SegmentId, component: SegmentComponent) -> Result<SharedMmapMemory, io::Error>; // {
@@ -30,6 +40,10 @@ impl Directory {
}
}
/// Returns the `Segment` that `self.segment` yields for a newly
/// generated segment name.
pub fn new_segment(&self) -> Segment {
    let segment_id = generate_segment_name();
    self.segment(&segment_id)
}
fn from<T: Dir + 'static>(directory: T) -> Directory {
Directory {
dir: Rc::new(directory),

View File

@@ -31,32 +31,25 @@ impl PostingsWriter for SimplePostingsWriter {
}
}
impl Flushable for SimplePostingsWriter {
    /// Serializes the doc ids into `writer`.
    ///
    /// The output is the number of doc ids as a native-endian `u64`,
    /// followed by every doc id, each cast to a native-endian `u64`.
    ///
    /// Returns the number of bytes written.
    ///
    /// # Errors
    ///
    /// A failing write is propagated to the caller instead of being
    /// silently dropped.
    fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
        // Every value is emitted as a `u64`, i.e. 8 bytes.
        const U64_NUM_BYTES: usize = 8;
        let num_docs = self.doc_ids.len() as u64;
        writer.write_u64::<NativeEndian>(num_docs)?;
        let mut num_bytes_written = U64_NUM_BYTES;
        for &doc_id in self.doc_ids.iter() {
            writer.write_u64::<NativeEndian>(doc_id as u64)?;
            num_bytes_written += U64_NUM_BYTES;
        }
        Ok(num_bytes_written)
    }
}
/// Groups a list of `SimplePostingsWriter`s with an ordered map from
/// `String` keys to `usize` values.
struct FieldWriter {
    // The postings writers owned by this struct.
    // NOTE(review): presumably one writer per distinct term — confirm.
    postings: Vec<SimplePostingsWriter>,
    // NOTE(review): presumably maps a term to the position of its writer
    // in `postings` — verify against `impl FieldWriter`.
    term_index: BTreeMap<String, usize>,
}
//
// impl Flushable for FieldWriter {
// fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
// let num_docs = self.doc_ids.len() as u64;
// writer.write_u64::<NativeEndian>(num_docs);
// for &doc_id in self.doc_ids.iter() {
// writer.write_u64::<NativeEndian>(doc_id as u64);
// }
// Ok(1)
// }
// }
impl Flushable for SimplePostingsWriter {
    /// Serializes the doc ids into `writer`.
    ///
    /// The output is the number of doc ids as a native-endian `u64`,
    /// followed by every doc id, each cast to a native-endian `u64`.
    ///
    /// Returns the number of bytes written.
    ///
    /// # Errors
    ///
    /// A failing write is propagated to the caller instead of being
    /// silently dropped, so `Ok` is only returned when every value was
    /// actually handed to `writer`.
    fn flush<W: Write>(&self, writer: &mut W) -> Result<usize, io::Error> {
        // Every value is emitted as a `u64`, i.e. 8 bytes.
        const U64_NUM_BYTES: usize = 8;
        let num_docs = self.doc_ids.len() as u64;
        writer.write_u64::<NativeEndian>(num_docs)?;
        let mut num_bytes_written = U64_NUM_BYTES;
        for &doc_id in self.doc_ids.iter() {
            writer.write_u64::<NativeEndian>(doc_id as u64)?;
            num_bytes_written += U64_NUM_BYTES;
        }
        Ok(num_bytes_written)
    }
}
impl FieldWriter {
pub fn new() -> FieldWriter {
@@ -120,6 +113,7 @@ impl IndexWriter {
}
pub fn sync(&mut self,) -> Result<(), io::Error> {
self.directory.new_segment();
Ok(())
}

View File

@@ -1,5 +1,9 @@
#[macro_use]
extern crate lazy_static;
extern crate byteorder;
extern crate rand;
extern crate regex;
pub mod core;

View File

@@ -1,19 +1,21 @@
extern crate tantivy;
extern crate itertools;
extern crate byteorder;
extern crate regex;
use tantivy::core::DocId;
use tantivy::core::postings::{VecPostings, intersection};
use tantivy::core::postings::Postings;
use tantivy::core::analyzer::tokenize;
use tantivy::core::writer::IndexWriter;
use tantivy::core::directory::Directory;
use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
use tantivy::core::schema::{Field, Document};
use tantivy::core::reader::IndexReader;
use tantivy::core::writer::SimplePostingsWriter;
use tantivy::core::postings::PostingsWriter;
use tantivy::core::global::Flushable;
use std::io::{ BufWriter, Write };
use std::io::{ BufWriter, Write};
use regex::Regex;
use std::convert::From;
#[test]
@@ -58,3 +60,10 @@ fn test_postings_writer() {
postings_writer.flush(&mut buffer);
assert_eq!(buffer.len(), 5 * 8);
}
/// Checks that a generated segment name is an underscore followed by
/// exactly 8 lowercase alphanumeric characters.
#[test]
fn test_new_segment() {
    let segment_ptn = Regex::new(r"^_[a-z0-9]{8}$").unwrap();
    let segment_name = generate_segment_name().0;
    assert!(segment_ptn.is_match(&segment_name));
}