diff --git a/Cargo.toml b/Cargo.toml
index ce2fe204b..b5c31321b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,3 +10,4 @@ memmap = "0.2.3"
 lazy_static = "0.1.*"
 regex = "0.1"
 fst = "0.1.26"
+rand = "0.3.13"
diff --git a/src/core/directory.rs b/src/core/directory.rs
index 14f2e9733..7af731685 100644
--- a/src/core/directory.rs
+++ b/src/core/directory.rs
@@ -9,9 +9,19 @@ use std::rc::Rc;
 use std::ops::Deref;
 use std::cell::RefCell;
 use std::sync::Arc;
+use rand::{thread_rng, Rng};
+
 
 #[derive(Clone, Debug)]
-pub struct SegmentId(String);
+pub struct SegmentId(pub String);
+
+pub fn generate_segment_name() -> SegmentId {
+    static CHARS: &'static [u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
+    let random_name: String = (0..8)
+        .map(|_| thread_rng().choose(CHARS).unwrap().clone() as char)
+        .collect();
+    SegmentId( String::from("_") + &random_name)
+}
 
 pub trait Dir {
     fn get_data(&self, segment_id: &SegmentId, component: SegmentComponent) -> Result; // {
@@ -30,6 +40,10 @@ impl Directory {
         }
     }
 
+    pub fn new_segment(&self,) -> Segment {
+        self.segment(&generate_segment_name())
+    }
+
     fn from(directory: T) -> Directory {
         Directory {
             dir: Rc::new(directory),
diff --git a/src/core/writer.rs b/src/core/writer.rs
index b711d9ea0..4c88b1be9 100644
--- a/src/core/writer.rs
+++ b/src/core/writer.rs
@@ -31,32 +31,25 @@ impl PostingsWriter for SimplePostingsWriter {
     }
 }
 
-impl Flushable for SimplePostingsWriter {
-    fn flush(&self, writer: &mut W) -> Result {
-        let num_docs = self.doc_ids.len() as u64;
-        writer.write_u64::(num_docs);
-        for &doc_id in self.doc_ids.iter() {
-            writer.write_u64::(doc_id as u64);
-        }
-        Ok(1)
-    }
-}
 
 struct FieldWriter {
     postings: Vec,
     term_index: BTreeMap,
 }
-//
-// impl Flushable for FieldWriter {
-//     fn flush(&self, writer: &mut W) -> Result {
-//         let num_docs = self.doc_ids.len() as u64;
-//         writer.write_u64::(num_docs);
-//         for &doc_id in self.doc_ids.iter() {
-//             writer.write_u64::(doc_id as u64);
-//         }
-//         Ok(1)
-//     }
-// }
+
+impl Flushable for SimplePostingsWriter {
+    fn flush(&self, writer: &mut W) -> Result {
+        let mut num_bytes_written = 0;
+        let num_docs = self.doc_ids.len() as u64;
+        writer.write_u64::(num_docs);
+        num_bytes_written += 8;
+        for &doc_id in self.doc_ids.iter() {
+            writer.write_u64::(doc_id as u64);
+            num_bytes_written += 8;
+        }
+        Ok(num_bytes_written)
+    }
+}
 
 impl FieldWriter {
     pub fn new() -> FieldWriter {
@@ -120,6 +113,7 @@ impl IndexWriter {
     }
 
     pub fn sync(&mut self,) -> Result<(), io::Error> {
+        self.directory.new_segment();
         Ok(())
     }
 
diff --git a/src/lib.rs b/src/lib.rs
index 3f535347f..5b58ea342 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,9 @@
 #[macro_use]
 extern crate lazy_static;
+
 extern crate byteorder;
+extern crate rand;
+extern crate regex;
+
 
 pub mod core;
diff --git a/tests/core.rs b/tests/core.rs
index 2f422a483..767e572b0 100644
--- a/tests/core.rs
+++ b/tests/core.rs
@@ -1,19 +1,21 @@
 extern crate tantivy;
 extern crate itertools;
 extern crate byteorder;
+extern crate regex;
 
 use tantivy::core::DocId;
 use tantivy::core::postings::{VecPostings, intersection};
 use tantivy::core::postings::Postings;
 use tantivy::core::analyzer::tokenize;
 use tantivy::core::writer::IndexWriter;
-use tantivy::core::directory::Directory;
+use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
 use tantivy::core::schema::{Field, Document};
 use tantivy::core::reader::IndexReader;
 use tantivy::core::writer::SimplePostingsWriter;
 use tantivy::core::postings::PostingsWriter;
 use tantivy::core::global::Flushable;
-use std::io::{ BufWriter, Write };
+use std::io::{ BufWriter, Write};
+use regex::Regex;
 use std::convert::From;
 
 #[test]
@@ -58,3 +60,10 @@ fn test_postings_writer() {
     postings_writer.flush(&mut buffer);
     assert_eq!(buffer.len(), 5 * 8);
 }
+
+#[test]
+fn test_new_segment() {
+    let SegmentId(segment_name) = generate_segment_name();
+    let segment_ptn = Regex::new(r"^_[a-z0-9]{8}$").unwrap();
+    assert!(segment_ptn.is_match(&segment_name));
+}