diff --git a/Cargo.toml b/Cargo.toml index d76ddb781..12c0d2157 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,6 @@ memmap = "0.2" lazy_static = "0.1.*" regex = "0.1" fst = { path = "../fst" } -rand = "0.3.13" atomicwrites = "0.0.14" tempfile = "2.0.0" rustc-serialize = "0.3.16" @@ -23,6 +22,10 @@ serde = "0.6.11" libc = "0.2.6" lz4 = "1.13.131" time = "0.1.34" +uuid = "0.1" + +[dev-dependencies] +rand = "0.3.13" [build-dependencies] gcc = "0.3.24" diff --git a/src/core/index.rs b/src/core/index.rs index eaac69ac5..df7eee0f4 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -5,29 +5,37 @@ use core::schema::DocId; use std::io::Write; use std::sync::{Arc, RwLock, RwLockWriteGuard, RwLockReadGuard}; use std::fmt; -use rand::{thread_rng, Rng}; use rustc_serialize::json; use std::io::Read; use std::io::ErrorKind as IOErrorKind; use core::directory::{Directory, MmapDirectory, RAMDirectory, ReadOnlySource, WritePtr}; use core::writer::IndexWriter; use core::searcher::Searcher; +use uuid::Uuid; -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct SegmentId(pub String); +#[derive(Clone, PartialEq, Eq, Hash,RustcDecodable,RustcEncodable)] +pub struct SegmentId(Uuid); +impl SegmentId { + pub fn new() -> SegmentId { + SegmentId(Uuid::new_v4()) + } -pub fn generate_segment_name() -> SegmentId { - static CHARS: &'static [u8] = b"abcdefghijklmnopqrstuvwxyz0123456789"; - let random_name: String = (0..8) - .map(|_| thread_rng().choose(CHARS).unwrap().clone() as char) - .collect(); - SegmentId( String::from("_") + &random_name) + pub fn uuid_string(&self,) -> String { + self.0.to_simple_string() + } } +impl fmt::Debug for SegmentId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Segment({:?})", self.uuid_string()) + } +} + + #[derive(Clone,Debug,RustcDecodable,RustcEncodable)] pub struct IndexMeta { - segments: Vec<String>, + segments: Vec<SegmentId>, schema: Schema, } @@ -122,7 +130,7 @@ impl Index { // TODO find a rusty way to hide that, while keeping // 
it visible for IndexWriters. pub fn publish_segment(&mut self, segment: Segment) -> io::Result<()> { - self.metas.write().unwrap().segments.push(segment.segment_id.0.clone()); + self.metas.write().unwrap().segments.push(segment.segment_id.clone()); // TODO use logs self.save_metas() } @@ -158,13 +166,11 @@ impl Index { .segments .iter() .cloned() - .map(SegmentId) .collect() } pub fn new_segment(&self,) -> Segment { - // TODO check it does not exists - self.segment(&generate_segment_name()) + self.segment(&SegmentId::new()) } pub fn load_metas(&mut self,) -> io::Result<()> { @@ -226,8 +232,8 @@ impl Segment { } pub fn relative_path(&self, component: &SegmentComponent) -> PathBuf { - let SegmentId(ref segment_id_str) = self.segment_id; - let filename = String::new() + segment_id_str + Segment::path_suffix(component); + let SegmentId(ref segment_uuid) = self.segment_id; + let filename = segment_uuid.to_simple_string() + Segment::path_suffix(component); PathBuf::from(filename) } @@ -241,19 +247,3 @@ impl Segment { self.index.directory.write().unwrap().open_write(&path) } } - - -#[cfg(test)] -mod test { - - use super::*; - use regex::Regex; - - #[test] - fn test_new_segment() { - let SegmentId(segment_name) = generate_segment_name(); - let segment_ptn = Regex::new(r"^_[a-z0-9]{8}$").unwrap(); - assert!(segment_ptn.is_match(&segment_name)); - } - -} diff --git a/src/core/schema.rs b/src/core/schema.rs index 5c5441279..a5a19066e 100644 --- a/src/core/schema.rs +++ b/src/core/schema.rs @@ -111,7 +111,9 @@ struct FieldEntry { option: FieldOptions, } - +/// Tantivy has a very strict schema. +/// You need to specify in advance, whether a field is indexed or not, +/// stored or not, and RAM-based or not. #[derive(Clone, Debug)] pub struct Schema { fields: Vec, @@ -147,6 +149,8 @@ impl Encodable for Schema { } impl Schema { + + /// Creates a new, empty schema. 
pub fn new() -> Schema { Schema { fields: Vec::new(), @@ -155,8 +159,7 @@ impl Schema { } } - /// Returns the field handle associated with the given name, - /// as well as its FieldOptions. + /// Given a name, returns the field handle, as well as its associated FieldOptions pub fn get(&self, field_name: &str) -> Option<(Field, FieldOptions)> { self.fields_map .get(field_name) @@ -166,7 +169,7 @@ impl Schema { }) } - /// Returns the field handle associated with the given name. + /// Returns the field handle associated with a given name. /// /// # Panics /// Panics if the field name does not exist. @@ -174,16 +177,20 @@ impl Schema { /// and control the content of their schema. /// /// If panicking is not an option for you, - /// you may use get(&self, field_name: &str). + /// you may use `get(&self, field_name: &str)`. pub fn field(&self, fieldname: &str) -> Field { self.fields_map.get(&String::from(fieldname)).map(|field| field.clone()).unwrap() } + /// Returns the field options associated with a field handle. pub fn field_options(&self, field: &Field) -> FieldOptions { let Field(field_id) = *field; self.field_options[field_id as usize].clone() } + + /// Creates a new field. + /// Returns the associated field handle. 
pub fn add_field(&mut self, field_name_str: &str, field_options: &FieldOptions) -> Field { let field = Field(self.fields.len() as u8); // TODO case if field already exists diff --git a/src/lib.rs b/src/lib.rs index af4627c56..ccbd0ac2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,6 @@ //#![feature(test,associated_consts)] #![cfg_attr(test, feature(test))] -#[allow(unused_imports)] - - #[macro_use] extern crate lazy_static; @@ -11,7 +8,6 @@ extern crate lazy_static; extern crate fst; extern crate byteorder; extern crate memmap; -extern crate rand; extern crate regex; extern crate tempfile; extern crate rustc_serialize; @@ -23,8 +19,10 @@ extern crate time; extern crate serde; extern crate libc; extern crate lz4; +extern crate uuid; #[cfg(test)] extern crate test; +#[cfg(test)] extern crate rand; mod core;