Schema now:

- is wrapped internally in an Arc, so cloning it aggressively is OK.
- is stored as a field of Index, redundant with the copy held in metas.
- is read-only; it must be built via a SchemaBuilder.
This commit is contained in:
Paul Masurel
2016-08-23 22:23:13 +09:00
parent 054405adf1
commit e6200e85af
13 changed files with 307 additions and 209 deletions

View File

@@ -1,4 +1,5 @@
use Result;
use Error;
use std::path::{PathBuf, Path};
use schema::Schema;
use DocId;
@@ -44,13 +45,15 @@ impl fmt::Debug for Index {
/// Handle to an index: its metadata, its backing directory and its schema.
pub struct Index {
// Index metadata behind an `Arc<RwLock<..>>`; cloning the `Index` clones the `Arc`,
// so clones refer to the same metadata.
metas: Arc<RwLock<IndexMeta>>,
// Storage backend; duplicated through `box_clone()` when the `Index` is cloned.
directory: Box<Directory>,
// Schema handle returned by `schema()`. Redundant with the schema stored in `metas`;
// keeping it here lets `schema()` answer without taking the metas lock.
schema: Schema,
}
impl Clone for Index {
fn clone(&self,) -> Index {
Index {
metas: self.metas.clone(),
directory: self.directory.box_clone()
directory: self.directory.box_clone(),
schema: self.schema.clone(),
}
}
}
@@ -59,6 +62,18 @@ lazy_static! {
static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
}
fn load_metas(directory: &Directory) -> Result<IndexMeta> {
let meta_file = try!(directory.open_read(&META_FILEPATH));
let meta_content = String::from_utf8_lossy(meta_file.as_slice());
let loaded_meta = try!(
json::decode(&meta_content)
.map_err(|e| Error::CorruptedFile(META_FILEPATH.clone(), Box::new(e)))
);
Ok(loaded_meta)
}
impl Index {
pub fn create_in_ram(schema: Schema) -> Index {
@@ -78,10 +93,14 @@ impl Index {
pub fn open(directory_path: &Path) -> Result<Index> {
let directory = try!(MmapDirectory::open(directory_path));
let directory_ptr = Box::new(directory);
let mut index = Index::from_directory(directory_ptr, Schema::new());
try!(index.load_metas()); //< TODO does the directory already exists?
Ok(index)
let metas = try!(load_metas(&directory)); //< TODO does the directory already exists?
let schema = metas.schema.clone();
let locked_metas = Arc::new(RwLock::new(metas));
Ok(Index {
directory: Box::new(directory),
metas: locked_metas,
schema: schema,
})
}
pub fn docstamp(&self,) -> Result<u64> {
@@ -110,17 +129,16 @@ impl Index {
pub fn from_directory(directory: Box<Directory>, schema: Schema) -> Index {
Index {
metas: Arc::new(RwLock::new(IndexMeta::with_schema(schema))),
metas: Arc::new(RwLock::new(IndexMeta::with_schema(schema.clone()))),
directory: directory,
schema: schema,
}
}
pub fn schema(&self,) -> Schema {
self.metas.read().unwrap().schema.clone()
self.schema.clone()
}
/// Marks the segment as published.
// TODO find a rusty way to hide that, while keeping
// it visible for IndexWriters.
@@ -179,14 +197,6 @@ impl Index {
pub fn new_segment(&self,) -> Segment {
self.segment(SegmentId::new())
}
pub fn load_metas(&mut self,) -> Result<()> {
let meta_file = try!(self.directory.open_read(&META_FILEPATH));
let meta_content = String::from_utf8_lossy(meta_file.as_slice());
let loaded_meta: IndexMeta = json::decode(&meta_content).unwrap();
self.metas.write().unwrap().clone_from(&loaded_meta);
Ok(())
}
pub fn save_metas(&mut self,) -> Result<()> {
let mut w = Vec::new();

View File

@@ -26,7 +26,7 @@ mod tests {
use std::path::Path;
use directory::{Directory, WritePtr, RAMDirectory};
use schema::Document;
use schema::Schema;
use schema::{Schema, SchemaBuilder};
use schema::FAST;
use test::Bencher;
use test;
@@ -34,6 +34,17 @@ mod tests {
use rand::SeedableRng;
use rand::XorShiftRng;
lazy_static! {
// Schema shared by the tests of this module: a single u32 field named "field",
// declared with the FAST option.
static ref SCHEMA: Schema = {
let mut schema_builder = SchemaBuilder::new();
schema_builder.add_u32_field("field", FAST);
schema_builder.build()
};
// Handle of the "field" field, looked up by name in SCHEMA.
// The unwrap cannot fail as long as SCHEMA declares a field with that name.
static ref FIELD: Field = {
SCHEMA.get_field("field").unwrap()
};
}
#[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
@@ -55,15 +66,13 @@ mod tests {
fn test_intfastfield_small() {
let path = Path::new("test");
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
add_single_field_doc(&mut fast_field_writers, field, 13u32);
add_single_field_doc(&mut fast_field_writers, field, 14u32);
add_single_field_doc(&mut fast_field_writers, field, 2u32);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
add_single_field_doc(&mut fast_field_writers, *FIELD, 13u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 14u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 2u32);
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
@@ -73,7 +82,7 @@ mod tests {
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 13u32);
assert_eq!(fast_field_reader.get(1), 14u32);
assert_eq!(fast_field_reader.get(2), 2u32);
@@ -84,21 +93,19 @@ mod tests {
fn test_intfastfield_large() {
let path = Path::new("test");
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
add_single_field_doc(&mut fast_field_writers, field, 4u32);
add_single_field_doc(&mut fast_field_writers, field, 14_082_001u32);
add_single_field_doc(&mut fast_field_writers, field, 3_052u32);
add_single_field_doc(&mut fast_field_writers, field, 9002u32);
add_single_field_doc(&mut fast_field_writers, field, 15_001u32);
add_single_field_doc(&mut fast_field_writers, field, 777u32);
add_single_field_doc(&mut fast_field_writers, field, 1_002u32);
add_single_field_doc(&mut fast_field_writers, field, 1_501u32);
add_single_field_doc(&mut fast_field_writers, field, 215u32);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
add_single_field_doc(&mut fast_field_writers, *FIELD, 4u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 777u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 215u32);
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
@@ -108,7 +115,7 @@ mod tests {
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 4u32);
assert_eq!(fast_field_reader.get(1), 14_082_001u32);
assert_eq!(fast_field_reader.get(2), 3_052u32);
@@ -125,14 +132,14 @@ mod tests {
fn test_intfastfield_null_amplitude() {
let path = Path::new("test");
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for _ in 0..10_000 {
add_single_field_doc(&mut fast_field_writers, field, 100_000u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u32);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
@@ -143,7 +150,7 @@ mod tests {
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
for doc in 0..10_000 {
assert_eq!(fast_field_reader.get(doc), 100_000u32);
}
@@ -164,14 +171,12 @@ mod tests {
let permutation = generate_permutation();
let n = permutation.len();
let mut directory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
@@ -179,7 +184,7 @@ mod tests {
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let mut a = 0u32;
for _ in 0..n {
assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]);
@@ -219,14 +224,12 @@ mod tests {
let path = Path::new("test");
let permutation = generate_permutation();
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
@@ -234,7 +237,7 @@ mod tests {
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
b.iter(|| {
let n = test::black_box(7000u32);
let mut a = 0u32;
@@ -251,14 +254,12 @@ mod tests {
let path = Path::new("test");
let permutation = generate_permutation();
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
@@ -266,7 +267,7 @@ mod tests {
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
b.iter(|| {
let n = test::black_box(1000u32);
let mut a = 0u32;

View File

@@ -284,12 +284,12 @@ mod tests {
#[test]
fn test_index_merger() {
let mut schema = schema::Schema::new();
let mut schema_builder = schema::SchemaBuilder::new();
let text_fieldtype = schema::TextOptions::new().set_indexing_options(TextIndexingOptions::TokenizedWithFreq).set_stored();
let text_field = schema.add_text_field("text", text_fieldtype);
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let score_fieldtype = schema::U32Options::new().set_fast();
let score_field = schema.add_u32_field("score", score_fieldtype);
let index = Index::create_in_ram(schema);
let score_field = schema_builder.add_u32_field("score", score_fieldtype);
let index = Index::create_in_ram(schema_builder.build());
{
{

View File

@@ -265,9 +265,9 @@ mod tests {
#[test]
fn test_commit_and_rollback() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema);
let mut schema_builder = schema::SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let num_docs_containing = |s: &str| {

View File

@@ -6,8 +6,8 @@
# extern crate rustc_serialize;
# extern crate tantivy;
# use std::fs;
use tantivy::{Document, Index};
use tantivy::schema::{Schema, TEXT, STORED};
use tantivy::Index;
use tantivy::schema::*;
use tantivy::collector::TopCollector;
use tantivy::query::QueryParser;
use tantivy::query::Query;
@@ -17,14 +17,16 @@ use std::path::PathBuf;
# fn wrapper_err() -> tantivy::Result<()> {
// We need to declare a schema
// to create a new index.
let mut schema = Schema::new();
let mut schema_builder = SchemaBuilder::new();
// TEXT | STORED is some syntactic sugar to describe
// how tantivy should index this field.
// It means the field should be tokenized and indexed,
// along with its term frequency and term positions.
let title = schema.add_text_field("title", TEXT | STORED);
let body = schema.add_text_field("body", TEXT);
let title = schema_builder.add_text_field("title", TEXT | STORED);
let body = schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build();
// the path in which our index will be created.
# fs::create_dir("./tantivy-index").unwrap();
let index_path = PathBuf::from("./tantivy-index");
@@ -225,15 +227,19 @@ impl ScoredDoc {
#[cfg(test)]
mod tests {
use super::*;
use collector::TestCollector;
use query::MultiTermQuery;
use Index;
use core::SegmentReader;
use schema::*;
use DocSet;
use Postings;
#[test]
fn test_indexing() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema).unwrap();
{
// writing the segment
@@ -264,9 +270,9 @@ mod tests {
#[test]
fn test_docfreq() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1).unwrap();
let mut doc = Document::new();
@@ -311,9 +317,9 @@ mod tests {
#[test]
fn test_fieldnorm() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1).unwrap();
{
@@ -345,8 +351,9 @@ mod tests {
#[test]
fn test_termfreq() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
@@ -371,8 +378,9 @@ mod tests {
#[test]
fn test_searcher() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
@@ -439,8 +447,9 @@ mod tests {
#[test]
fn test_searcher_2() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{

View File

@@ -35,7 +35,7 @@ pub use self::segment_postings_option::SegmentPostingsOption;
mod tests {
use super::*;
use schema::{Document, TEXT, Schema, Term};
use schema::{Document, TEXT, SchemaBuilder, Term};
use core::SegmentComponent;
use indexer::SegmentWriter;
use core::SegmentReader;
@@ -45,8 +45,9 @@ mod tests {
#[test]
pub fn test_position_write() {
let mut schema = Schema::new();
let text_field = schema.add_text_field("text", TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut segment = index.new_segment();
let mut posting_serializer = PostingsSerializer::open(&mut segment).unwrap();
@@ -64,8 +65,9 @@ mod tests {
#[test]
pub fn test_position_and_fieldnorm_write_fullstack() {
let mut schema = Schema::new();
let text_field = schema.add_text_field("text", TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let segment = index.new_segment();
{

View File

@@ -277,11 +277,11 @@ mod tests {
#[test]
pub fn test_query_parser() {
let mut schema = Schema::new();
let text_field = schema.add_text_field("text", STRING);
let title_field = schema.add_text_field("title", STRING);
let author_field = schema.add_text_field("author", STRING);
let query_parser = QueryParser::new(schema, vec!(text_field, author_field));
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", STRING);
let title_field = schema_builder.add_text_field("title", STRING);
let author_field = schema_builder.add_text_field("author", STRING);
let query_parser = QueryParser::new(schema_builder.build(), vec!(text_field, author_field));
assert!(query_parser.parse_query("a:b").is_err());
{
let terms = vec!(Term::from_field_text(title_field, "abctitle"));

View File

@@ -110,14 +110,12 @@ impl From<Vec<FieldValue>> for Document {
#[cfg(test)]
mod tests {
use super::*;
use schema::Schema;
use schema::TEXT;
use schema::*;
#[test]
fn test_doc() {
let mut schema = Schema::new();
let text_field = schema.add_text_field("title", TEXT);
let mut schema_builder = SchemaBuilder::new();
let text_field = schema_builder.add_text_field("title", TEXT);
let mut doc = Document::new();
doc.add_text(text_field, "My title");
assert_eq!(doc.get_fields().len(), 1);

View File

@@ -27,11 +27,12 @@ directory.
```
use tantivy::schema::*;
let mut schema = Schema::new();
let mut schema_builder = SchemaBuilder::new();
let title_options = TextOptions::new()
.set_stored()
.set_indexing_options(TextIndexingOptions::TokenizedWithFreqAndPosition);
schema.add_text_field("title_options", title_options);
schema_builder.add_text_field("title_options", title_options);
let schema = schema_builder.build();
```
We can split the problem of generating a search result page into two phases :
@@ -56,8 +57,9 @@ The example can be rewritten :
```
use tantivy::schema::*;
let mut schema = Schema::new();
schema.add_text_field("title_options", TEXT | STORED);
let mut schema_builder = SchemaBuilder::new();
schema_builder.add_text_field("title_options", TEXT | STORED);
let schema = schema_builder.build();
```
@@ -68,11 +70,12 @@ schema.add_text_field("title_options", TEXT | STORED);
```
use tantivy::schema::*;
let mut schema = Schema::new();
let mut schema_builder = SchemaBuilder::new();
let num_stars_options = U32Options::new()
.set_stored()
.set_indexed();
schema.add_u32_field("num_stars", num_stars_options);
schema_builder.add_u32_field("num_stars", num_stars_options);
let schema = schema_builder.build();
```
Just like for Text fields (see above),
@@ -106,9 +109,8 @@ mod value;
mod named_field_document;
pub use self::named_field_document::NamedFieldDocument;
pub use self::schema::Schema;
pub use self::schema::{Schema, SchemaBuilder};
pub use self::value::Value;
pub use self::schema::DocParsingError;

View File

@@ -8,7 +8,13 @@ use rustc_serialize::json;
use rustc_serialize::json::Json;
use std::collections::BTreeMap;
use schema::field_entry::ValueParsingError;
use std::sync::Arc;
use super::*;
use std::fmt;
/// Tantivy has a very strict schema.
@@ -24,58 +30,119 @@ use super::*;
/// ```
/// use tantivy::schema::*;
///
/// let mut schema = Schema::new();
/// let id_field = schema.add_text_field("id", STRING);
/// let title_field = schema.add_text_field("title", TEXT);
/// let body_field = schema.add_text_field("body", TEXT);
/// let mut schema_builder = SchemaBuilder::new();
/// let id_field = schema_builder.add_text_field("id", STRING);
/// let title_field = schema_builder.add_text_field("title", TEXT);
/// let body_field = schema_builder.add_text_field("body", TEXT);
/// let schema = schema_builder.build();
///
/// ```
#[derive(Clone, Debug)]
pub struct Schema {
pub struct SchemaBuilder {
fields: Vec<FieldEntry>,
fields_map: HashMap<String, Field>, // transient
fields_map: HashMap<String, Field
>, // transient
}
impl Decodable for Schema {
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
let mut schema = Schema::new();
try!(d.read_seq(|d, num_fields| {
for _ in 0..num_fields {
let field_entry = try!(FieldEntry::decode(d));
schema.add_field(field_entry);
}
Ok(())
}));
Ok(schema)
}
}
impl SchemaBuilder {
impl Encodable for Schema {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
try!(s.emit_seq(self.fields.len(),
|mut e| {
for (ord, field) in self.fields.iter().enumerate() {
try!(e.emit_seq_elt(ord, |e| field.encode(e)));
}
Ok(())
}));
Ok(())
}
}
impl Schema {
/// Creates a new, empty schema.
pub fn new() -> Schema {
Schema {
pub fn new() -> SchemaBuilder {
SchemaBuilder {
fields: Vec::new(),
fields_map: HashMap::new(),
}
}
/// Declares a new u32 field named `field_name_str` with the given options
/// and returns its field handle.
///
/// # Caution
///
/// Declaring two fields with the same name makes the second one
/// shadow the first.
/// The first field still gets a field id,
/// but only the second one will be indexed.
pub fn add_u32_field(
    &mut self,
    field_name_str: &str,
    field_options: U32Options) -> Field {
    let entry = FieldEntry::new_u32(field_name_str.to_owned(), field_options);
    self.add_field(entry)
}
/// Declares a new text field named `field_name_str` with the given options
/// and returns its field handle.
///
/// # Caution
///
/// Declaring two fields with the same name makes the second one
/// shadow the first.
/// The first field still gets a field id,
/// but only the second one will be indexed.
pub fn add_text_field(
    &mut self,
    field_name_str: &str,
    field_options: TextOptions) -> Field {
    let entry = FieldEntry::new_text(field_name_str.to_owned(), field_options);
    self.add_field(entry)
}
/// Appends `field_entry` to the field list and records its name in the
/// name -> handle map, returning the new handle.
///
/// The handle is the position of the entry in the field list. If the name
/// is already present in the map, the map entry is overwritten.
fn add_field(&mut self, field_entry: FieldEntry) -> Field {
    // NOTE(review): the `as u8` cast wraps silently once more than 255 fields
    // have been added, which would yield duplicate handles — confirm whether a
    // limit is enforced elsewhere.
    let field_id = self.fields.len() as u8;
    let name = field_entry.name().clone();
    self.fields.push(field_entry);
    let handle = Field(field_id);
    self.fields_map.insert(name, handle);
    handle
}
pub fn build(self,) -> Schema {
Schema(Arc::new(InnerSchema {
fields: self.fields,
fields_map: self.fields_map,
}))
}
}
// Field storage that a `Schema` wraps in an `Arc`; produced by `SchemaBuilder::build`.
#[derive(Debug)]
struct InnerSchema {
// Field entries; a `Field` handle is used as an index into this vector.
fields: Vec<FieldEntry>,
// Name -> handle lookup. Not serialized: `Encodable` only emits `fields`,
// and decoding rebuilds the map by re-adding each entry through a `SchemaBuilder`.
fields_map: HashMap<String, Field>, // transient
}
/// Tantivy has a very strict schema.
/// You need to specify in advance, whether a field is indexed or not,
/// stored or not, and RAM-based or not.
///
/// A `Schema` is read-only: fields are declared one by one on a
/// `SchemaBuilder`, and `SchemaBuilder::build` turns the result
/// into a `Schema`.
/// It is for the moment impossible to remove fields.
///
/// The field definitions are held behind an `Arc`, so cloning a
/// `Schema` only clones that `Arc`.
///
/// # Examples
///
/// ```
/// use tantivy::schema::*;
///
/// let mut schema_builder = SchemaBuilder::new();
/// let id_field = schema_builder.add_text_field("id", STRING);
/// let title_field = schema_builder.add_text_field("title", TEXT);
/// let body_field = schema_builder.add_text_field("body", TEXT);
/// let schema = schema_builder.build();
///
/// ```
#[derive(Clone)]
pub struct Schema(Arc<InnerSchema>);
impl Schema {
pub fn get_field_entry(&self, field: Field) -> &FieldEntry {
&self.fields[field.0 as usize]
&self.0.fields[field.0 as usize]
}
pub fn get_field_name(&self, field: Field) -> &String {
@@ -83,7 +150,7 @@ impl Schema {
}
pub fn fields(&self,) -> &Vec<FieldEntry> {
&self.fields
&self.0.fields
}
/// Returns the field options associated with a given name.
@@ -96,40 +163,9 @@ impl Schema {
/// If panicking is not an option for you,
/// you may use `get(&self, field_name: &str)`.
pub fn get_field(&self, field_name: &str) -> Option<Field> {
self.fields_map.get(field_name).map(|field| field.clone())
self.0.fields_map.get(field_name).map(|field| field.clone())
}
/// Creates a new field.
/// Return the associated field handle.
pub fn add_u32_field(
&mut self,
field_name_str: &str,
field_options: U32Options) -> Field {
let field_name = String::from(field_name_str);
let field_entry = FieldEntry::new_u32(field_name, field_options);
self.add_field(field_entry)
}
pub fn add_text_field(
&mut self,
field_name_str: &str,
field_options: TextOptions) -> Field {
// TODO case if field already exists
let field_name = String::from(field_name_str);
let field_entry = FieldEntry::new_text(field_name, field_options);
self.add_field(field_entry)
}
fn add_field(&mut self, field_entry: FieldEntry) -> Field {
let field = Field(self.fields.len() as u8);
// TODO case if field already exists
let field_name = field_entry.name().clone();
self.fields.push(field_entry);
self.fields_map.insert(field_name, field.clone());
field
}
pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
let mut field_map = BTreeMap::new();
for (field, field_values) in doc.get_sorted_fields() {
@@ -205,9 +241,47 @@ impl Schema {
}
Ok(doc)
}
}
impl fmt::Debug for Schema {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.0.fmt(f)
}
}
impl Decodable for Schema {
    /// Decodes a schema from a sequence of field entries.
    ///
    /// Each entry is re-added through a `SchemaBuilder`, so the
    /// name -> handle map (which is not serialized) is rebuilt and field
    /// handles follow the order of the encoded sequence.
    ///
    /// Returns the decoder's error if the sequence or any entry fails to decode.
    fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
        let mut schema_builder = SchemaBuilder::new();
        try!(d.read_seq(|d, num_fields| {
            for ord in 0..num_fields {
                // Read each element through `read_seq_elt`, mirroring the
                // `emit_seq_elt` calls made on the encoding side, so that
                // decoders which track element boundaries stay in sync.
                let field_entry = try!(d.read_seq_elt(ord, |d| FieldEntry::decode(d)));
                schema_builder.add_field(field_entry);
            }
            Ok(())
        }));
        Ok(schema_builder.build())
    }
}
impl Encodable for Schema {
    /// Encodes the schema as a sequence of its field entries, in field order.
    ///
    /// Only the field list is emitted; the name -> handle map is not.
    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
        let entries = &self.0.fields;
        s.emit_seq(entries.len(), |seq| {
            for (position, entry) in entries.iter().enumerate() {
                try!(seq.emit_seq_elt(position, |elt| entry.encode(elt)));
            }
            Ok(())
        })
    }
}
impl From<SchemaBuilder> for Schema {
fn from(schema_builder: SchemaBuilder) -> Schema {
schema_builder.build()
}
}
@@ -238,11 +312,12 @@ mod tests {
#[test]
pub fn test_schema_serialization() {
let mut schema = Schema::new();
let mut schema_builder = SchemaBuilder::new();
let count_options = U32Options::new().set_stored().set_fast();
schema.add_text_field("title", TEXT);
schema.add_text_field("author", STRING);
schema.add_u32_field("count", count_options);
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u32_field("count", count_options);
let schema = schema_builder.build();
let schema_json: String = format!("{}", json::as_pretty_json(&schema));
println!("{}", schema_json);
let expected = r#"[
@@ -280,11 +355,12 @@ mod tests {
#[test]
pub fn test_document_to_json() {
let mut schema = Schema::new();
let mut schema_builder = SchemaBuilder::new();
let count_options = U32Options::new().set_stored().set_fast();
schema.add_text_field("title", TEXT);
schema.add_text_field("author", STRING);
schema.add_u32_field("count", count_options);
schema_builder.add_text_field("title", TEXT);
schema_builder.add_text_field("author", STRING);
schema_builder.add_u32_field("count", count_options);
let schema = schema_builder.build();
let doc_json = r#"{
"title": "my title",
"author": "fulmicoton",
@@ -297,11 +373,12 @@ mod tests {
#[test]
pub fn test_parse_document() {
let mut schema = Schema::new();
let mut schema_builder = SchemaBuilder::new();
let count_options = U32Options::new().set_stored().set_fast();
let title_field = schema.add_text_field("title", TEXT);
let author_field = schema.add_text_field("author", STRING);
let count_field = schema.add_u32_field("count", count_options);
let title_field = schema_builder.add_text_field("title", TEXT);
let author_field = schema_builder.add_text_field("author", STRING);
let count_field = schema_builder.add_u32_field("count", count_options);
let schema = schema_builder.build();
{
let doc = schema.parse_document("{}").unwrap();
assert!(doc.get_fields().is_empty());

View File

@@ -64,10 +64,10 @@ mod tests {
#[test]
pub fn test_term() {
let mut schema = Schema::new();
let _ = schema.add_text_field("text", STRING);
let title_field = schema.add_text_field("title", STRING);
let count_field = schema.add_text_field("count", STRING);
let mut schema_builder = SchemaBuilder::new();
schema_builder.add_text_field("text", STRING);
let title_field = schema_builder.add_text_field("title", STRING);
let count_field = schema_builder.add_text_field("count", STRING);
{
let term = Term::from_field_text(title_field, "test");
assert_eq!(term.get_field(), title_field);

View File

@@ -181,10 +181,7 @@ impl BitOr for TextOptions {
#[cfg(test)]
mod tests {
use schema::Schema;
use schema::Field;
use schema::FieldType;
use super::*;
use schema::*;
#[test]
fn test_field_options() {
@@ -194,8 +191,9 @@ mod tests {
assert!(field_options.get_indexing_options().is_tokenized());
}
{
let mut schema = Schema::new();
let _body_field: Field = schema.add_text_field("body", TEXT);
let mut schema_builder = SchemaBuilder::new();
schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build();
let field = schema.get_field("body").unwrap();
let field_entry = schema.get_field_entry(field);
match field_entry.field_type() {

View File

@@ -14,15 +14,16 @@ mod tests {
use super::*;
use test::Bencher;
use std::path::Path;
use schema::Schema;
use schema::{Schema, SchemaBuilder};
use schema::TextOptions;
use schema::FieldValue;
use directory::{RAMDirectory, Directory, MmapDirectory, WritePtr};
fn write_lorem_ipsum_store(writer: WritePtr) -> Schema {
let mut schema = Schema::new();
let field_body = schema.add_text_field("body", TextOptions::new().set_stored());
let field_title = schema.add_text_field("title", TextOptions::new().set_stored());
let mut schema_builder = SchemaBuilder::new();
let field_body = schema_builder.add_text_field("body", TextOptions::new().set_stored());
let field_title = schema_builder.add_text_field("title", TextOptions::new().set_stored());
let schema = schema_builder.build();
let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
{
let mut store_writer = StoreWriter::new(writer);