mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-22 11:10:40 +00:00
blop
This commit is contained in:
1
build.rs
1
build.rs
@@ -5,6 +5,7 @@ fn main() {
|
||||
.cpp(true)
|
||||
.flag("-std=c++11")
|
||||
.flag("-O3")
|
||||
.flag("-mssse3")
|
||||
.include("./cpp/SIMDCompressionAndIntersection/include")
|
||||
.object("cpp/SIMDCompressionAndIntersection/bitpacking.o")
|
||||
.object("cpp/SIMDCompressionAndIntersection/integratedbitpacking.o")
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#include <iostream>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#include "codecfactory.h"
|
||||
#include "intersection.h"
|
||||
|
||||
|
||||
@@ -30,13 +30,21 @@ pub fn generate_segment_name() -> SegmentId {
|
||||
|
||||
#[derive(Clone,Debug,RustcDecodable, RustcEncodable)]
|
||||
pub struct DirectoryMeta {
|
||||
segments: Vec<String>
|
||||
segments: Vec<String>,
|
||||
schema: Schema,
|
||||
}
|
||||
|
||||
impl DirectoryMeta {
|
||||
fn new() -> DirectoryMeta {
|
||||
DirectoryMeta {
|
||||
segments: Vec::new()
|
||||
segments: Vec::new(),
|
||||
schema: Schema::new(),
|
||||
}
|
||||
}
|
||||
fn with_schema(schema: Schema) -> DirectoryMeta {
|
||||
DirectoryMeta {
|
||||
segments: Vec::new(),
|
||||
schema: schema,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,13 +88,7 @@ pub struct Directory {
|
||||
impl Directory {
|
||||
|
||||
pub fn schema(&self,) -> Schema {
|
||||
self.get_read().unwrap().schema.clone()
|
||||
}
|
||||
|
||||
pub fn set_schema(&mut self, schema: &Schema) {
|
||||
self.get_write()
|
||||
.unwrap()
|
||||
.set_schema(schema);
|
||||
self.get_read().unwrap().metas.schema.clone()
|
||||
}
|
||||
|
||||
fn get_write(&mut self) -> Result<RwLockWriteGuard<InnerDirectory>> {
|
||||
@@ -99,18 +101,22 @@ impl Directory {
|
||||
}
|
||||
|
||||
fn get_read(&self) -> Result<RwLockReadGuard<InnerDirectory>> {
|
||||
match self.inner_directory.read() {
|
||||
Ok(dir) =>
|
||||
Ok(dir),
|
||||
Err(e) =>
|
||||
Err(Error::LockError(format!("Could not acquire read lock on directory. {:?}", e)))
|
||||
}
|
||||
self.inner_directory.read().map_err(
|
||||
|e| Error::LockError(format!("Could not acquire read lock on directory. {:?}", e))
|
||||
)
|
||||
}
|
||||
|
||||
pub fn publish_segment(&mut self, segment: Segment) -> Result<()> {
|
||||
return try!(self.get_write()).publish_segment(segment);
|
||||
}
|
||||
|
||||
pub fn create(filepath: &Path, schema: Schema) -> Result<Directory> {
|
||||
let inner_directory = try!(InnerDirectory::create(filepath, schema));
|
||||
Ok(Directory {
|
||||
inner_directory: Arc::new(RwLock::new(inner_directory)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn open(filepath: &Path) -> Result<Directory> {
|
||||
let inner_directory = try!(InnerDirectory::open(filepath));
|
||||
Ok(Directory {
|
||||
@@ -173,7 +179,6 @@ struct InnerDirectory {
|
||||
index_path: PathBuf,
|
||||
mmap_cache: RefCell<HashMap<PathBuf, MmapReadOnly>>,
|
||||
metas: DirectoryMeta,
|
||||
schema: Schema,
|
||||
_temp_directory: Option<TempDir>,
|
||||
}
|
||||
|
||||
@@ -197,8 +202,14 @@ impl InnerDirectory {
|
||||
self.save_metas()
|
||||
}
|
||||
|
||||
pub fn set_schema(&mut self, schema: &Schema) {
|
||||
self.schema = schema.clone();
|
||||
pub fn create(filepath: &Path, schema: Schema) -> Result<InnerDirectory> {
|
||||
let mut directory = InnerDirectory {
|
||||
index_path: PathBuf::from(filepath),
|
||||
mmap_cache: RefCell::new(HashMap::new()),
|
||||
metas: DirectoryMeta::with_schema(schema),
|
||||
_temp_directory: None,
|
||||
};
|
||||
Ok(directory)
|
||||
}
|
||||
|
||||
pub fn open(filepath: &Path) -> Result<InnerDirectory> {
|
||||
@@ -206,7 +217,6 @@ impl InnerDirectory {
|
||||
index_path: PathBuf::from(filepath),
|
||||
mmap_cache: RefCell::new(HashMap::new()),
|
||||
metas: DirectoryMeta::new(),
|
||||
schema: Schema::new(), // TODO schema
|
||||
_temp_directory: None,
|
||||
};
|
||||
try!(directory.load_metas()); //< does the directory already exists?
|
||||
@@ -229,7 +239,6 @@ impl InnerDirectory {
|
||||
index_path: PathBuf::from(tempdir_path),
|
||||
mmap_cache: RefCell::new(HashMap::new()),
|
||||
metas: DirectoryMeta::new(),
|
||||
schema: Schema::new(),
|
||||
_temp_directory: Some(tempdir)
|
||||
};
|
||||
//< does the directory already exists?
|
||||
@@ -245,6 +254,7 @@ impl InnerDirectory {
|
||||
// TODO check that the directory is empty.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut meta_file = File::open(&meta_filepath).unwrap();
|
||||
let mut meta_content = String::new();
|
||||
meta_file.read_to_string(&mut meta_content);
|
||||
|
||||
@@ -6,10 +6,15 @@ use std::slice;
|
||||
use std::fmt;
|
||||
use std::io::Read;
|
||||
use core::serialize::BinarySerializable;
|
||||
use rustc_serialize::Decodable;
|
||||
use rustc_serialize::Encodable;
|
||||
use rustc_serialize::Decoder;
|
||||
use rustc_serialize::Encoder;
|
||||
|
||||
|
||||
pub type DocId = u32;
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,Eq)]
|
||||
#[derive(Clone,Debug,PartialEq,Eq, RustcDecodable, RustcEncodable)]
|
||||
pub struct FieldOptions {
|
||||
// untokenized_indexed: bool,
|
||||
tokenized_indexed: bool,
|
||||
@@ -48,7 +53,6 @@ impl FieldOptions {
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq)]
|
||||
pub struct FieldValue {
|
||||
pub field: Field,
|
||||
@@ -87,27 +91,63 @@ impl BinarySerializable for FieldValue {
|
||||
|
||||
|
||||
|
||||
#[derive(Clone,PartialEq,PartialOrd,Ord,Eq,Hash)]
|
||||
#[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
|
||||
pub struct Term {
|
||||
data: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Clone,Debug)]
|
||||
#[derive(Clone, Debug, RustcDecodable, RustcEncodable)]
|
||||
struct FieldEntry {
|
||||
name: String,
|
||||
option: FieldOptions,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Schema {
|
||||
fields: HashMap<String, Field>,
|
||||
field_options: Vec<FieldOptions>,
|
||||
fields: Vec<FieldEntry>,
|
||||
fields_map: HashMap<String, Field>, // transient
|
||||
field_options: Vec<FieldOptions>, // transient
|
||||
}
|
||||
|
||||
impl Decodable for Schema {
|
||||
fn decode<D: Decoder>(d: &mut D) -> Result <Self, D::Error> {
|
||||
let mut schema = Schema::new();
|
||||
try!(d.read_seq(|d, num_fields| {
|
||||
for i in 0..num_fields {
|
||||
let field_entry = try!(FieldEntry::decode(d));
|
||||
schema.add_field(&field_entry.name, &field_entry.option);
|
||||
}
|
||||
Ok(())
|
||||
}));
|
||||
Ok(schema)
|
||||
}
|
||||
}
|
||||
|
||||
impl Encodable for Schema {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
try!(s.emit_seq(self.fields.len(),
|
||||
|mut e| {
|
||||
for (ord, field) in self.fields.iter().enumerate() {
|
||||
try!(e.emit_seq_elt(ord, |e| field.encode(e)));
|
||||
}
|
||||
Ok(())
|
||||
}));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Schema {
|
||||
pub fn new() -> Schema {
|
||||
Schema {
|
||||
fields: HashMap::new(),
|
||||
fields: Vec::new(),
|
||||
fields_map: HashMap::new(),
|
||||
field_options: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_field_name(&self, field_name: &str) -> Option<(Field, FieldOptions)> {
|
||||
self.fields
|
||||
self.fields_map
|
||||
.get(field_name)
|
||||
.map(|&Field(field_id)| {
|
||||
let field_options = self.field_options[field_id as usize].clone();
|
||||
@@ -115,24 +155,25 @@ impl Schema {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_field(&self, field: &Field) -> FieldOptions {
|
||||
pub fn field(&self, fieldname: &str) -> Option<Field> {
|
||||
self.fields_map.get(&String::from(fieldname)).map(|field| field.clone())
|
||||
}
|
||||
|
||||
pub fn field_options(&self, field: &Field) -> FieldOptions {
|
||||
let Field(field_id) = *field;
|
||||
self.field_options[field_id as usize].clone()
|
||||
}
|
||||
|
||||
pub fn add_field(&mut self, field_name: &str, field_options: &FieldOptions) -> Field {
|
||||
let next_field = Field(self.fields.len() as u8);
|
||||
let field = self.fields
|
||||
.entry(String::from(field_name))
|
||||
.or_insert(next_field.clone())
|
||||
.clone();
|
||||
if field == next_field {
|
||||
self.field_options.push(field_options.clone());
|
||||
}
|
||||
else {
|
||||
let Field(field_id) = field;
|
||||
self.field_options[field_id as usize] = field_options.clone();
|
||||
}
|
||||
pub fn add_field(&mut self, field_name_str: &str, field_options: &FieldOptions) -> Field {
|
||||
let field = Field(self.fields.len() as u8);
|
||||
// TODO case if field already exists
|
||||
let field_name = String::from(field_name_str);
|
||||
self.fields.push(FieldEntry {
|
||||
name: field_name.clone(),
|
||||
option: field_options.clone(),
|
||||
});
|
||||
self.fields_map.insert(field_name, field.clone());
|
||||
self.field_options.push(field_options.clone());
|
||||
field
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,13 +136,12 @@ impl SegmentWriter {
|
||||
pub fn add(&mut self, doc: Document, schema: &Schema) {
|
||||
let doc_id = self.max_doc;
|
||||
for field_value in doc.fields() {
|
||||
let field_options = schema.get_field(&field_value.field);
|
||||
let field_options = schema.field_options(&field_value.field);
|
||||
if field_options.is_tokenized_indexed() {
|
||||
let mut tokens = self.tokenizer.tokenize(&field_value.text);
|
||||
loop {
|
||||
match tokens.next() {
|
||||
Some(token) => {
|
||||
// println!("TOKEN :{}:", token);
|
||||
let term = Term::from_field_text(&field_value.field, token);
|
||||
self.suscribe(doc_id, term);
|
||||
self.num_tokens += 1;
|
||||
@@ -153,8 +152,7 @@ impl SegmentWriter {
|
||||
}
|
||||
}
|
||||
let mut stored_fieldvalues_it = doc.fields().filter(|field_value| {
|
||||
schema.get_field(&field_value.field)
|
||||
.is_stored()
|
||||
schema.field_options(&field_value.field).is_stored()
|
||||
});
|
||||
self.segment_serializer.store_doc(&mut stored_fieldvalues_it);
|
||||
self.max_doc += 1;
|
||||
|
||||
Reference in New Issue
Block a user