Compare commits

...

3 Commits
doc ... wasm

Author SHA1 Message Date
Paul Masurel
232ca5c06c Added convert to static [u8] 2018-04-10 21:18:32 +09:00
Paul Masurel
e78af20375 remove comment 2018-04-09 21:51:17 +09:00
Paul Masurel
30637f7a7f Ok on wasm 2018-03-31 17:42:26 +09:00
13 changed files with 247 additions and 17 deletions

View File

@@ -18,7 +18,6 @@ tinysegmenter = "0.1.0"
regex = "0.2"
fst = {version="0.2", default-features=false}
atomicwrites = {version="0.1", optional=true}
tempfile = "2.1"
log = "0.3.6"
combine = "2.2"
tempdir = "0.3"
@@ -27,7 +26,6 @@ serde_derive = "1.0"
serde_json = "1.0"
num_cpus = "1.2"
itertools = "0.5.9"
lz4 = "1.20"
bit-set = "0.4.0"
uuid = { version = "0.6", features = ["v4", "serde"] }
chan = "0.1"
@@ -40,13 +38,15 @@ stable_deref_trait = "1.0.0"
rust-stemmers = "0.1.0"
downcast = { version="0.9", features = ["nightly"]}
matches = "0.1"
bitpacking = "0.3"
snap = "0.2"
bitpacking = {path = "../bitpacking"}
[target.'cfg(windows)'.dependencies]
winapi = "0.2"
[dev-dependencies]
rand = "0.3"
tempfile = "2.1"
env_logger = "0.4"
[profile.release]
@@ -68,3 +68,12 @@ travis-ci = { repository = "tantivy-search/tantivy" }
[[example]]
name = "simple_search"
required-features = ["mmap"]
[[bin]]
name = "convert_to_static"
path = "./bin/convert_to_static.rs"
[[bin]]
name = "test_static_dir"
path = "./bin/test_static_dir.rs"

20
bin/convert_to_static.rs Normal file
View File

@@ -0,0 +1,20 @@
use std::env;
use std::path::PathBuf;
use std::fs::File;
use std::io::Write;
extern crate tantivy;
use tantivy::directory::write_static_from_directory;
/// Packs the files of a directory into a single output file.
///
/// Expects two command-line arguments: `<directory_path> <outputfile>`.
/// The directory is serialized with `write_static_from_directory` and the
/// resulting buffer is written to the output file.
///
/// Panics if an argument is missing or if any step fails.
fn main() {
    const USAGE: &str = "Expect 2 args.<directory_path> <outputfile>";

    let mut cli_args = env::args();
    // The first argument is the program name; it is not used.
    cli_args.next().unwrap();
    let source_dir = cli_args.next().expect(USAGE);
    let destination = cli_args.next().expect(USAGE);
    println!("{} => {}", source_dir, destination);

    let packed = write_static_from_directory(&PathBuf::from(source_dir)).unwrap();
    println!("Read all");

    let mut out_file = File::create(destination).unwrap();
    out_file.write_all(&packed).unwrap();
    out_file.flush().unwrap();
}

51
bin/test_static_dir.rs Normal file
View File

@@ -0,0 +1,51 @@
use std::env;
use std::path::PathBuf;
use std::fs::File;
use std::io::Write;
extern crate tantivy;
use tantivy::directory::{StaticDirectory, write_static_from_directory};
use tantivy::Index;
use tantivy::query::QueryParser;
use tantivy::collector::TopCollector;
static DATA: &'static [u8] = include_bytes!("output.bin");
/// Opens the index embedded in `DATA`, searches it for `sea whale` over the
/// `title` and `body` fields, and prints the top 10 hits as JSON.
///
/// # Errors
///
/// Returns the error raised while opening the static directory or the index,
/// loading the searchers, parsing the query, running the search, or fetching
/// a document.
///
/// # Panics
///
/// Panics if the schema of the embedded index has no `title` or `body` field.
fn run() -> tantivy::Result<()> {
    // Errors are propagated to the caller rather than unwrapped here, since
    // this function already returns a `tantivy::Result`.
    let directory = StaticDirectory::open(DATA)?;
    let index = Index::open_directory(directory)?;
    index.load_searchers()?;
    let searcher = index.searcher();

    let schema = index.schema();
    let title = schema
        .get_field("title")
        .expect("the embedded index has no `title` field");
    let body = schema
        .get_field("body")
        .expect("the embedded index has no `body` field");

    let query_parser = QueryParser::for_index(&index, vec![title, body]);
    let query = query_parser.parse_query("sea whale")?;

    let mut top_collector = TopCollector::with_limit(10);
    searcher.search(&*query, &mut top_collector)?;

    // The collector only yields document addresses; the documents themselves
    // are fetched through the searcher before being printed.
    for doc_address in top_collector.docs() {
        let retrieved_doc = searcher.doc(&doc_address)?;
        println!("{}", schema.to_json(&retrieved_doc));
    }
    Ok(())
}
/// Entry point: runs the search and panics if `run` returned an error.
fn main() {
run().unwrap();
}

View File

@@ -86,6 +86,7 @@ impl Index {
/// The temp directory is only used for testing the `MmapDirectory`.
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
#[cfg(feature="mmap")]
#[cfg(test)]
pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
let mmap_directory = MmapDirectory::create_from_tempdir()?;
let directory = ManagedDirectory::new(mmap_directory)?;
@@ -121,6 +122,13 @@ impl Index {
Index::create_from_metas(directory, &metas)
}
/// Opens an existing index stored in the given `Directory`.
///
/// The directory is wrapped in a `ManagedDirectory`, the index metas are
/// loaded from it, and the index is built from these metas.
pub fn open_directory<TDirectory: Directory>(directory: TDirectory) -> Result<Index> {
    let managed_directory = ManagedDirectory::new(directory)?;
    let index_metas = load_metas(&managed_directory)?;
    Index::create_from_metas(managed_directory, &index_metas)
}
/// Reads the index meta file from the directory.
pub fn load_metas(&self) -> Result<IndexMeta> {
load_metas(self.directory())

View File

@@ -12,12 +12,15 @@ mod directory;
mod read_only_source;
mod shared_vec_slice;
mod managed_directory;
mod static_directory;
/// Errors specific to the directory module.
pub mod error;
use std::io::{BufWriter, Seek, Write};
pub use self::static_directory::StaticDirectory;
pub use self::static_directory::write_static_from_directory;
pub use self::read_only_source::ReadOnlySource;
pub use self::directory::Directory;
pub use self::ram_directory::RAMDirectory;

View File

@@ -7,6 +7,8 @@ use std::slice;
use std::io::{self, Read};
use stable_deref_trait::{CloneStableDeref, StableDeref};
const EMPTY_SLICE: [u8; 0] = [];
/// Read object that represents files in tantivy.
///
/// These read objects are only in charge to deliver
@@ -19,6 +21,8 @@ pub enum ReadOnlySource {
Mmap(MmapReadOnly),
/// Wrapping a `Vec<u8>`
Anonymous(SharedVecSlice),
/// Wrapping a static slice
Static(&'static [u8])
}
unsafe impl StableDeref for ReadOnlySource {}
@@ -35,7 +39,7 @@ impl Deref for ReadOnlySource {
impl ReadOnlySource {
/// Creates an empty ReadOnlySource
pub fn empty() -> ReadOnlySource {
ReadOnlySource::Anonymous(SharedVecSlice::empty())
ReadOnlySource::Static(&EMPTY_SLICE)
}
/// Returns the data underlying the ReadOnlySource object.
@@ -44,6 +48,7 @@ impl ReadOnlySource {
#[cfg(feature="mmap")]
ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },
ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
ReadOnlySource::Static(data) => data,
}
}
@@ -76,6 +81,9 @@ impl ReadOnlySource {
ReadOnlySource::Anonymous(ref shared_vec) => {
ReadOnlySource::Anonymous(shared_vec.slice(from_offset, to_offset))
}
ReadOnlySource::Static(data) => {
ReadOnlySource::Static(&data[from_offset..to_offset])
}
}
}
@@ -116,6 +124,12 @@ impl From<Vec<u8>> for ReadOnlySource {
}
}
impl From<&'static [u8]> for ReadOnlySource {
    /// Wraps a `'static` byte slice in the `Static` variant.
    fn from(bytes: &'static [u8]) -> Self {
        ReadOnlySource::Static(bytes)
    }
}
/// Acts as a owning cursor over the data backed up by a `ReadOnlySource`
pub(crate) struct SourceRead {
_data_owner: ReadOnlySource,

View File

@@ -0,0 +1,123 @@
use std::collections::HashMap;
use Directory;
use std::path::PathBuf;
use directory::ReadOnlySource;
use std::io::BufWriter;
use directory::error::{DeleteError, OpenReadError, OpenWriteError};
use std::path::Path;
use std::fmt::{Formatter, Debug, self};
use Result as TantivyResult;
use directory::SeekableWrite;
use std::io;
use std::fs;
use common::Endianness;
use common::BinarySerializable;
use common::VInt;
use byteorder::ByteOrder;
use std::str;
use std::fs::File;
use std::io::{Read, Write};
use std::ffi::OsString;
/// Directory backed by `'static` byte slices.
///
/// Each file path is mapped to the static slice holding that file's content.
#[derive(Clone)]
pub struct StaticDirectory {
    files: HashMap<PathBuf, &'static [u8]>,
}

impl Debug for StaticDirectory {
    /// Formats the directory as `StaticDirectory[<n> files]`, where `<n>` is
    /// the number of files it holds.
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        let num_files = self.files.len();
        write!(f, "StaticDirectory[{} files]", num_files)
    }
}
impl StaticDirectory {
    /// Builds a `StaticDirectory` over a buffer produced by
    /// `write_static_from_directory`.
    ///
    /// The buffer is read as follows:
    /// - the last 8 bytes hold the body length, read as a `u64`;
    /// - the body is the first `body_len` bytes and contains the file
    ///   contents back to back;
    /// - the footer sits between the body and the last 8 bytes. It starts
    ///   with the number of files (`VInt`), followed for each file by the
    ///   length of its name (`VInt`), the name bytes, and the length of its
    ///   content (`VInt`).
    ///
    /// No data is copied: every file is a sub-slice of `data`.
    ///
    /// # Errors
    ///
    /// Returns an error if a `VInt` cannot be deserialized from the footer.
    ///
    /// # Panics
    ///
    /// Panics if `data` is 8 bytes long or less, if a length read from the
    /// buffer points outside of it, or if a file name is not valid UTF-8.
    pub fn open(data: &'static [u8]) -> TantivyResult<StaticDirectory> {
        assert!(data.len() > 8);
        let footer_len_offset = data.len() - 8;
        let body_len = Endianness::read_u64(&data[footer_len_offset..]) as usize;
        let mut body = &data[..body_len];
        let mut footer = &data[body_len..footer_len_offset];
        let num_files = VInt::deserialize(&mut footer)?.0 as usize;
        let mut files = HashMap::new();
        for _ in 0..num_files {
            // File name: length prefix followed by the name bytes.
            let filename_len = VInt::deserialize(&mut footer)?.0 as usize;
            let filename = &footer[..filename_len];
            footer = &footer[filename_len..];
            // File content: the next `data_len` bytes of the body.
            let data_len = VInt::deserialize(&mut footer)?.0 as usize;
            let file_data = &body[..data_len];
            body = &body[data_len..];
            let filename_str = str::from_utf8(filename).expect("Invalid UTF8");
            files.insert(PathBuf::from(filename_str), file_data);
        }
        Ok(StaticDirectory { files })
    }
}
/// `Directory` implementation serving files out of static memory.
///
/// Only the read operations are supported; `delete`, `open_write` and
/// `atomic_write` panic.
impl Directory for StaticDirectory {
    /// Returns a `ReadOnlySource` over the static slice registered for `path`,
    /// or `FileDoesNotExist` if there is none.
    fn open_read(&self, path: &Path) -> Result<ReadOnlySource, OpenReadError> {
        match self.files.get(path) {
            Some(&bytes) => Ok(ReadOnlySource::from(bytes)),
            None => Err(OpenReadError::FileDoesNotExist(path.to_owned())),
        }
    }

    /// Not supported: always panics.
    fn delete(&self, path: &Path) -> Result<(), DeleteError> {
        unimplemented!("Static directory is read-only !")
    }

    /// Tells whether a file is registered under `path`.
    fn exists(&self, path: &Path) -> bool {
        self.files.contains_key(path)
    }

    /// Not supported: always panics.
    fn open_write(&mut self, path: &Path) -> Result<BufWriter<Box<SeekableWrite>>, OpenWriteError> {
        unimplemented!("Static directory is read-only !")
    }

    /// Returns a copy of the content registered for `path`, or
    /// `FileDoesNotExist` if there is none.
    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
        match self.files.get(path) {
            Some(bytes) => Ok(bytes.to_vec()),
            None => Err(OpenReadError::FileDoesNotExist(path.to_owned())),
        }
    }

    /// Not supported: always panics.
    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
        unimplemented!("Static directory is read-only !")
    }

    /// Returns a boxed clone of this directory.
    fn box_clone(&self) -> Box<Directory> {
        Box::new(self.clone())
    }
}
/// Serializes the files found directly in `directory_path` into a single
/// buffer that `StaticDirectory::open` can read back.
///
/// The returned buffer is laid out as follows:
/// - body: the content of every file, back to back, in iteration order;
/// - footer: the number of files (`VInt`), followed for each file by the
///   length of its name (`VInt`), the name bytes, and the length of its
///   content (`VInt`);
/// - the body length, serialized as a `u64`.
///
/// Entries that are not regular files are skipped. File names are written
/// through `to_string_lossy`, so a name that is not valid Unicode is stored
/// with replacement characters.
///
/// # Errors
///
/// Returns an error if the directory cannot be listed, if a file cannot be
/// opened or read, or if serialization fails.
///
/// # Panics
///
/// Panics if `directory_path` is not a directory.
pub fn write_static_from_directory(directory_path: &Path) -> TantivyResult<Vec<u8>> {
    assert!(directory_path.is_dir());
    let mut file_data: Vec<(OsString, usize)> = Vec::new();
    let mut write: Vec<u8> = Vec::new();
    for entry in fs::read_dir(directory_path)? {
        let entry = entry?;
        let path = entry.path();
        if path.is_file() {
            info!("Appending {}", path.to_string_lossy());
            let mut open_file = File::open(&path)?;
            let file_len = open_file.read_to_end(&mut write)?;
            file_data.push((entry.file_name(), file_len));
        }
    }
    // write footer
    let body_len = write.len();
    VInt(file_data.len() as u64).serialize(&mut write)?;
    for (filename, filelen) in file_data {
        // The length prefix must describe the bytes that are actually written.
        // `OsString::len` can differ from the length of the lossy UTF-8
        // conversion, which would make the footer unreadable.
        let filename_lossy = filename.to_string_lossy();
        let filename_bytes = filename_lossy.as_bytes();
        VInt(filename_bytes.len() as u64).serialize(&mut write)?;
        write.write_all(filename_bytes)?;
        VInt(filelen as u64).serialize(&mut write)?;
    }
    (body_len as u64).serialize(&mut write)?;
    Ok(write)
}

View File

@@ -1,15 +1,15 @@
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
#[derive(Clone, Default)]
pub struct Stamper(Arc<AtomicU64>);
pub struct Stamper(Arc<AtomicUsize>);
impl Stamper {
pub fn new(first_opstamp: u64) -> Stamper {
Stamper(Arc::new(AtomicU64::new(first_opstamp)))
Stamper(Arc::new(AtomicUsize::new(first_opstamp as usize)))
}
pub fn stamp(&self) -> u64 {
self.0.fetch_add(1u64, Ordering::SeqCst)
self.0.fetch_add(1, Ordering::SeqCst) as u64
}
}

View File

@@ -140,7 +140,7 @@ extern crate fst;
extern crate futures;
extern crate futures_cpupool;
extern crate itertools;
extern crate lz4;
extern crate snap;
extern crate num_cpus;
extern crate owning_ref;
extern crate regex;
@@ -149,6 +149,7 @@ extern crate serde;
extern crate serde_json;
extern crate stable_deref_trait;
extern crate tempdir;
#[cfg(test)]
extern crate tempfile;
extern crate uuid;
extern crate bitpacking;

View File

@@ -110,7 +110,6 @@ mod tests {
let query = query_parser.parse_query("+a b").unwrap();
let weight = query.weight(&*searcher, false).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
println!("{:?}", scorer.type_name());
assert!(Downcast::<TermScorer>::is_type(&*scorer));
}
}

View File

@@ -9,7 +9,7 @@ use std::mem::size_of;
use std::io::{self, Read};
use common::VInt;
use datastruct::SkipList;
use lz4;
use snap;
/// Reads document off tantivy's [`Store`](./index.html)
#[derive(Clone)]
@@ -61,9 +61,9 @@ impl StoreReader {
let mut current_block_mut = self.current_block.borrow_mut();
current_block_mut.clear();
let compressed_block = self.compressed_block(block_offset);
let mut lz4_decoder = lz4::Decoder::new(compressed_block)?;
let mut snap_decoder = snap::Reader::new(compressed_block);
*self.current_block_offset.borrow_mut() = usize::max_value();
lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())?;
snap_decoder.read_to_end(&mut current_block_mut).map(|_| ())?;
*self.current_block_offset.borrow_mut() = block_offset;
}
Ok(())

View File

@@ -3,7 +3,7 @@ use DocId;
use common::{BinarySerializable, VInt};
use std::io::{self, Write};
use super::StoreReader;
use lz4;
use snap;
use datastruct::SkipListBuilder;
use common::CountingWriter;
use schema::Document;
@@ -88,10 +88,9 @@ impl StoreWriter {
fn write_and_compress_block(&mut self) -> io::Result<()> {
self.intermediary_buffer.clear();
{
let mut encoder = lz4::EncoderBuilder::new().build(&mut self.intermediary_buffer)?;
let mut encoder = snap::Writer::new(&mut self.intermediary_buffer);
encoder.write_all(&self.current_block)?;
let (_, encoder_result) = encoder.finish();
encoder_result?;
encoder.flush()?;
}
(self.intermediary_buffer.len() as u32).serialize(&mut self.writer)?;
self.writer.write_all(&self.intermediary_buffer)?;

View File

@@ -86,6 +86,9 @@ fn open_fst_index(source: ReadOnlySource) -> fst::Map {
ReadOnlySource::Anonymous(data) => {
Fst::from_shared_bytes(data.data, data.start, data.len).expect("FST data is corrupted")
}
ReadOnlySource::Static(bytes) => {
Fst::from_static_slice(bytes).expect("FST data is corrupted")
}
#[cfg(feature="mmap")]
ReadOnlySource::Mmap(mmap_readonly) => {
Fst::from_mmap(mmap_readonly).expect("FST data is corrupted")