mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-01 15:02:55 +00:00
Compare commits
3 Commits
python-bin
...
wasm
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
232ca5c06c | ||
|
|
e78af20375 | ||
|
|
30637f7a7f |
15
Cargo.toml
15
Cargo.toml
@@ -18,7 +18,6 @@ tinysegmenter = "0.1.0"
|
|||||||
regex = "0.2"
|
regex = "0.2"
|
||||||
fst = {version="0.2", default-features=false}
|
fst = {version="0.2", default-features=false}
|
||||||
atomicwrites = {version="0.1", optional=true}
|
atomicwrites = {version="0.1", optional=true}
|
||||||
tempfile = "2.1"
|
|
||||||
log = "0.3.6"
|
log = "0.3.6"
|
||||||
combine = "2.2"
|
combine = "2.2"
|
||||||
tempdir = "0.3"
|
tempdir = "0.3"
|
||||||
@@ -27,7 +26,6 @@ serde_derive = "1.0"
|
|||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
num_cpus = "1.2"
|
num_cpus = "1.2"
|
||||||
itertools = "0.5.9"
|
itertools = "0.5.9"
|
||||||
lz4 = "1.20"
|
|
||||||
bit-set = "0.4.0"
|
bit-set = "0.4.0"
|
||||||
uuid = { version = "0.6", features = ["v4", "serde"] }
|
uuid = { version = "0.6", features = ["v4", "serde"] }
|
||||||
chan = "0.1"
|
chan = "0.1"
|
||||||
@@ -40,13 +38,15 @@ stable_deref_trait = "1.0.0"
|
|||||||
rust-stemmers = "0.1.0"
|
rust-stemmers = "0.1.0"
|
||||||
downcast = { version="0.9", features = ["nightly"]}
|
downcast = { version="0.9", features = ["nightly"]}
|
||||||
matches = "0.1"
|
matches = "0.1"
|
||||||
bitpacking = "0.3"
|
snap = "0.2"
|
||||||
|
bitpacking = {path = "../bitpacking"}
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
winapi = "0.2"
|
winapi = "0.2"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
rand = "0.3"
|
rand = "0.3"
|
||||||
|
tempfile = "2.1"
|
||||||
env_logger = "0.4"
|
env_logger = "0.4"
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
@@ -68,3 +68,12 @@ travis-ci = { repository = "tantivy-search/tantivy" }
|
|||||||
[[example]]
|
[[example]]
|
||||||
name = "simple_search"
|
name = "simple_search"
|
||||||
required-features = ["mmap"]
|
required-features = ["mmap"]
|
||||||
|
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "convert_to_static"
|
||||||
|
path = "./bin/convert_to_static.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "test_static_dir"
|
||||||
|
path = "./bin/test_static_dir.rs"
|
||||||
20
bin/convert_to_static.rs
Normal file
20
bin/convert_to_static.rs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
use std::env;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Write;
|
||||||
|
extern crate tantivy;
|
||||||
|
use tantivy::directory::write_static_from_directory;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Prints each argument on a separate line
|
||||||
|
let mut args = env::args();
|
||||||
|
args.next().unwrap();
|
||||||
|
let directory_path= args.next().expect("Expect 2 args.<directory_path> <outputfile>");
|
||||||
|
let output_path = args.next().expect("Expect 2 args.<directory_path> <outputfile>");
|
||||||
|
println!("{} => {}", directory_path, output_path);
|
||||||
|
let buffer = write_static_from_directory(&PathBuf::from(directory_path)).unwrap();
|
||||||
|
println!("Read all");
|
||||||
|
let mut output = File::create(output_path).unwrap();
|
||||||
|
output.write_all(&buffer[..]).unwrap();
|
||||||
|
output.flush().unwrap();
|
||||||
|
}
|
||||||
51
bin/test_static_dir.rs
Normal file
51
bin/test_static_dir.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
use std::env;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Write;
|
||||||
|
extern crate tantivy;
|
||||||
|
use tantivy::directory::{StaticDirectory, write_static_from_directory};
|
||||||
|
use tantivy::Index;
|
||||||
|
use tantivy::query::QueryParser;
|
||||||
|
use tantivy::collector::TopCollector;
|
||||||
|
|
||||||
|
|
||||||
|
/// Contents of `output.bin`, embedded into the binary at compile time.
///
/// NOTE(review): presumably the file produced by the `convert_to_static`
/// binary — confirm against the build instructions.
static DATA: &'static [u8] = include_bytes!("output.bin");
|
||||||
|
|
||||||
|
fn run() -> tantivy::Result<()> {
|
||||||
|
// Prints each argument on a separate line
|
||||||
|
let directory = StaticDirectory::open(DATA).unwrap();
|
||||||
|
let index = Index::open_directory(directory).unwrap();
|
||||||
|
index.load_searchers().unwrap();
|
||||||
|
let searcher = index.searcher();
|
||||||
|
|
||||||
|
let schema = index.schema();
|
||||||
|
let title = schema.get_field("title").unwrap();
|
||||||
|
let body = schema.get_field("body").unwrap();
|
||||||
|
|
||||||
|
let query_parser = QueryParser::for_index(&index, vec![title, body]);
|
||||||
|
let query = query_parser.parse_query("sea whale")?;
|
||||||
|
|
||||||
|
let mut top_collector = TopCollector::with_limit(10);
|
||||||
|
|
||||||
|
searcher.search(&*query, &mut top_collector)?;
|
||||||
|
|
||||||
|
let doc_addresses = top_collector.docs();
|
||||||
|
|
||||||
|
// The actual documents still need to be
|
||||||
|
// retrieved from Tantivy's store.
|
||||||
|
//
|
||||||
|
// Since the body field was not configured as stored,
|
||||||
|
// the document returned will only contain
|
||||||
|
// a title.
|
||||||
|
|
||||||
|
for doc_address in doc_addresses {
|
||||||
|
let retrieved_doc = searcher.doc(&doc_address)?;
|
||||||
|
println!("{}", schema.to_json(&retrieved_doc));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
run().unwrap();
|
||||||
|
}
|
||||||
@@ -86,6 +86,7 @@ impl Index {
|
|||||||
/// The temp directory is only used for testing the `MmapDirectory`.
|
/// The temp directory is only used for testing the `MmapDirectory`.
|
||||||
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
|
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
|
||||||
#[cfg(feature="mmap")]
|
#[cfg(feature="mmap")]
|
||||||
|
#[cfg(test)]
|
||||||
pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
|
pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
|
||||||
let mmap_directory = MmapDirectory::create_from_tempdir()?;
|
let mmap_directory = MmapDirectory::create_from_tempdir()?;
|
||||||
let directory = ManagedDirectory::new(mmap_directory)?;
|
let directory = ManagedDirectory::new(mmap_directory)?;
|
||||||
@@ -121,6 +122,13 @@ impl Index {
|
|||||||
Index::create_from_metas(directory, &metas)
|
Index::create_from_metas(directory, &metas)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn open_directory<TDirectory: Directory>(directory: TDirectory) -> Result<Index> {
|
||||||
|
let directory = ManagedDirectory::new(directory)?;
|
||||||
|
let metas = load_metas(&directory)?;
|
||||||
|
Index::create_from_metas(directory, &metas)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Reads the index meta file from the directory.
|
/// Reads the index meta file from the directory.
|
||||||
pub fn load_metas(&self) -> Result<IndexMeta> {
|
pub fn load_metas(&self) -> Result<IndexMeta> {
|
||||||
load_metas(self.directory())
|
load_metas(self.directory())
|
||||||
|
|||||||
@@ -12,12 +12,15 @@ mod directory;
|
|||||||
mod read_only_source;
|
mod read_only_source;
|
||||||
mod shared_vec_slice;
|
mod shared_vec_slice;
|
||||||
mod managed_directory;
|
mod managed_directory;
|
||||||
|
mod static_directory;
|
||||||
|
|
||||||
/// Errors specific to the directory module.
|
/// Errors specific to the directory module.
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
|
||||||
use std::io::{BufWriter, Seek, Write};
|
use std::io::{BufWriter, Seek, Write};
|
||||||
|
|
||||||
|
pub use self::static_directory::StaticDirectory;
|
||||||
|
pub use self::static_directory::write_static_from_directory;
|
||||||
pub use self::read_only_source::ReadOnlySource;
|
pub use self::read_only_source::ReadOnlySource;
|
||||||
pub use self::directory::Directory;
|
pub use self::directory::Directory;
|
||||||
pub use self::ram_directory::RAMDirectory;
|
pub use self::ram_directory::RAMDirectory;
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ use std::slice;
|
|||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use stable_deref_trait::{CloneStableDeref, StableDeref};
|
use stable_deref_trait::{CloneStableDeref, StableDeref};
|
||||||
|
|
||||||
|
/// Zero-length byte array backing the `Static` source returned by
/// `ReadOnlySource::empty()`.
const EMPTY_SLICE: [u8; 0] = [];
|
||||||
|
|
||||||
/// Read object that represents files in tantivy.
|
/// Read object that represents files in tantivy.
|
||||||
///
|
///
|
||||||
/// These read objects are only in charge to deliver
|
/// These read objects are only in charge to deliver
|
||||||
@@ -19,6 +21,8 @@ pub enum ReadOnlySource {
|
|||||||
Mmap(MmapReadOnly),
|
Mmap(MmapReadOnly),
|
||||||
/// Wrapping a `Vec<u8>`
|
/// Wrapping a `Vec<u8>`
|
||||||
Anonymous(SharedVecSlice),
|
Anonymous(SharedVecSlice),
|
||||||
|
/// Wrapping a static slice
|
||||||
|
Static(&'static [u8])
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe impl StableDeref for ReadOnlySource {}
|
unsafe impl StableDeref for ReadOnlySource {}
|
||||||
@@ -35,7 +39,7 @@ impl Deref for ReadOnlySource {
|
|||||||
impl ReadOnlySource {
|
impl ReadOnlySource {
|
||||||
/// Creates an empty ReadOnlySource
|
/// Creates an empty ReadOnlySource
|
||||||
pub fn empty() -> ReadOnlySource {
|
pub fn empty() -> ReadOnlySource {
|
||||||
ReadOnlySource::Anonymous(SharedVecSlice::empty())
|
ReadOnlySource::Static(&EMPTY_SLICE)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the data underlying the ReadOnlySource object.
|
/// Returns the data underlying the ReadOnlySource object.
|
||||||
@@ -44,6 +48,7 @@ impl ReadOnlySource {
|
|||||||
#[cfg(feature="mmap")]
|
#[cfg(feature="mmap")]
|
||||||
ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },
|
ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },
|
||||||
ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
|
ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
|
||||||
|
ReadOnlySource::Static(data) => data,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,6 +81,9 @@ impl ReadOnlySource {
|
|||||||
ReadOnlySource::Anonymous(ref shared_vec) => {
|
ReadOnlySource::Anonymous(ref shared_vec) => {
|
||||||
ReadOnlySource::Anonymous(shared_vec.slice(from_offset, to_offset))
|
ReadOnlySource::Anonymous(shared_vec.slice(from_offset, to_offset))
|
||||||
}
|
}
|
||||||
|
ReadOnlySource::Static(data) => {
|
||||||
|
ReadOnlySource::Static(&data[from_offset..to_offset])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,6 +124,12 @@ impl From<Vec<u8>> for ReadOnlySource {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<&'static [u8]> for ReadOnlySource {
|
||||||
|
fn from(data: &'static [u8]) -> ReadOnlySource {
|
||||||
|
ReadOnlySource::Static(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Acts as a owning cursor over the data backed up by a `ReadOnlySource`
|
/// Acts as a owning cursor over the data backed up by a `ReadOnlySource`
|
||||||
pub(crate) struct SourceRead {
|
pub(crate) struct SourceRead {
|
||||||
_data_owner: ReadOnlySource,
|
_data_owner: ReadOnlySource,
|
||||||
|
|||||||
123
src/directory/static_directory.rs
Normal file
123
src/directory/static_directory.rs
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use Directory;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use directory::ReadOnlySource;
|
||||||
|
use std::io::BufWriter;
|
||||||
|
use directory::error::{DeleteError, OpenReadError, OpenWriteError};
|
||||||
|
use std::path::Path;
|
||||||
|
use std::fmt::{Formatter, Debug, self};
|
||||||
|
use Result as TantivyResult;
|
||||||
|
use directory::SeekableWrite;
|
||||||
|
use std::io;
|
||||||
|
use std::fs;
|
||||||
|
use common::Endianness;
|
||||||
|
use common::BinarySerializable;
|
||||||
|
use common::VInt;
|
||||||
|
use byteorder::ByteOrder;
|
||||||
|
use std::str;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{Read, Write};
|
||||||
|
use std::ffi::OsString;
|
||||||
|
|
||||||
|
/// Directory whose files are `'static` byte slices held in memory.
///
/// Instances are created with `StaticDirectory::open`.
#[derive(Clone)]
pub struct StaticDirectory {
    /// Maps each file path to the static bytes holding that file's contents.
    files: HashMap<PathBuf, &'static [u8]>,
}
|
||||||
|
|
||||||
|
impl Debug for StaticDirectory {
|
||||||
|
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
|
||||||
|
write!(f, "StaticDirectory[{} files]", self.files.len())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StaticDirectory {
|
||||||
|
pub fn open(mut data: &'static [u8]) -> TantivyResult<StaticDirectory> {
|
||||||
|
assert!(data.len() > 8);
|
||||||
|
let footer_len_offset = data.len() - 8;
|
||||||
|
let body_len = Endianness::read_u64(&data[footer_len_offset..]) as usize;
|
||||||
|
let mut body = &data[..body_len];
|
||||||
|
let mut footer = &data[body_len..footer_len_offset];
|
||||||
|
let num_files = VInt::deserialize(&mut footer)?.0 as usize;
|
||||||
|
let mut files = HashMap::new();
|
||||||
|
for _ in 0..num_files {
|
||||||
|
let filename_len = VInt::deserialize(&mut footer)?.0 as usize;
|
||||||
|
let filename = &footer[..filename_len];
|
||||||
|
footer = &footer[filename_len..];
|
||||||
|
let data_len = VInt::deserialize(&mut footer)?.0 as usize;
|
||||||
|
let file_data = &body[..data_len];
|
||||||
|
body = &body[data_len..];
|
||||||
|
let filename_str = str::from_utf8(filename).expect("Invalid UTF8");
|
||||||
|
let filename = PathBuf::from(filename_str);
|
||||||
|
println!("{:?} {:?}", filename, data_len);
|
||||||
|
files.insert(filename, file_data);
|
||||||
|
}
|
||||||
|
Ok(StaticDirectory {
|
||||||
|
files
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Directory for StaticDirectory {
|
||||||
|
fn open_read(&self, path: &Path) -> Result<ReadOnlySource, OpenReadError> {
|
||||||
|
if let Some(static_data) = self.files.get(path) {
|
||||||
|
Ok(ReadOnlySource::from(*static_data))
|
||||||
|
} else {
|
||||||
|
Err(OpenReadError::FileDoesNotExist(path.to_owned()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn delete(&self, path: &Path) -> Result<(), DeleteError> {
|
||||||
|
unimplemented!("Static directory is read-only !")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn exists(&self, path: &Path) -> bool {
|
||||||
|
self.files.contains_key(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn open_write(&mut self, path: &Path) -> Result<BufWriter<Box<SeekableWrite>>, OpenWriteError> {
|
||||||
|
unimplemented!("Static directory is read-only !")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
|
||||||
|
if let Some(static_data) = self.files.get(path) {
|
||||||
|
Ok(static_data.to_vec())
|
||||||
|
} else {
|
||||||
|
Err(OpenReadError::FileDoesNotExist(path.to_owned()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
|
||||||
|
unimplemented!("Static directory is read-only !")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn box_clone(&self) -> Box<Directory> {
|
||||||
|
box self.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_static_from_directory(directory_path: &Path) -> TantivyResult<Vec<u8>> {
|
||||||
|
assert!(directory_path.is_dir());
|
||||||
|
let mut file_data: Vec<(OsString, usize)> = Vec::new();
|
||||||
|
let mut write: Vec<u8> = Vec::new();
|
||||||
|
for entry in fs::read_dir(directory_path)? {
|
||||||
|
let entry = entry?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.is_file() {
|
||||||
|
info!("Appending {}", path.to_string_lossy());
|
||||||
|
let mut open_file = File::open(&path)?;
|
||||||
|
let file_len = open_file.read_to_end(&mut write)?;
|
||||||
|
file_data.push((entry.file_name(), file_len));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// write footer
|
||||||
|
let body_len = write.len();
|
||||||
|
VInt(file_data.len() as u64).serialize(&mut write)?;
|
||||||
|
for (filename, filelen) in file_data {
|
||||||
|
VInt(filename.len() as u64).serialize(&mut write)?;
|
||||||
|
write.write_all(filename.to_string_lossy().as_bytes())?;
|
||||||
|
VInt(filelen as u64).serialize(&mut write)?;
|
||||||
|
}
|
||||||
|
(body_len as u64).serialize(&mut write)?;
|
||||||
|
Ok(write)
|
||||||
|
}
|
||||||
@@ -1,15 +1,15 @@
|
|||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct Stamper(Arc<AtomicU64>);
|
pub struct Stamper(Arc<AtomicUsize>);
|
||||||
|
|
||||||
impl Stamper {
|
impl Stamper {
|
||||||
pub fn new(first_opstamp: u64) -> Stamper {
|
pub fn new(first_opstamp: u64) -> Stamper {
|
||||||
Stamper(Arc::new(AtomicU64::new(first_opstamp)))
|
Stamper(Arc::new(AtomicUsize::new(first_opstamp as usize)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stamp(&self) -> u64 {
|
pub fn stamp(&self) -> u64 {
|
||||||
self.0.fetch_add(1u64, Ordering::SeqCst)
|
self.0.fetch_add(1, Ordering::SeqCst) as u64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -140,7 +140,7 @@ extern crate fst;
|
|||||||
extern crate futures;
|
extern crate futures;
|
||||||
extern crate futures_cpupool;
|
extern crate futures_cpupool;
|
||||||
extern crate itertools;
|
extern crate itertools;
|
||||||
extern crate lz4;
|
extern crate snap;
|
||||||
extern crate num_cpus;
|
extern crate num_cpus;
|
||||||
extern crate owning_ref;
|
extern crate owning_ref;
|
||||||
extern crate regex;
|
extern crate regex;
|
||||||
@@ -149,6 +149,7 @@ extern crate serde;
|
|||||||
extern crate serde_json;
|
extern crate serde_json;
|
||||||
extern crate stable_deref_trait;
|
extern crate stable_deref_trait;
|
||||||
extern crate tempdir;
|
extern crate tempdir;
|
||||||
|
#[cfg(test)]
|
||||||
extern crate tempfile;
|
extern crate tempfile;
|
||||||
extern crate uuid;
|
extern crate uuid;
|
||||||
extern crate bitpacking;
|
extern crate bitpacking;
|
||||||
|
|||||||
@@ -110,7 +110,6 @@ mod tests {
|
|||||||
let query = query_parser.parse_query("+a b").unwrap();
|
let query = query_parser.parse_query("+a b").unwrap();
|
||||||
let weight = query.weight(&*searcher, false).unwrap();
|
let weight = query.weight(&*searcher, false).unwrap();
|
||||||
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
|
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
|
||||||
println!("{:?}", scorer.type_name());
|
|
||||||
assert!(Downcast::<TermScorer>::is_type(&*scorer));
|
assert!(Downcast::<TermScorer>::is_type(&*scorer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ use std::mem::size_of;
|
|||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use common::VInt;
|
use common::VInt;
|
||||||
use datastruct::SkipList;
|
use datastruct::SkipList;
|
||||||
use lz4;
|
use snap;
|
||||||
|
|
||||||
/// Reads document off tantivy's [`Store`](./index.html)
|
/// Reads document off tantivy's [`Store`](./index.html)
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -61,9 +61,9 @@ impl StoreReader {
|
|||||||
let mut current_block_mut = self.current_block.borrow_mut();
|
let mut current_block_mut = self.current_block.borrow_mut();
|
||||||
current_block_mut.clear();
|
current_block_mut.clear();
|
||||||
let compressed_block = self.compressed_block(block_offset);
|
let compressed_block = self.compressed_block(block_offset);
|
||||||
let mut lz4_decoder = lz4::Decoder::new(compressed_block)?;
|
let mut snap_decoder = snap::Reader::new(compressed_block);
|
||||||
*self.current_block_offset.borrow_mut() = usize::max_value();
|
*self.current_block_offset.borrow_mut() = usize::max_value();
|
||||||
lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())?;
|
snap_decoder.read_to_end(&mut current_block_mut).map(|_| ())?;
|
||||||
*self.current_block_offset.borrow_mut() = block_offset;
|
*self.current_block_offset.borrow_mut() = block_offset;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use DocId;
|
|||||||
use common::{BinarySerializable, VInt};
|
use common::{BinarySerializable, VInt};
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use super::StoreReader;
|
use super::StoreReader;
|
||||||
use lz4;
|
use snap;
|
||||||
use datastruct::SkipListBuilder;
|
use datastruct::SkipListBuilder;
|
||||||
use common::CountingWriter;
|
use common::CountingWriter;
|
||||||
use schema::Document;
|
use schema::Document;
|
||||||
@@ -88,10 +88,9 @@ impl StoreWriter {
|
|||||||
fn write_and_compress_block(&mut self) -> io::Result<()> {
|
fn write_and_compress_block(&mut self) -> io::Result<()> {
|
||||||
self.intermediary_buffer.clear();
|
self.intermediary_buffer.clear();
|
||||||
{
|
{
|
||||||
let mut encoder = lz4::EncoderBuilder::new().build(&mut self.intermediary_buffer)?;
|
let mut encoder = snap::Writer::new(&mut self.intermediary_buffer);
|
||||||
encoder.write_all(&self.current_block)?;
|
encoder.write_all(&self.current_block)?;
|
||||||
let (_, encoder_result) = encoder.finish();
|
encoder.flush()?;
|
||||||
encoder_result?;
|
|
||||||
}
|
}
|
||||||
(self.intermediary_buffer.len() as u32).serialize(&mut self.writer)?;
|
(self.intermediary_buffer.len() as u32).serialize(&mut self.writer)?;
|
||||||
self.writer.write_all(&self.intermediary_buffer)?;
|
self.writer.write_all(&self.intermediary_buffer)?;
|
||||||
|
|||||||
@@ -86,6 +86,9 @@ fn open_fst_index(source: ReadOnlySource) -> fst::Map {
|
|||||||
ReadOnlySource::Anonymous(data) => {
|
ReadOnlySource::Anonymous(data) => {
|
||||||
Fst::from_shared_bytes(data.data, data.start, data.len).expect("FST data is corrupted")
|
Fst::from_shared_bytes(data.data, data.start, data.len).expect("FST data is corrupted")
|
||||||
}
|
}
|
||||||
|
ReadOnlySource::Static(bytes) => {
|
||||||
|
Fst::from_static_slice(bytes).expect("FST data is corrupted")
|
||||||
|
}
|
||||||
#[cfg(feature="mmap")]
|
#[cfg(feature="mmap")]
|
||||||
ReadOnlySource::Mmap(mmap_readonly) => {
|
ReadOnlySource::Mmap(mmap_readonly) => {
|
||||||
Fst::from_mmap(mmap_readonly).expect("FST data is corrupted")
|
Fst::from_mmap(mmap_readonly).expect("FST data is corrupted")
|
||||||
|
|||||||
Reference in New Issue
Block a user