Compare commits


2 Commits

Author          SHA1         Message                    Date
Paul Masurel    507e46f814   Added static directory     2018-10-04 23:28:44 +09:00
Paul Masurel    3d3da2d66f   Compiling in WebAssembly   2018-10-04 08:45:04 +09:00
16 changed files with 78 additions and 617 deletions

View File

@@ -17,7 +17,7 @@ byteorder = "1.0"
lazy_static = "1"
regex = "1.0"
fst = {version="0.3", default-features=false}
fst-regex = { version="0.2" }
fst-regex = { version="0.2", optional=true}
lz4 = {version="1.20", optional=true}
snap = {version="0.2"}
atomicwrites = {version="0.2.2", optional=true}
@@ -68,8 +68,9 @@ overflow-checks = true
[features]
# By default, no_fail is disabled. We manually enable it when running tests.
default = ["mmap", "no_fail"]
default = ["mmap", "no_fail", "regex_query"]
mmap = ["fst/mmap", "atomicwrites"]
+regex_query = ["fst-regex"]
lz4-compression = ["lz4"]
no_fail = ["fail/no_fail"]
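The new `regex_query` feature exists so that builds which do not need regex queries (the WebAssembly target in particular) can drop the `fst-regex` dependency entirely; downstream users can also opt out with `default-features = false`. A minimal sketch of how such a cargo feature gates Rust code; the function below is illustrative, not part of the codebase:

#[cfg(feature = "regex_query")]
pub fn regex_queries_enabled() -> bool {
    true // compiled only when the crate is built with the feature
}

#[cfg(not(feature = "regex_query"))]
pub fn regex_queries_enabled() -> bool {
    false // fallback used when `fst-regex` is not linked in
}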

View File

@@ -21,7 +21,7 @@
**Tantivy** is a **full text search engine library** written in rust.
-It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elastic Search](https://www.elastic.co/products/elasticsearch) and [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
+It is closer to Lucene than to Elastic Search and Solr in the sense it is not
an off-the-shelf search engine server, but rather a crate that can be used
to build such a search engine.
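The "library, not server" distinction in practice: the caller builds the schema, owns the index, and searches in-process. A condensed sketch assembled from the API calls that appear in this diff's own tests (2015-edition style, `.unwrap()` for brevity):

#[macro_use]
extern crate tantivy;

use tantivy::schema::{SchemaBuilder, TEXT};
use tantivy::Index;

fn main() {
    let mut schema_builder = SchemaBuilder::new();
    let name = schema_builder.add_text_field("name", TEXT);
    let schema = schema_builder.build();

    // The whole engine runs inside the calling process.
    let index = Index::create_in_ram(schema);
    let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
    writer.add_document(doc!(name => "hello world"));
    writer.commit().unwrap();

    index.load_searchers().unwrap();
    let _searcher = index.searcher();
}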

View File

@@ -4,8 +4,6 @@ use common::VInt;
use directory::ReadOnlySource;
use directory::WritePtr;
use schema::Field;
use space_usage::PerFieldSpaceUsage;
use space_usage::FieldUsage;
use std::collections::HashMap;
use std::io::Write;
use std::io::{self, Read};
@@ -168,16 +166,6 @@ impl CompositeFile {
.get(&FileAddr { field, idx })
.map(|&(from, to)| self.data.slice(from, to))
}
pub fn space_usage(&self) -> PerFieldSpaceUsage {
let mut fields = HashMap::new();
for (&field_addr, &(start, end)) in self.offsets_index.iter() {
fields.entry(field_addr.field)
.or_insert_with(|| FieldUsage::empty(field_addr.field))
.add_field_idx(field_addr.idx, end - start);
}
PerFieldSpaceUsage::new(fields)
}
}
#[cfg(test)]

View File

@@ -5,7 +5,6 @@ use query::Query;
use schema::Document;
use schema::Schema;
use schema::{Field, Term};
use space_usage::SearcherSpaceUsage;
use std::fmt;
use std::sync::Arc;
use termdict::TermMerger;
@@ -100,15 +99,6 @@ impl Searcher {
.collect::<Vec<_>>();
FieldSearcher::new(inv_index_readers)
}
/// Summarize total space usage of this searcher.
pub fn space_usage(&self) -> SearcherSpaceUsage {
let mut space_usage = SearcherSpaceUsage::new();
for segment_reader in self.segment_readers.iter() {
space_usage.add_segment(segment_reader.space_usage());
}
space_usage
}
}
pub struct FieldSearcher {

View File

@@ -16,7 +16,6 @@ use schema::Document;
use schema::Field;
use schema::FieldType;
use schema::Schema;
use space_usage::SegmentSpaceUsage;
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
@@ -382,21 +381,6 @@ impl SegmentReader {
pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator {
SegmentReaderAliveDocsIterator::new(&self)
}
/// Summarize total space usage of this segment.
pub fn space_usage(&self) -> SegmentSpaceUsage {
SegmentSpaceUsage::new(
self.num_docs(),
self.termdict_composite.space_usage(),
self.postings_composite.space_usage(),
self.positions_composite.space_usage(),
self.positions_idx_composite.space_usage(),
self.fast_fields_composite.space_usage(),
self.fieldnorms_composite.space_usage(),
self.store_reader.space_usage(),
self.delete_bitset_opt.as_ref().map(|x| x.space_usage()).unwrap_or(0),
)
}
}
impl fmt::Debug for SegmentReader {

View File

@@ -12,6 +12,7 @@ mod managed_directory;
mod ram_directory;
mod read_only_source;
mod shared_vec_slice;
mod static_dictionnary;
/// Errors specific to the directory module.
pub mod error;
@@ -21,6 +22,7 @@ use std::io::{BufWriter, Seek, Write};
pub use self::directory::{Directory, DirectoryClone};
pub use self::ram_directory::RAMDirectory;
pub use self::read_only_source::ReadOnlySource;
pub use self::static_dictionnary::StaticDirectory;
#[cfg(feature = "mmap")]
pub use self::mmap_directory::MmapDirectory;

View File

@@ -5,6 +5,9 @@ use fst::raw::MmapReadOnly;
use stable_deref_trait::{CloneStableDeref, StableDeref};
use std::ops::Deref;
const EMPTY_SLICE: [u8; 0] = [];
/// Read object that represents files in tantivy.
///
/// These read objects are only in charge to deliver
@@ -17,6 +20,8 @@ pub enum ReadOnlySource {
Mmap(MmapReadOnly),
/// Wrapping a `Vec<u8>`
Anonymous(SharedVecSlice),
/// Wrapping a static slice
Static(&'static [u8])
}
unsafe impl StableDeref for ReadOnlySource {}
@@ -33,7 +38,7 @@ impl Deref for ReadOnlySource {
impl ReadOnlySource {
/// Creates an empty ReadOnlySource
pub fn empty() -> ReadOnlySource {
-ReadOnlySource::Anonymous(SharedVecSlice::empty())
+ReadOnlySource::Static(&EMPTY_SLICE)
}
/// Returns the data underlying the ReadOnlySource object.
@@ -42,6 +47,7 @@ impl ReadOnlySource {
#[cfg(feature = "mmap")]
ReadOnlySource::Mmap(ref mmap_read_only) => mmap_read_only.as_slice(),
ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
ReadOnlySource::Static(data) => data,
}
}
@@ -79,6 +85,9 @@ impl ReadOnlySource {
ReadOnlySource::Anonymous(ref shared_vec) => {
ReadOnlySource::Anonymous(shared_vec.slice(from_offset, to_offset))
}
ReadOnlySource::Static(data) => {
ReadOnlySource::Static(&data[from_offset..to_offset])
}
}
}
@@ -118,3 +127,9 @@ impl From<Vec<u8>> for ReadOnlySource {
ReadOnlySource::Anonymous(shared_data)
}
}
impl From<&'static [u8]> for ReadOnlySource {
fn from(data: &'static [u8]) -> ReadOnlySource {
ReadOnlySource::Static(data)
}
}
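The new `Static` variant plus this `From` impl let bytes that live in the binary itself back a `ReadOnlySource` with no mmap and no copy into a `Vec<u8>`, which is the building block for a filesystem-free (WebAssembly) directory. A small usage sketch:

use tantivy::directory::ReadOnlySource;

// Bytes with 'static lifetime, e.g. produced by include_bytes!.
static RAW: &'static [u8] = &[1, 2, 3, 4, 5];

fn demo() {
    let source = ReadOnlySource::from(RAW);
    assert_eq!(&*source, &[1, 2, 3, 4, 5]); // Deref hands back the same bytes

    // Slicing a Static source just re-borrows the static slice.
    let middle = source.slice(1, 4);
    assert_eq!(&*middle, &[2, 3, 4]);
}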

View File

@@ -2,7 +2,6 @@ use bit_set::BitSet;
use common::HasLen;
use directory::ReadOnlySource;
use directory::WritePtr;
use space_usage::ByteCount;
use std::io;
use std::io::Write;
use DocId;
@@ -64,11 +63,6 @@ impl DeleteBitSet {
b & (1u8 << shift) != 0
}
}
/// Summarize total space usage of this bitset.
pub fn space_usage(&self) -> ByteCount {
self.data.len()
}
}
impl HasLen for DeleteBitSet {

View File

@@ -136,7 +136,7 @@ extern crate crossbeam;
extern crate crossbeam_channel;
extern crate fnv;
extern crate fst;
extern crate fst_regex;
extern crate futures;
extern crate futures_cpupool;
extern crate htmlescape;
@@ -213,7 +213,6 @@ pub(crate) mod positions;
pub mod postings;
pub mod query;
pub mod schema;
pub mod space_usage;
pub mod store;
pub mod termdict;

View File

@@ -16,7 +16,10 @@ mod phrase_query;
mod query;
mod query_parser;
mod range_query;
#[cfg(feature="regex_query")]
mod regex_query;
mod reqopt_scorer;
mod scorer;
mod term_query;
@@ -47,7 +50,10 @@ pub use self::query::Query;
pub use self::query_parser::QueryParser;
pub use self::query_parser::QueryParserError;
pub use self::range_query::RangeQuery;
#[cfg(feature="regex_query")]
pub use self::regex_query::RegexQuery;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::scorer::ConstScorer;
pub use self::scorer::Scorer;

View File

@@ -177,6 +177,9 @@ impl QueryParser {
///
/// There is currently no lenient mode for the query parser
/// which makes it a bad choice for a public/broad user search engine.
///
/// Implementing a lenient mode for this query parser is tracked
/// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5)
pub fn parse_query(&self, query: &str) -> Result<Box<Query>, QueryParserError> {
let logical_ast = self.parse_query_to_logical_ast(query)?;
Ok(convert_to_query(logical_ast))
@@ -190,61 +193,6 @@ impl QueryParser {
self.compute_logical_ast(user_input_ast)
}
/// Parse a query
///
/// Note that `parse_query_lenient` will NOT return an error
/// if the input is not a valid query.
///
/// It will instead strip special characters from the query body
/// and retry; if parsing still fails, an `EmptyQuery` is returned.
pub fn parse_query_lenient(&self, query: &str) -> Box<Query> {
if let Ok(logical_ast) = self.parse_query_to_logical_ast(query) {
return convert_to_query(logical_ast);
}
// try to clean up the query
if let Ok(logical_ast) = self.parse_lenient_query_to_logical_ast(query) {
return convert_to_query(logical_ast);
}
// we have no idea what you want, so here's nothing
Box::new(EmptyQuery)
}
/// Parse the user query into an AST.
fn parse_lenient_query_to_logical_ast(
&self,
query: &str,
) -> Result<LogicalAST, QueryParserError> {
// if we are here, we know we have a poorly formed
// query input
// # Escape special characters: \\+-&|!(){}[]^~*?:\/
let special_chars = "\\+-&|!(){}[]^~*?:/";
let mut scrubbed_query = query
.chars()
.filter(|c| !special_chars.contains(*c))
.collect::<String>();
// AND, OR and NOT are used by tantivy as logical operators. We need
// to escape them
let special_words = vec!["AND", "OR", "NOT"];
for word in special_words.iter() {
scrubbed_query = scrubbed_query.replace(word, &format!("{}", word));
}
// Escape odd quotes
let quote_count = scrubbed_query.chars().filter(|&c| c == '\"').count();
if quote_count % 2 == 1 {
scrubbed_query = scrubbed_query.replace("\"", "\\\"");
}
let (user_input_ast, _remaining) = parse_to_ast()
.parse(scrubbed_query.as_str())
.map_err(|_| QueryParserError::SyntaxError)?;
self.compute_logical_ast(user_input_ast)
}
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
self.schema
.get_field(field_name)
@@ -596,26 +544,6 @@ mod test {
assert!(query_parser.parse_query("toto").is_ok());
}
#[test]
pub fn test_parse_query_lenient_no_panics() {
let query_parser = make_query_parser();
query_parser.parse_query_lenient("toto");
query_parser.parse_query_lenient("");
query_parser.parse_query_lenient("+(happy");
}
#[test]
pub fn test_parse_query_lenient_escapes_bad_queries() {
let query_parser = make_query_parser();
let query = query_parser
.parse_lenient_query_to_logical_ast("+(happy")
.unwrap();
let query_str = format!("{:?}", query);
assert_eq!(query_str, "(Term([0, 0, 0, 0, 104, 97, 112, 112, 121]) Term([0, 0, 0, 1, 104, 97, 112, 112, 121]))");
}
#[test]
pub fn test_parse_nonindexed_field_yields_error() {
let query_parser = make_query_parser();
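With the lenient path gone, callers that want graceful degradation wrap `parse_query` themselves. One possible fallback, sketched with the `EmptyQuery` that the removed code used to return (assuming it is exported from `tantivy::query`):

use tantivy::query::{EmptyQuery, Query, QueryParser};

// Parse user input, degrading to a match-nothing query instead of failing.
fn parse_or_empty(parser: &QueryParser, input: &str) -> Box<Query> {
    parser
        .parse_query(input)
        .unwrap_or_else(|_err| Box::new(EmptyQuery))
}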

View File

@@ -80,6 +80,9 @@ impl UserInputBound {
pub enum UserInputAST {
Clause(Vec<UserInputAST>),
Unary(Occur, Box<UserInputAST>),
// Not(Box<UserInputAST>),
// Should(Box<UserInputAST>),
// Must(Box<UserInputAST>),
Leaf(Box<UserInputLeaf>),
}
@@ -89,7 +92,7 @@ impl UserInputAST {
}
fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
-assert_ne!(occur, Occur::MustNot);
+assert!(occur != Occur::MustNot);
assert!(!asts.is_empty());
if asts.len() == 1 {
asts.into_iter().next().unwrap() //< safe
@@ -111,6 +114,42 @@ impl UserInputAST {
}
}
/*
impl UserInputAST {
fn compose_occur(self, occur: Occur) -> UserInputAST {
match self {
UserInputAST::Not(other) => {
let new_occur = compose_occur(Occur::MustNot, occur);
other.simplify()
}
_ => {
self
}
}
}
pub fn simplify(self) -> UserInputAST {
match self {
UserInputAST::Clause(els) => {
if els.len() == 1 {
return els.into_iter().next().unwrap();
} else {
return self;
}
}
UserInputAST::Not(els) => {
if els.len() == 1 {
return els.into_iter().next().unwrap();
} else {
return self;
}
}
}
}
}
*/
impl From<UserInputLiteral> for UserInputLeaf {
fn from(literal: UserInputLiteral) -> UserInputLeaf {
UserInputLeaf::Literal(literal)

View File

@@ -1,5 +1,7 @@
+extern crate fst_regex;
use error::TantivyError;
-use fst_regex::Regex;
+use self::fst_regex::Regex;
use query::{AutomatonWeight, Query, Weight};
use schema::Field;
use std::clone::Clone;
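Moving the `extern crate` declaration inside the feature-gated module means the optional dependency is only linked when `regex_query` is enabled. The same pattern in isolation (module and helper names are illustrative):

#[cfg(feature = "regex_query")]
mod regex_support {
    // Linked only when this module is compiled, i.e. only with the feature on.
    extern crate fst_regex;

    use self::fst_regex::Regex;

    pub fn compile(pattern: &str) -> Result<Regex, self::fst_regex::Error> {
        Regex::new(pattern)
    }
}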

View File

@@ -1,484 +0,0 @@
/*!
Representations for the space usage of various parts of a Tantivy index.
This can be used programmatically, and will also be exposed in a human readable fashion in
tantivy-cli.
One important caveat for all of this functionality is that none of it currently takes storage-level
details into consideration. For example, if your file system block size is 4096 bytes, we can
under-count actual resultant space usage by up to 4095 bytes per file.
*/
use schema::Field;
use std::collections::HashMap;
use SegmentComponent;
/// Indicates space usage in bytes
pub type ByteCount = usize;
/// Enum containing any of the possible space usage results for segment components.
pub enum ComponentSpaceUsage {
/// Data is stored per field in a uniform way
PerField(PerFieldSpaceUsage),
/// Data is stored in separate pieces in the store
Store(StoreSpaceUsage),
/// Some sort of raw byte count
Basic(ByteCount),
}
/// Represents combined space usage of an entire searcher and its component segments.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SearcherSpaceUsage {
segments: Vec<SegmentSpaceUsage>,
total: ByteCount,
}
impl SearcherSpaceUsage {
pub(crate) fn new() -> SearcherSpaceUsage {
SearcherSpaceUsage {
segments: Vec::new(),
total: 0,
}
}
/// Add a segment to `self`.
/// Performs no deduplication or other intelligence.
pub(crate) fn add_segment(&mut self, segment: SegmentSpaceUsage) {
self.total += segment.total();
self.segments.push(segment);
}
/// Per segment space usage
pub fn segments(&self) -> &[SegmentSpaceUsage] {
&self.segments[..]
}
/// Returns total byte usage of this searcher, including all large subcomponents.
/// Does not account for smaller things like `meta.json`.
pub fn total(&self) -> ByteCount {
self.total
}
}
/// Represents combined space usage for all of the large components comprising a segment.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SegmentSpaceUsage {
num_docs: u32,
termdict: PerFieldSpaceUsage,
postings: PerFieldSpaceUsage,
positions: PerFieldSpaceUsage,
positions_idx: PerFieldSpaceUsage,
fast_fields: PerFieldSpaceUsage,
fieldnorms: PerFieldSpaceUsage,
store: StoreSpaceUsage,
deletes: ByteCount,
total: ByteCount,
}
impl SegmentSpaceUsage {
pub(crate) fn new(
num_docs: u32,
termdict: PerFieldSpaceUsage,
postings: PerFieldSpaceUsage,
positions: PerFieldSpaceUsage,
positions_idx: PerFieldSpaceUsage,
fast_fields: PerFieldSpaceUsage,
fieldnorms: PerFieldSpaceUsage,
store: StoreSpaceUsage,
deletes: ByteCount,
) -> SegmentSpaceUsage {
let total = termdict.total()
+ postings.total()
+ positions.total()
+ fast_fields.total()
+ fieldnorms.total()
+ store.total()
+ deletes;
SegmentSpaceUsage {
num_docs,
termdict,
postings,
positions,
positions_idx,
fast_fields,
fieldnorms,
store,
deletes,
total,
}
}
/// Space usage for the given component
///
/// Clones the underlying data.
/// Use the components directly if this is somehow in performance critical code.
pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
use SegmentComponent::*;
use self::ComponentSpaceUsage::*;
match component {
POSTINGS => PerField(self.postings().clone()),
POSITIONS => PerField(self.positions().clone()),
POSITIONSSKIP => PerField(self.positions_skip_idx().clone()),
FASTFIELDS => PerField(self.fast_fields().clone()),
FIELDNORMS => PerField(self.fieldnorms().clone()),
TERMS => PerField(self.termdict().clone()),
STORE => Store(self.store().clone()),
DELETE => Basic(self.deletes()),
}
}
/// Num docs in segment
pub fn num_docs(&self) -> u32 {
self.num_docs
}
/// Space usage for term dictionary
pub fn termdict(&self) -> &PerFieldSpaceUsage {
&self.termdict
}
/// Space usage for postings list
pub fn postings(&self) -> &PerFieldSpaceUsage {
&self.postings
}
/// Space usage for positions
pub fn positions(&self) -> &PerFieldSpaceUsage {
&self.positions
}
/// Space usage for positions skip idx
pub fn positions_skip_idx(&self) -> &PerFieldSpaceUsage {
&self.positions_idx
}
/// Space usage for fast fields
pub fn fast_fields(&self) -> &PerFieldSpaceUsage {
&self.fast_fields
}
/// Space usage for field norms
pub fn fieldnorms(&self) -> &PerFieldSpaceUsage {
&self.fieldnorms
}
/// Space usage for stored documents
pub fn store(&self) -> &StoreSpaceUsage {
&self.store
}
/// Space usage for document deletions
pub fn deletes(&self) -> ByteCount {
self.deletes
}
/// Total space usage in bytes for this segment.
pub fn total(&self) -> ByteCount {
self.total
}
}
/// Represents space usage for the Store for this segment.
///
/// This is composed of two parts.
/// `data` represents the compressed data itself.
/// `offsets` represents a lookup to find the start of a block
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct StoreSpaceUsage {
data: ByteCount,
offsets: ByteCount,
}
impl StoreSpaceUsage {
pub(crate) fn new(data: ByteCount, offsets: ByteCount) -> StoreSpaceUsage {
StoreSpaceUsage { data, offsets }
}
/// Space usage for the data part of the store
pub fn data_usage(&self) -> ByteCount {
self.data
}
/// Space usage for the offsets part of the store (doc ID -> offset)
pub fn offsets_usage(&self) -> ByteCount {
self.offsets
}
/// Total space usage in bytes for this Store
pub fn total(&self) -> ByteCount {
self.data + self.offsets
}
}
/// Represents space usage for all of the (field, index) pairs that appear in a CompositeFile.
///
/// A field can appear with a single index (typically 0) or with multiple indexes.
/// Multiple indexes are used to handle variable length things, where
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PerFieldSpaceUsage {
fields: HashMap<Field, FieldUsage>,
total: ByteCount
}
impl PerFieldSpaceUsage {
pub(crate) fn new(fields: HashMap<Field, FieldUsage>) -> PerFieldSpaceUsage {
let total = fields.values().map(|x| x.total()).sum();
PerFieldSpaceUsage { fields, total }
}
/// Per field space usage
pub fn fields(&self) -> impl Iterator<Item = (&Field, &FieldUsage)> {
self.fields.iter()
}
/// Bytes used by the represented file
pub fn total(&self) -> ByteCount {
self.total
}
}
/// Represents space usage of a given field, breaking it down into the (field, index) pairs that
/// comprise it.
///
/// See documentation for PerFieldSpaceUsage for slightly more information.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FieldUsage {
field: Field,
num_bytes: ByteCount,
/// A field can be composed of more than one piece.
/// These pieces are indexed by arbitrary numbers starting at zero.
/// `self.num_bytes` includes all of `self.sub_num_bytes`.
sub_num_bytes: Vec<Option<ByteCount>>,
}
impl FieldUsage {
pub(crate) fn empty(field: Field) -> FieldUsage {
FieldUsage {
field,
num_bytes: 0,
sub_num_bytes: Vec::new(),
}
}
pub(crate) fn add_field_idx(&mut self, idx: usize, size: ByteCount) {
if self.sub_num_bytes.len() < idx + 1{
self.sub_num_bytes.resize(idx + 1, None);
}
assert!(self.sub_num_bytes[idx].is_none());
self.sub_num_bytes[idx] = Some(size);
self.num_bytes += size
}
/// Field
pub fn field(&self) -> Field {
self.field
}
/// Space usage for each index
pub fn sub_num_bytes(&self) -> &[Option<ByteCount>] {
&self.sub_num_bytes[..]
}
/// Total bytes used for this field in this context
pub fn total(&self) -> ByteCount {
self.num_bytes
}
}
#[cfg(test)]
mod test {
use core::Index;
use schema::SchemaBuilder;
use schema::{FAST, INT_INDEXED, TEXT};
use schema::Field;
use space_usage::ByteCount;
use space_usage::PerFieldSpaceUsage;
use schema::STORED;
use Term;
#[test]
fn test_empty() {
let schema = SchemaBuilder::new().build();
let index = Index::create_in_ram(schema.clone());
index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage();
assert_eq!(0, searcher_space_usage.total());
}
fn expect_single_field(field_space: &PerFieldSpaceUsage, field: &Field, min_size: ByteCount, max_size: ByteCount) {
assert!(field_space.total() >= min_size);
assert!(field_space.total() <= max_size);
assert_eq!(
vec![(field, field_space.total())],
field_space.fields().map(|(x,y)| (x, y.total())).collect::<Vec<_>>()
);
}
#[test]
fn test_fast_indexed() {
let mut schema_builder = SchemaBuilder::new();
let name = schema_builder.add_u64_field("name", FAST | INT_INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
{
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(name => 1u64));
index_writer.add_document(doc!(name => 2u64));
index_writer.add_document(doc!(name => 10u64));
index_writer.add_document(doc!(name => 20u64));
index_writer.commit().unwrap();
}
index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
let segment = &searcher_space_usage.segments()[0];
assert!(segment.total() > 0);
assert_eq!(4, segment.num_docs());
expect_single_field(segment.termdict(), &name, 1, 512);
expect_single_field(segment.postings(), &name, 1, 512);
assert_eq!(0, segment.positions().total());
assert_eq!(0, segment.positions_skip_idx().total());
expect_single_field(segment.fast_fields(), &name, 1, 512);
expect_single_field(segment.fieldnorms(), &name, 1, 512);
// TODO: understand why the following fails
// assert_eq!(0, segment.store().total());
assert_eq!(0, segment.deletes());
}
#[test]
fn test_text() {
let mut schema_builder = SchemaBuilder::new();
let name = schema_builder.add_text_field("name", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
{
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(name => "hi"));
index_writer.add_document(doc!(name => "this is a test"));
index_writer.add_document(doc!(name => "some more documents with some word overlap with the other test"));
index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap();
}
index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
let segment = &searcher_space_usage.segments()[0];
assert!(segment.total() > 0);
assert_eq!(4, segment.num_docs());
expect_single_field(segment.termdict(), &name, 1, 512);
expect_single_field(segment.postings(), &name, 1, 512);
expect_single_field(segment.positions(), &name, 1, 512);
expect_single_field(segment.positions_skip_idx(), &name, 1, 512);
assert_eq!(0, segment.fast_fields().total());
expect_single_field(segment.fieldnorms(), &name, 1, 512);
// TODO: understand why the following fails
// assert_eq!(0, segment.store().total());
assert_eq!(0, segment.deletes());
}
#[test]
fn test_store() {
let mut schema_builder = SchemaBuilder::new();
let name = schema_builder.add_text_field("name", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
{
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(name => "hi"));
index_writer.add_document(doc!(name => "this is a test"));
index_writer.add_document(doc!(name => "some more documents with some word overlap with the other test"));
index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap();
}
index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
let segment = &searcher_space_usage.segments()[0];
assert!(segment.total() > 0);
assert_eq!(4, segment.num_docs());
assert_eq!(0, segment.termdict().total());
assert_eq!(0, segment.postings().total());
assert_eq!(0, segment.positions().total());
assert_eq!(0, segment.positions_skip_idx().total());
assert_eq!(0, segment.fast_fields().total());
assert_eq!(0, segment.fieldnorms().total());
assert!(segment.store().total() > 0);
assert!(segment.store().total() < 512);
assert_eq!(0, segment.deletes());
}
#[test]
fn test_deletes() {
let mut schema_builder = SchemaBuilder::new();
let name = schema_builder.add_u64_field("name", INT_INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
{
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(name => 1u64));
index_writer.add_document(doc!(name => 2u64));
index_writer.add_document(doc!(name => 3u64));
index_writer.add_document(doc!(name => 4u64));
index_writer.commit().unwrap();
}
{
let mut index_writer2 = index.writer(50_000_000).unwrap();
index_writer2.delete_term(Term::from_field_u64(name, 2u64));
index_writer2.delete_term(Term::from_field_u64(name, 3u64));
// ok, now we should have a deleted doc
index_writer2.commit().unwrap();
}
index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
let segment = &searcher_space_usage.segments()[0];
assert!(segment.total() > 0);
assert_eq!(2, segment.num_docs());
expect_single_field(segment.termdict(), &name, 1, 512);
expect_single_field(segment.postings(), &name, 1, 512);
assert_eq!(0, segment.positions().total());
assert_eq!(0, segment.positions_skip_idx().total());
assert_eq!(0, segment.fast_fields().total());
expect_single_field(segment.fieldnorms(), &name, 1, 512);
// TODO: understand why the following fails
// assert_eq!(0, segment.store().total());
assert!(segment.deletes() > 0);
}
}
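For reference, the deleted module was consumed like this: start from the searcher, then drill down into per-segment and per-field byte counts. A sketch against the removed API, obtaining `searcher` as in the tests above:

fn print_space_usage(searcher: &tantivy::Searcher) {
    let usage = searcher.space_usage();
    println!("total: {} bytes across {} segment(s)", usage.total(), usage.segments().len());

    for segment in usage.segments() {
        println!("  {} docs, {} bytes", segment.num_docs(), segment.total());
        // Any of the per-field components can be walked the same way.
        for (field, field_usage) in segment.postings().fields() {
            println!("    postings for {:?}: {} bytes", field, field_usage.total());
        }
    }
}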

View File

@@ -6,7 +6,6 @@ use common::BinarySerializable;
use common::VInt;
use directory::ReadOnlySource;
use schema::Document;
use space_usage::StoreSpaceUsage;
use std::cell::RefCell;
use std::io;
use std::mem::size_of;
@@ -88,11 +87,6 @@ impl StoreReader {
cursor = &cursor[..doc_length];
Ok(Document::deserialize(&mut cursor)?)
}
/// Summarize total space usage of this store reader.
pub fn space_usage(&self) -> StoreSpaceUsage {
StoreSpaceUsage::new(self.data.len(), self.offset_index_source.len())
}
}
#[cfg_attr(

View File

@@ -96,6 +96,9 @@ fn open_fst_index(source: ReadOnlySource) -> fst::Map {
ReadOnlySource::Mmap(mmap_readonly) => {
Fst::from_mmap(mmap_readonly).expect("FST data is corrupted")
}
ReadOnlySource::Static(data) => {
Fst::from_static_slice(data).expect("FST data is corrupted")
}
};
fst::Map::from(fst)
}
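End to end, this is the payoff of the branch: index bytes embedded in the binary flow through `ReadOnlySource::Static` into the term dictionary without any copy, since `Fst::from_static_slice` (fst 0.3) borrows the `'static` bytes directly. A sketch mirroring the match arm above:

extern crate fst;

use fst::raw::Fst;

fn open_static_fst(data: &'static [u8]) -> fst::Map {
    // Zero-copy: the Fst borrows the 'static slice for the life of the program.
    let fst = Fst::from_static_slice(data).expect("FST data is corrupted");
    fst::Map::from(fst)
}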