diff --git a/examples/aggregation.rs b/examples-disabled/aggregation.rs similarity index 100% rename from examples/aggregation.rs rename to examples-disabled/aggregation.rs diff --git a/examples/basic_search.rs b/examples-disabled/basic_search.rs similarity index 100% rename from examples/basic_search.rs rename to examples-disabled/basic_search.rs diff --git a/examples/custom_collector.rs b/examples-disabled/custom_collector.rs similarity index 100% rename from examples/custom_collector.rs rename to examples-disabled/custom_collector.rs diff --git a/examples/custom_tokenizer.rs b/examples-disabled/custom_tokenizer.rs similarity index 100% rename from examples/custom_tokenizer.rs rename to examples-disabled/custom_tokenizer.rs diff --git a/examples/date_time_field.rs b/examples-disabled/date_time_field.rs similarity index 100% rename from examples/date_time_field.rs rename to examples-disabled/date_time_field.rs diff --git a/examples/deleting_updating_documents.rs b/examples-disabled/deleting_updating_documents.rs similarity index 100% rename from examples/deleting_updating_documents.rs rename to examples-disabled/deleting_updating_documents.rs diff --git a/examples/faceted_search.rs b/examples-disabled/faceted_search.rs similarity index 100% rename from examples/faceted_search.rs rename to examples-disabled/faceted_search.rs diff --git a/examples/faceted_search_with_tweaked_score.rs b/examples-disabled/faceted_search_with_tweaked_score.rs similarity index 100% rename from examples/faceted_search_with_tweaked_score.rs rename to examples-disabled/faceted_search_with_tweaked_score.rs diff --git a/examples/integer_range_search.rs b/examples-disabled/integer_range_search.rs similarity index 100% rename from examples/integer_range_search.rs rename to examples-disabled/integer_range_search.rs diff --git a/examples/ip_field.rs b/examples-disabled/ip_field.rs similarity index 100% rename from examples/ip_field.rs rename to examples-disabled/ip_field.rs diff --git a/examples/iterating_docs_and_positions.rs b/examples-disabled/iterating_docs_and_positions.rs similarity index 100% rename from examples/iterating_docs_and_positions.rs rename to examples-disabled/iterating_docs_and_positions.rs diff --git a/examples/json_field.rs b/examples-disabled/json_field.rs similarity index 100% rename from examples/json_field.rs rename to examples-disabled/json_field.rs diff --git a/examples/multiple_producer.rs b/examples-disabled/multiple_producer.rs similarity index 100% rename from examples/multiple_producer.rs rename to examples-disabled/multiple_producer.rs diff --git a/examples/pre_tokenized_text.rs b/examples-disabled/pre_tokenized_text.rs similarity index 100% rename from examples/pre_tokenized_text.rs rename to examples-disabled/pre_tokenized_text.rs diff --git a/examples/snippet.rs b/examples-disabled/snippet.rs similarity index 100% rename from examples/snippet.rs rename to examples-disabled/snippet.rs diff --git a/examples/stop_words.rs b/examples-disabled/stop_words.rs similarity index 100% rename from examples/stop_words.rs rename to examples-disabled/stop_words.rs diff --git a/examples/warmer.rs b/examples-disabled/warmer.rs similarity index 100% rename from examples/warmer.rs rename to examples-disabled/warmer.rs diff --git a/examples/working_with_json.rs b/examples-disabled/working_with_json.rs similarity index 100% rename from examples/working_with_json.rs rename to examples-disabled/working_with_json.rs diff --git a/src/aggregation/agg_req_with_accessor.rs b/src/aggregation/agg_req_with_accessor.rs index 423b3d1bb..2bd54875e 100644 --- a/src/aggregation/agg_req_with_accessor.rs +++ b/src/aggregation/agg_req_with_accessor.rs @@ -15,7 +15,7 @@ use super::metric::{ use super::segment_agg_result::BucketCount; use super::VecWithNames; use crate::fastfield::{type_and_cardinality, MultiValuedFastFieldReader}; -use crate::schema::{Cardinality, Type}; +use crate::schema::Type; use crate::{InvertedIndexReader, SegmentReader, TantivyError}; #[derive(Clone, Default)] diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 6708d125c..fe9c29ba6 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -105,8 +105,8 @@ pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer}; mod tweak_score_top_collector; pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker}; -mod facet_collector; -pub use self::facet_collector::{FacetCollector, FacetCounts}; +// mod facet_collector; +// pub use self::facet_collector::{FacetCollector, FacetCounts}; use crate::query::Weight; mod docset_collector; diff --git a/src/collector/tests.rs b/src/collector/tests.rs index 1a2aa917a..db8ac7ae2 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -5,7 +5,6 @@ use fastfield_codecs::Column; use super::*; use crate::collector::{Count, FilterCollector, TopDocs}; use crate::core::SegmentReader; -use crate::fastfield::BytesFastFieldReader; use crate::query::{AllQuery, QueryParser}; use crate::schema::{Field, Schema, FAST, TEXT}; use crate::time::format_description::well_known::Rfc3339; @@ -164,8 +163,8 @@ pub struct FastFieldSegmentCollector { } impl FastFieldTestCollector { - pub fn for_field(field: String) -> FastFieldTestCollector { - FastFieldTestCollector { field } + pub fn for_field(field: impl ToString) -> FastFieldTestCollector { + FastFieldTestCollector { field: field.to_string() } } } @@ -210,64 +209,62 @@ impl SegmentCollector for FastFieldSegmentCollector { } } -/// Collects in order all of the fast field bytes for all of the -/// docs in the `DocSet` -/// -/// This collector is mainly useful for tests. -pub struct BytesFastFieldTestCollector { - field: Field, -} +// /// Collects in order all of the fast field bytes for all of the +// /// docs in the `DocSet` +// /// +// /// This collector is mainly useful for tests. +// pub struct BytesFastFieldTestCollector { +// field: Field, +// } -pub struct BytesFastFieldSegmentCollector { - vals: Vec, - reader: BytesFastFieldReader, -} +// pub struct BytesFastFieldSegmentCollector { +// vals: Vec, +// reader: BytesFastFieldReader, +// } -impl BytesFastFieldTestCollector { - pub fn for_field(field: Field) -> BytesFastFieldTestCollector { - BytesFastFieldTestCollector { field } - } -} +// impl BytesFastFieldTestCollector { +// pub fn for_field(field: Field) -> BytesFastFieldTestCollector { +// BytesFastFieldTestCollector { field } +// } +// } -impl Collector for BytesFastFieldTestCollector { - type Fruit = Vec; - type Child = BytesFastFieldSegmentCollector; +// impl Collector for BytesFastFieldTestCollector { +// type Fruit = Vec; +// type Child = BytesFastFieldSegmentCollector; - fn for_segment( - &self, - _segment_local_id: u32, - segment_reader: &SegmentReader, - ) -> crate::Result { - let reader = segment_reader - .fast_fields() - .bytes(segment_reader.schema().get_field_name(self.field))?; - Ok(BytesFastFieldSegmentCollector { - vals: Vec::new(), - reader, - }) - } +// fn for_segment( +// &self, +// _segment_local_id: u32, +// segment_reader: &SegmentReader, +// ) -> crate::Result { +// let reader = segment_reader.fast_fields().bytes(self.field)?; +// Ok(BytesFastFieldSegmentCollector { +// vals: Vec::new(), +// reader, +// }) +// } - fn requires_scoring(&self) -> bool { - false - } +// fn requires_scoring(&self) -> bool { +// false +// } - fn merge_fruits(&self, children: Vec>) -> crate::Result> { - Ok(children.into_iter().flat_map(|c| c.into_iter()).collect()) - } -} +// fn merge_fruits(&self, children: Vec>) -> crate::Result> { +// Ok(children.into_iter().flat_map(|c| c.into_iter()).collect()) +// } +// } -impl SegmentCollector for BytesFastFieldSegmentCollector { - type Fruit = Vec; +// impl SegmentCollector for BytesFastFieldSegmentCollector { +// type Fruit = Vec; - fn collect(&mut self, doc: u32, _score: Score) { - let data = self.reader.get_bytes(doc); - self.vals.extend(data); - } +// fn collect(&mut self, doc: u32, _score: Score) { +// let data = self.reader.get_bytes(doc); +// self.vals.extend(data); +// } - fn harvest(self) -> ::Fruit { - self.vals - } -} +// fn harvest(self) -> ::Fruit { +// self.vals +// } +// } fn make_test_searcher() -> crate::Result { let schema = Schema::builder().build(); diff --git a/src/core/index.rs b/src/core/index.rs index 46ccc895d..b8ffb5d11 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -19,7 +19,7 @@ use crate::error::{DataCorruption, TantivyError}; use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN}; use crate::indexer::segment_updater::save_metas; use crate::reader::{IndexReader, IndexReaderBuilder}; -use crate::schema::{Cardinality, Field, FieldType, Schema}; +use crate::schema::{Field, FieldType, Schema}; use crate::tokenizer::{TextAnalyzer, TokenizerManager}; use crate::IndexWriter; @@ -245,12 +245,6 @@ impl IndexBuilder { sort_by_field.field ))); } - if entry.field_type().fastfield_cardinality() != Some(Cardinality::SingleValue) { - return Err(TantivyError::InvalidArgument(format!( - "Only single value fast field Cardinality supported for sorting index {}", - sort_by_field.field - ))); - } } Ok(()) } else { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index c19bbffcb..9940f8591 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -7,7 +7,7 @@ use fail::fail_point; use crate::core::{InvertedIndexReader, Segment, SegmentComponent, SegmentId}; use crate::directory::{CompositeFile, FileSlice}; use crate::error::DataCorruption; -use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders}; +use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FastFieldReaders}; use crate::fieldnorm::{FieldNormReader, FieldNormReaders}; use crate::schema::{Field, FieldType, IndexRecordOption, Schema}; use crate::space_usage::SegmentSpaceUsage; @@ -90,25 +90,8 @@ impl SegmentReader { } /// Accessor to the `FacetReader` associated with a given `Field`. - pub fn facet_reader(&self, field: Field) -> crate::Result { - let field_entry = self.schema.get_field_entry(field); - - match field_entry.field_type() { - FieldType::Facet(_) => { - let term_ords_reader = - self.fast_fields().u64s(self.schema.get_field_name(field))?; - let termdict = self - .termdict_composite - .open_read(field) - .map(TermDictionary::open) - .unwrap_or_else(|| Ok(TermDictionary::empty()))?; - Ok(FacetReader::new(term_ords_reader, termdict)) - } - _ => Err(crate::TantivyError::InvalidArgument(format!( - "Field {:?} is not a facet field.", - field_entry.name() - ))), - } + pub fn facet_reader(&self, field: Field) -> crate::Result<()> { + todo!(); } /// Accessor to the segment's `Field norms`'s reader. diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 15bbbd5ec..4f81d66f2 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -24,15 +24,10 @@ use std::net::Ipv6Addr; use fastfield_codecs::MonotonicallyMappableToU64; pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet}; -pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter}; +// pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter}; pub use self::error::{FastFieldNotAvailableError, Result}; -pub use self::facet_reader::FacetReader; -pub(crate) use self::multivalued::{get_fastfield_codecs_for_multivalue, MultivalueStartIndex}; -pub use self::multivalued::{ - MultiValueIndex, MultiValueU128FastFieldWriter, MultiValuedFastFieldReader, - MultiValuedFastFieldWriter, -}; -pub(crate) use self::readers::type_and_cardinality; +// pub use self::facet_reader::FacetReader; + pub use self::readers::FastFieldReaders; pub use self::serializer::{Column, CompositeFastFieldSerializer}; use self::writer::unexpected_value; @@ -41,10 +36,10 @@ use crate::schema::{Type, Value}; use crate::DateTime; mod alive_bitset; -mod bytes; +// mod bytes; mod error; -mod facet_reader; -mod multivalued; +// mod facet_reader; +// mod multivalued; mod readers; mod serializer; mod writer; @@ -166,7 +161,7 @@ mod tests { use std::path::Path; use std::sync::Arc; - use common::HasLen; + use common::{HasLen, TerminatingWrite}; use fastfield_codecs::{open, FastFieldCodecType}; use once_cell::sync::Lazy; use rand::prelude::SliceRandom; @@ -189,16 +184,9 @@ mod tests { }); pub static FIELD: Lazy = Lazy::new(|| SCHEMA.get_field("field").unwrap()); - #[test] - pub fn test_fastfield() { - let test_fastfield = fastfield_codecs::serialize_and_load(&[100u64, 200u64, 300u64][..]); - assert_eq!(test_fastfield.get_val(0), 100); - assert_eq!(test_fastfield.get_val(1), 200); - assert_eq!(test_fastfield.get_val(2), 300); - } #[test] - pub fn test_fastfield_i64_u64() { + pub fn test_convert_i64_u64() { let datetime = DateTime::from_utc(OffsetDateTime::UNIX_EPOCH); assert_eq!(i64::from_u64(datetime.to_u64()), 0i64); } @@ -208,22 +196,21 @@ mod tests { let path = Path::new("test"); let directory: RamDirectory = RamDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); fast_field_writers - .add_document(0, &doc!(*FIELD=>13u64)) + .add_document(&doc!(*FIELD=>13u64)) .unwrap(); fast_field_writers - .add_document(1,&doc!(*FIELD=>14u64)) + .add_document(&doc!(*FIELD=>14u64)) .unwrap(); fast_field_writers - .add_document(2,&doc!(*FIELD=>2u64)) + .add_document(&doc!(*FIELD=>2u64)) .unwrap(); fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 34); @@ -241,38 +228,37 @@ mod tests { let path = Path::new("test"); let directory: RamDirectory = RamDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test"))?; - let mut serializer = CompositeFastFieldSerializer::from_write(write)?; + let mut write: WritePtr = directory.open_write(Path::new("test"))?; let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); fast_field_writers - .add_document(0, &doc!(*FIELD=>4u64)) + .add_document(&doc!(*FIELD=>4u64)) .unwrap(); fast_field_writers - .add_document(1, &doc!(*FIELD=>14_082_001u64)) + .add_document(&doc!(*FIELD=>14_082_001u64)) .unwrap(); fast_field_writers - .add_document(2, &doc!(*FIELD=>3_052u64)) + .add_document(&doc!(*FIELD=>3_052u64)) .unwrap(); fast_field_writers - .add_document(3, &doc!(*FIELD=>9_002u64)) + .add_document(&doc!(*FIELD=>9_002u64)) .unwrap(); fast_field_writers - .add_document(4, &doc!(*FIELD=>15_001u64)) + .add_document(&doc!(*FIELD=>15_001u64)) .unwrap(); fast_field_writers - .add_document(5, &doc!(*FIELD=>777u64)) + .add_document(&doc!(*FIELD=>777u64)) .unwrap(); fast_field_writers - .add_document(6, &doc!(*FIELD=>1_002u64)) + .add_document(&doc!(*FIELD=>1_002u64)) .unwrap(); fast_field_writers - .add_document(7, &doc!(*FIELD=>1_501u64)) + .add_document(&doc!(*FIELD=>1_501u64)) .unwrap(); fast_field_writers - .add_document(8, &doc!(*FIELD=>215u64)) + .add_document(&doc!(*FIELD=>215u64)) .unwrap(); - fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?; - serializer.close()?; + fast_field_writers.serialize(&mut write, None)?; + write.terminate()?; } let file = directory.open_read(path)?; assert_eq!(file.len(), 62); @@ -302,18 +288,17 @@ mod tests { let directory: RamDirectory = RamDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - for doc_id in 0..10_000 { + for _ in 0..10_000 { fast_field_writers - .add_document(doc_id, &doc!(*FIELD=>100_000u64)) + .add_document(&doc!(*FIELD=>100_000u64)) .unwrap(); } fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 35); @@ -337,22 +322,21 @@ mod tests { let directory: RamDirectory = RamDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); // forcing the amplitude to be high fast_field_writers - .add_document(0, &doc!(*FIELD=>0u64)) + .add_document(&doc!(*FIELD=>0u64)) .unwrap(); for doc_id in 1u64..10_001u64 { fast_field_writers - .add_document(doc_id as u32, &doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64)) + .add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64)) .unwrap(); } fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 80049); @@ -383,20 +367,17 @@ mod tests { let i64_field = schema_builder.add_i64_field("field", FAST); let schema = schema_builder.build(); { - let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); - let mut doc_id = 0; for i in -100i64..10_000i64 { let mut doc = Document::default(); doc.add_i64(i64_field, i); - fast_field_writers.add_document(doc_id, &doc).unwrap(); - doc_id += 1; + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 49_usize); @@ -477,14 +458,13 @@ mod tests { let n = permutation.len(); let directory = RamDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test"))?; - let mut serializer = CompositeFastFieldSerializer::from_write(write)?; + let mut write: WritePtr = directory.open_write(Path::new("test"))?; let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); for (doc_id, &x) in permutation.iter().enumerate() { - fast_field_writers.add_document(doc_id as u32, &doc!(*FIELD=>x)).unwrap(); + fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap(); } - fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?; - serializer.close()?; + fast_field_writers.serialize(&mut write, None)?; + write.terminate()?; } let file = directory.open_read(path)?; { @@ -543,17 +523,18 @@ mod tests { Ok(()) } - fn get_vals_for_docs(ff: &MultiValuedFastFieldReader, docs: Range) -> Vec { - let mut all = vec![]; + // fn get_vals_for_docs(ff: &MultiValuedFastFieldReader, docs: Range) -> Vec { + // let mut all = vec![]; - for doc in docs { - let mut out: Vec = vec![]; - ff.get_vals(doc, &mut out); - all.extend(out); - } - all - } + // for doc in docs { + // let mut out: Vec = vec![]; + // ff.get_vals(doc, &mut out); + // all.extend(out); + // } + // all + // } + /* #[test] fn test_text_fastfield() -> crate::Result<()> { let mut schema_builder = Schema::builder(); @@ -651,156 +632,159 @@ mod tests { Ok(()) } + */ - #[test] - fn test_string_fastfield() -> crate::Result<()> { - let mut schema_builder = Schema::builder(); - let text_field = schema_builder.add_text_field("text", STRING | FAST); - let schema = schema_builder.build(); - let index = Index::create_in_ram(schema); + // #[test] + // fn test_string_fastfield() -> crate::Result<()> { + // let mut schema_builder = Schema::builder(); + // let text_field = schema_builder.add_text_field("text", STRING | FAST); + // let schema = schema_builder.build(); + // let index = Index::create_in_ram(schema); - { - // first segment - let mut index_writer = index.writer_for_tests()?; - index_writer.set_merge_policy(Box::new(NoMergePolicy)); - index_writer.add_document(doc!( - text_field => "BBBBB", // term_ord 1 - ))?; - index_writer.add_document(doc!())?; - index_writer.add_document(doc!( - text_field => "AAAAA", // term_ord 0 - ))?; - index_writer.add_document(doc!( - text_field => "AAAAA", // term_ord 0 - ))?; - index_writer.add_document(doc!( - text_field => "zumberthree", // term_ord 2, after merge term_ord 3 - ))?; + // { + // // first segment + // let mut index_writer = index.writer_for_tests()?; + // index_writer.set_merge_policy(Box::new(NoMergePolicy)); + // index_writer.add_document(doc!( + // text_field => "BBBBB", // term_ord 1 + // ))?; + // index_writer.add_document(doc!())?; + // index_writer.add_document(doc!( + // text_field => "AAAAA", // term_ord 0 + // ))?; + // index_writer.add_document(doc!( + // text_field => "AAAAA", // term_ord 0 + // ))?; + // index_writer.add_document(doc!( + // text_field => "zumberthree", // term_ord 2, after merge term_ord 3 + // ))?; - index_writer.add_document(doc!())?; - index_writer.commit()?; + // index_writer.add_document(doc!())?; + // index_writer.commit()?; - let reader = index.reader()?; - let searcher = reader.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); - let segment_reader = searcher.segment_reader(0); - let fast_fields = segment_reader.fast_fields(); - let text_fast_field = fast_fields.u64s("text").unwrap(); - assert_eq!(get_vals_for_docs(&text_fast_field, 0..6), vec![1, 0, 0, 2]); + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // assert_eq!(searcher.segment_readers().len(), 1); + // let segment_reader = searcher.segment_reader(0); + // let fast_fields = segment_reader.fast_fields(); + // let text_fast_field = fast_fields.u64s(text_field).unwrap(); - let inverted_index = segment_reader.inverted_index(text_field)?; - assert_eq!(inverted_index.terms().num_terms(), 3); - let mut bytes = vec![]; - assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?); - assert_eq!(bytes, "AAAAA".as_bytes()); - } + // assert_eq!(get_vals_for_docs(&text_fast_field, 0..6), vec![1, 0, 0, 2]); - { - // second segment - let mut index_writer = index.writer_for_tests()?; + // let inverted_index = segment_reader.inverted_index(text_field)?; + // assert_eq!(inverted_index.terms().num_terms(), 3); + // let mut bytes = vec![]; + // assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?); + // assert_eq!(bytes, "AAAAA".as_bytes()); + // } - index_writer.add_document(doc!( - text_field => "AAAAA", // term_ord 0 - ))?; + // { + // // second segment + // let mut index_writer = index.writer_for_tests()?; - index_writer.add_document(doc!( - text_field => "CCCCC", // term_ord 1, after merge 2 - ))?; + // index_writer.add_document(doc!( + // text_field => "AAAAA", // term_ord 0 + // ))?; - index_writer.add_document(doc!())?; - index_writer.commit()?; + // index_writer.add_document(doc!( + // text_field => "CCCCC", // term_ord 1, after merge 2 + // ))?; - let reader = index.reader()?; - let searcher = reader.searcher(); - assert_eq!(searcher.segment_readers().len(), 2); - let segment_reader = searcher.segment_reader(1); - let fast_fields = segment_reader.fast_fields(); - let text_fast_field = fast_fields.u64s("text").unwrap(); + // index_writer.add_document(doc!())?; + // index_writer.commit()?; - assert_eq!(get_vals_for_docs(&text_fast_field, 0..2), vec![0, 1]); - } - // Merging the segments - { - let segment_ids = index.searchable_segment_ids()?; - let mut index_writer = index.writer_for_tests()?; - index_writer.merge(&segment_ids).wait()?; - index_writer.wait_merging_threads()?; - } - let reader = index.reader()?; - let searcher = reader.searcher(); - let segment_reader = searcher.segment_reader(0); - let fast_fields = segment_reader.fast_fields(); - let text_fast_field = fast_fields.u64s("text").unwrap(); + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // assert_eq!(searcher.segment_readers().len(), 2); + // let segment_reader = searcher.segment_reader(1); + // let fast_fields = segment_reader.fast_fields(); + // let text_fast_field = fast_fields.u64s(text_field).unwrap(); - assert_eq!( - get_vals_for_docs(&text_fast_field, 0..9), - vec![1, 0, 0, 3 /* next segment */, 0, 2] - ); + // assert_eq!(get_vals_for_docs(&text_fast_field, 0..2), vec![0, 1]); + // } + // // Merging the segments + // { + // let segment_ids = index.searchable_segment_ids()?; + // let mut index_writer = index.writer_for_tests()?; + // index_writer.merge(&segment_ids).wait()?; + // index_writer.wait_merging_threads()?; + // } - Ok(()) - } + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // let segment_reader = searcher.segment_reader(0); + // let fast_fields = segment_reader.fast_fields(); + // let text_fast_field = fast_fields.u64s(text_field).unwrap(); - #[test] - fn test_datefastfield() -> crate::Result<()> { - let mut schema_builder = Schema::builder(); - let date_field = schema_builder.add_date_field( - "date", - DateOptions::from(FAST).set_precision(DatePrecision::Microseconds), - ); - let multi_date_field = schema_builder.add_date_field( - "multi_date", - DateOptions::default() - .set_precision(DatePrecision::Microseconds) - .set_fast(), - ); - let schema = schema_builder.build(); - let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_for_tests()?; - index_writer.set_merge_policy(Box::new(NoMergePolicy)); - index_writer.add_document(doc!( - date_field => DateTime::from_u64(1i64.to_u64()), - multi_date_field => DateTime::from_u64(2i64.to_u64()), - multi_date_field => DateTime::from_u64(3i64.to_u64()) - ))?; - index_writer.add_document(doc!( - date_field => DateTime::from_u64(4i64.to_u64()) - ))?; - index_writer.add_document(doc!( - multi_date_field => DateTime::from_u64(5i64.to_u64()), - multi_date_field => DateTime::from_u64(6i64.to_u64()) - ))?; - index_writer.commit()?; - let reader = index.reader()?; - let searcher = reader.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); - let segment_reader = searcher.segment_reader(0); - let fast_fields = segment_reader.fast_fields(); - let date_fast_field = fast_fields.date("date").unwrap(); - let dates_fast_field = fast_fields.dates("multi_date").unwrap(); - let mut dates = vec![]; - { - assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64); - dates_fast_field.get_vals(0u32, &mut dates); - assert_eq!(dates.len(), 2); - assert_eq!(dates[0].into_timestamp_micros(), 2i64); - assert_eq!(dates[1].into_timestamp_micros(), 3i64); - } - { - assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64); - dates_fast_field.get_vals(1u32, &mut dates); - assert!(dates.is_empty()); - } - { - assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64); - dates_fast_field.get_vals(2u32, &mut dates); - assert_eq!(dates.len(), 2); - assert_eq!(dates[0].into_timestamp_micros(), 5i64); - assert_eq!(dates[1].into_timestamp_micros(), 6i64); - } - Ok(()) - } + // assert_eq!( + // get_vals_for_docs(&text_fast_field, 0..9), + // vec![1, 0, 0, 3 /* next segment */, 0, 2] + // ); + + // Ok(()) + // } + + // #[test] + // fn test_datefastfield() -> crate::Result<()> { + // let mut schema_builder = Schema::builder(); + // let date_field = schema_builder.add_date_field( + // "date", + // DateOptions::from(FAST).set_precision(DatePrecision::Microseconds), + // ); + // let multi_date_field = schema_builder.add_date_field( + // "multi_date", + // DateOptions::default() + // .set_precision(DatePrecision::Microseconds) + // .set_fast(), + // ); + // let schema = schema_builder.build(); + // let index = Index::create_in_ram(schema); + // let mut index_writer = index.writer_for_tests()?; + // index_writer.set_merge_policy(Box::new(NoMergePolicy)); + // index_writer.add_document(doc!( + // date_field => DateTime::from_u64(1i64.to_u64()), + // multi_date_field => DateTime::from_u64(2i64.to_u64()), + // multi_date_field => DateTime::from_u64(3i64.to_u64()) + // ))?; + // index_writer.add_document(doc!( + // date_field => DateTime::from_u64(4i64.to_u64()) + // ))?; + // index_writer.add_document(doc!( + // multi_date_field => DateTime::from_u64(5i64.to_u64()), + // multi_date_field => DateTime::from_u64(6i64.to_u64()) + // ))?; + // index_writer.commit()?; + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // assert_eq!(searcher.segment_readers().len(), 1); + // let segment_reader = searcher.segment_reader(0); + // let fast_fields = segment_reader.fast_fields(); + // let date_fast_field = fast_fields.date(date_field).unwrap(); + // let dates_fast_field = fast_fields.dates(multi_date_field).unwrap(); + // let mut dates = vec![]; + // { + // assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64); + // dates_fast_field.get_vals(0u32, &mut dates); + // assert_eq!(dates.len(), 2); + // assert_eq!(dates[0].into_timestamp_micros(), 2i64); + // assert_eq!(dates[1].into_timestamp_micros(), 3i64); + // } + // { + // assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64); + // dates_fast_field.get_vals(1u32, &mut dates); + // assert!(dates.is_empty()); + // } + // { + // assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64); + // dates_fast_field.get_vals(2u32, &mut dates); + // assert_eq!(dates.len(), 2); + // assert_eq!(dates[0].into_timestamp_micros(), 5i64); + // assert_eq!(dates[1].into_timestamp_micros(), 6i64); + // } + // Ok(()) + // } #[test] pub fn test_fastfield_bool() { @@ -823,21 +807,20 @@ mod tests { let field = schema.get_field("field_bool").unwrap(); { - let write: WritePtr = directory.open_write(path).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut write: WritePtr = directory.open_write(path).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); - fast_field_writers.add_document(0u32, &doc!(field=>true)).unwrap(); + fast_field_writers.add_document(&doc!(field=>true)).unwrap(); fast_field_writers - .add_document(1u32, &doc!(field=>false)) + .add_document(&doc!(field=>false)) .unwrap(); - fast_field_writers.add_document(2u32, &doc!(field=>true)).unwrap(); + fast_field_writers.add_document(&doc!(field=>true)).unwrap(); fast_field_writers - .add_document(3u32, &doc!(field=>false)) + .add_document(&doc!(field=>false)) .unwrap(); fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 33); @@ -863,19 +846,18 @@ mod tests { let field = schema.get_field("field_bool").unwrap(); { - let write: WritePtr = directory.open_write(path).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut write: WritePtr = directory.open_write(path).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); for doc_id in 0..50 { - fast_field_writers.add_document(doc_id * 2, &doc!(field=>true)).unwrap(); + fast_field_writers.add_document(&doc!(field=>true)).unwrap(); fast_field_writers - .add_document(doc_id * 2 + 1, &doc!(field=>false)) + .add_document(&doc!(field=>false)) .unwrap(); } fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 45); @@ -900,13 +882,12 @@ mod tests { let schema = schema_builder.build(); { - let write: WritePtr = directory.open_write(path).unwrap(); - let mut serializer = CompositeFastFieldSerializer::from_write(write)?; + let mut write: WritePtr = directory.open_write(path).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); let doc = Document::default(); - fast_field_writers.add_document(0, &doc).unwrap(); - fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?; - serializer.close()?; + fast_field_writers.add_document(&doc).unwrap(); + fast_field_writers.serialize(&mut write, None)?; + write.terminate()?; } let file = directory.open_read(path).unwrap(); let composite_file = CompositeFile::open(&file)?; @@ -921,21 +902,18 @@ mod tests { fn get_index( docs: &[crate::Document], schema: &Schema, - codec_types: &[FastFieldCodecType], ) -> crate::Result { let directory: RamDirectory = RamDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); - let mut serializer = - CompositeFastFieldSerializer::from_write_with_codec(write, codec_types).unwrap(); + let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(schema); for (doc_id, doc) in docs.into_iter().enumerate() { - fast_field_writers.add_document(doc_id as u32, doc).unwrap(); + fast_field_writers.add_document(doc).unwrap(); } fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) + .serialize(&mut write, None) .unwrap(); - serializer.close().unwrap(); + write.terminate().unwrap(); } Ok(directory) } @@ -974,7 +952,7 @@ mod tests { let docs: Vec = times.iter().map(|time| doc!(field=>*time)).collect(); - let directory = get_index(&docs[..], &schema, &[codec_type])?; + let directory = get_index(&docs[..], &schema)?; let path = Path::new("test"); let file = directory.open_read(path).unwrap(); let composite_file = CompositeFile::open(&file)?; diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 04b7835ca..86071d97b 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -3,10 +3,9 @@ use std::sync::Arc; use fastfield_codecs::{open, open_u128, Column}; -use super::multivalued::MultiValuedFastFieldReader; use crate::directory::{CompositeFile, FileSlice}; -use crate::fastfield::{BytesFastFieldReader, FastFieldNotAvailableError, FastValue}; -use crate::schema::{Cardinality, Field, FieldType, Schema}; +use crate::fastfield::{FastFieldNotAvailableError, FastValue}; +use crate::schema::{Field, FieldType, Schema}; use crate::space_usage::PerFieldSpaceUsage; use crate::{DateTime, TantivyError}; @@ -29,7 +28,7 @@ pub(crate) enum FastType { Date, } -pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> { +pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option { todo!(); // match field_type { // FieldType::U64(options) => options @@ -80,39 +79,6 @@ impl FastFieldReaders { }) } - fn check_type( - &self, - field: Field, - expected_fast_type: FastType, - expected_cardinality: Cardinality, - ) -> crate::Result<()> { - let field_entry = self.schema.get_field_entry(field); - let (fast_type, cardinality) = - type_and_cardinality(field_entry.field_type()).ok_or_else(|| { - crate::TantivyError::SchemaError(format!( - "Field {:?} is not a fast field.", - field_entry.name() - )) - })?; - if fast_type != expected_fast_type { - return Err(crate::TantivyError::SchemaError(format!( - "Field {:?} is of type {:?}, expected {:?}.", - field_entry.name(), - fast_type, - expected_fast_type - ))); - } - if cardinality != expected_cardinality { - return Err(crate::TantivyError::SchemaError(format!( - "Field {:?} is of cardinality {:?}, expected {:?}.", - field_entry.name(), - cardinality, - expected_cardinality - ))); - } - Ok(()) - } - pub(crate) fn typed_fast_field_reader_with_idx( &self, field_name: &str, @@ -130,80 +96,31 @@ impl FastFieldReaders { &self, field_name: &str, ) -> crate::Result>> { - self.typed_fast_field_reader_with_idx(field_name, 0) - } - - pub(crate) fn typed_fast_field_multi_reader( - &self, - field_name: &str, - ) -> crate::Result> { - let idx_reader = self.typed_fast_field_reader(field_name)?; - let vals_reader = self.typed_fast_field_reader_with_idx(field_name, 1)?; - Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader)) + todo!(); } /// Returns the `u64` fast field reader reader associated with `field`. /// /// If `field` is not a u64 fast field, this method returns an Error. - pub fn u64(&self, field_name: &str) -> crate::Result>> { - self.check_type( - self.schema.get_field(field_name)?, - FastType::U64, - Cardinality::SingleValue, - )?; - self.typed_fast_field_reader(field_name) + pub fn u64(&self, field: &str) -> crate::Result>> { + todo!(); } /// Returns the `ip` fast field reader reader associated to `field`. /// /// If `field` is not a u128 fast field, this method returns an Error. - pub fn ip_addr(&self, field_name: &str) -> crate::Result>> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::U128, Cardinality::SingleValue)?; - let bytes = self.fast_field_data(field, 0)?.read_bytes()?; - Ok(open_u128::(bytes)?) - } - - /// Returns the `ip` fast field reader reader associated to `field`. - /// - /// If `field` is not a u128 fast field, this method returns an Error. - pub fn ip_addrs( - &self, - field_name: &str, - ) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::U128, Cardinality::MultiValues)?; - let idx_reader: Arc> = self.typed_fast_field_reader(field_name)?; - - let bytes = self.fast_field_data(field, 1)?.read_bytes()?; - let vals_reader = open_u128::(bytes)?; - - Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader)) + pub fn ip_addr(&self, field: &str) -> crate::Result>> { + todo!(); + // self.check_type(field, FastType::U128)?; + // let bytes = self.fast_field_data(field, 0)?.read_bytes()?; + // Ok(open_u128::(bytes)?) } /// Returns the `u128` fast field reader reader associated to `field`. /// /// If `field` is not a u128 fast field, this method returns an Error. - pub(crate) fn u128(&self, field_name: &str) -> crate::Result>> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::U128, Cardinality::SingleValue)?; - let bytes = self.fast_field_data(field, 0)?.read_bytes()?; - Ok(open_u128::(bytes)?) - } - - /// Returns the `u128` multi-valued fast field reader reader associated to `field`. - /// - /// If `field` is not a u128 multi-valued fast field, this method returns an Error. - pub fn u128s(&self, field_name: &str) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::U128, Cardinality::MultiValues)?; - let idx_reader: Arc> = - self.typed_fast_field_reader(self.schema.get_field_name(field))?; - - let bytes = self.fast_field_data(field, 1)?.read_bytes()?; - let vals_reader = open_u128::(bytes)?; - - Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader)) + pub(crate) fn u128(&self, field: &str) -> crate::Result>> { + todo!(); } /// Returns the `u64` fast field reader reader associated with `field`, regardless of whether @@ -219,113 +136,49 @@ impl FastFieldReaders { /// /// If `field` is not a i64 fast field, this method returns an Error. pub fn i64(&self, field_name: &str) -> crate::Result>> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::I64, Cardinality::SingleValue)?; - self.typed_fast_field_reader(self.schema.get_field_name(field)) + todo!() } /// Returns the `date` fast field reader reader associated with `field`. /// /// If `field` is not a date fast field, this method returns an Error. pub fn date(&self, field_name: &str) -> crate::Result>> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::Date, Cardinality::SingleValue)?; - self.typed_fast_field_reader(field_name) + todo!() } /// Returns the `f64` fast field reader reader associated with `field`. /// /// If `field` is not a f64 fast field, this method returns an Error. pub fn f64(&self, field_name: &str) -> crate::Result>> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::F64, Cardinality::SingleValue)?; - self.typed_fast_field_reader(field_name) + todo!(); } /// Returns the `bool` fast field reader reader associated with `field`. /// /// If `field` is not a bool fast field, this method returns an Error. pub fn bool(&self, field_name: &str) -> crate::Result>> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::Bool, Cardinality::SingleValue)?; - self.typed_fast_field_reader(field_name) + todo!() } - /// Returns a `u64s` multi-valued fast field reader reader associated with `field`. - /// - /// If `field` is not a u64 multi-valued fast field, this method returns an Error. - pub fn u64s(&self, field_name: &str) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::U64, Cardinality::MultiValues)?; - self.typed_fast_field_multi_reader(field_name) - } - - /// Returns a `u64s` multi-valued fast field reader reader associated with `field`, regardless - /// of whether the given field is effectively of type `u64` or not. - /// - /// If `field` is not a u64 multi-valued fast field, this method returns an Error. - pub fn u64s_lenient(&self, field_name: &str) -> crate::Result> { - self.typed_fast_field_multi_reader(field_name) - } - - /// Returns a `i64s` multi-valued fast field reader reader associated with `field`. - /// - /// If `field` is not a i64 multi-valued fast field, this method returns an Error. - pub fn i64s(&self, field_name: &str) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::I64, Cardinality::MultiValues)?; - self.typed_fast_field_multi_reader(self.schema.get_field_name(field)) - } - - /// Returns a `f64s` multi-valued fast field reader reader associated with `field`. - /// - /// If `field` is not a f64 multi-valued fast field, this method returns an Error. - pub fn f64s(&self, field_name: &str) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::F64, Cardinality::MultiValues)?; - self.typed_fast_field_multi_reader(self.schema.get_field_name(field)) - } - - /// Returns a `bools` multi-valued fast field reader reader associated with `field`. - /// - /// If `field` is not a bool multi-valued fast field, this method returns an Error. - pub fn bools(&self, field_name: &str) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::Bool, Cardinality::MultiValues)?; - self.typed_fast_field_multi_reader(self.schema.get_field_name(field)) - } - - /// Returns a `time::OffsetDateTime` multi-valued fast field reader reader associated with - /// `field`. - /// - /// If `field` is not a `time::OffsetDateTime` multi-valued fast field, this method returns an - /// Error. - pub fn dates(&self, field_name: &str) -> crate::Result> { - let field = self.schema.get_field(field_name)?; - self.check_type(field, FastType::Date, Cardinality::MultiValues)?; - self.typed_fast_field_multi_reader(self.schema.get_field_name(field)) - } - - /// Returns the `bytes` fast field reader associated with `field`. - /// - /// If `field` is not a bytes fast field, returns an Error. - pub fn bytes(&self, field_name: &str) -> crate::Result { - let field = self.schema.get_field(field_name)?; - let field_entry = self.schema.get_field_entry(field); - if let FieldType::Bytes(bytes_option) = field_entry.field_type() { - if !bytes_option.is_fast() { - return Err(crate::TantivyError::SchemaError(format!( - "Field {:?} is not a fast field.", - field_entry.name() - ))); - } - let fast_field_idx_file = self.fast_field_data(field, 0)?; - let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?; - let idx_reader = open(fast_field_idx_bytes)?; - let data = self.fast_field_data(field, 1)?; - BytesFastFieldReader::open(idx_reader, data) - } else { - Err(FastFieldNotAvailableError::new(field_entry).into()) - } - } + // Returns the `bytes` fast field reader associated with `field`. + // + // If `field` is not a bytes fast field, returns an Error. + // pub fn bytes(&self, field: Field) -> crate::Result { + // let field_entry = self.schema.get_field_entry(field); + // if let FieldType::Bytes(bytes_option) = field_entry.field_type() { + // if !bytes_option.is_fast() { + // return Err(crate::TantivyError::SchemaError(format!( + // "Field {:?} is not a fast field.", + // field_entry.name() + // ))); + // } + // let fast_field_idx_file = self.fast_field_data(field, 0)?; + // let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?; + // let idx_reader = open(fast_field_idx_bytes)?; + // let data = self.fast_field_data(field, 1)?; + // BytesFastFieldReader::open(idx_reader, data) + // } else { + // Err(FastFieldNotAvailableError::new(field_entry).into()) + // } + // } } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 234bbf4cd..d6980de53 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,18 +1,17 @@ use std::collections::HashMap; use std::io; +use super::FastFieldType; +use crate::fastfield::{CompositeFastFieldSerializer}; use columnar::{ColumnarWriter, NumericalType, NumericalValue}; use common; use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64}; use rustc_hash::FxHashMap; use tantivy_bitpacker::BlockedBitpacker; -use super::multivalued::{MultiValueU128FastFieldWriter, MultiValuedFastFieldWriter}; -use super::FastFieldType; -use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer}; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::UnorderedTermId; -use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema, Value}; +use crate::schema::{Document, Field, FieldEntry, FieldType, Schema, Value}; use crate::termdict::TermOrdinal; use crate::{DatePrecision, DocId}; @@ -20,6 +19,7 @@ use crate::{DatePrecision, DocId}; pub struct FastFieldsWriter { columnar_writer: ColumnarWriter, fast_fields: Vec>, //< TODO see if we can cash the field name hash too. + num_docs: DocId, // term_id_writers: Vec, // single_value_writers: Vec, // u128_value_writers: Vec, @@ -122,6 +122,7 @@ impl FastFieldsWriter { FastFieldsWriter { columnar_writer, fast_fields, + num_docs: 0u32, } } @@ -131,7 +132,8 @@ impl FastFieldsWriter { } /// Indexes all of the fastfields of a new document. - pub fn add_document(&mut self, doc_id: DocId, doc: &Document) -> crate::Result<()> { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { + let doc_id = self.num_docs; for field_value in doc.field_values() { if let Some(field_name) = self.fast_fields[field_value.field().field_id() as usize].as_ref() { match &field_value.value { @@ -155,40 +157,20 @@ impl FastFieldsWriter { } } } + self.num_docs += 1; Ok(()) } /// Serializes all of the `FastFieldWriter`s by pushing them in /// order to the fast field serializer. pub fn serialize( - self, - serializer: &mut CompositeFastFieldSerializer, - mapping: &HashMap>, + mut self, + wrt: &mut dyn io::Write, doc_id_map: Option<&DocIdMapping>, ) -> io::Result<()> { - todo!(); - // for field_writer in self.term_id_writers { - // let field = field_writer.field(); - // field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?; - // } - // for field_writer in &self.single_value_writers { - // field_writer.serialize(serializer, doc_id_map)?; - // } - - // for field_writer in self.multi_values_writers { - // let field = field_writer.field(); - // field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?; - // } - // for field_writer in self.bytes_value_writers { - // field_writer.serialize(serializer, doc_id_map)?; - // } - // for field_writer in self.u128_value_writers { - // field_writer.serialize(serializer, doc_id_map)?; - // } - // for field_writer in self.u128_multi_value_writers { - // field_writer.serialize(serializer, doc_id_map)?; - // } - + assert!(doc_id_map.is_none()); // TODO handle doc id map + let num_docs = self.num_docs; + self.columnar_writer.serialize(num_docs, wrt)?; Ok(()) } } diff --git a/src/indexer/doc_id_mapping.rs b/src/indexer/doc_id_mapping.rs index 1fd2a90cb..1489f39d4 100644 --- a/src/indexer/doc_id_mapping.rs +++ b/src/indexer/doc_id_mapping.rs @@ -442,47 +442,49 @@ mod tests_indexsorting { Ok(()) } - #[test] - fn test_sort_index_fast_field() -> crate::Result<()> { - let index = create_test_index( - Some(IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "my_number".to_string(), - order: Order::Asc, - }), - ..Default::default() - }), - get_text_options(), - )?; - assert_eq!( - index.settings().sort_by_field.as_ref().unwrap().field, - "my_number".to_string() - ); + // #[test] + // fn test_sort_index_fast_field() -> crate::Result<()> { + // let index = create_test_index( + // Some(IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "my_number".to_string(), + // order: Order::Asc, + // }), + // ..Default::default() + // }), + // get_text_options(), + // )?; + // assert_eq!( + // index.settings().sort_by_field.as_ref().unwrap().field, + // "my_number".to_string() + // ); - let searcher = index.reader()?.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); - let segment_reader = searcher.segment_reader(0); - let fast_fields = segment_reader.fast_fields(); - index.schema().get_field("my_number").unwrap(); - let fast_field = fast_fields.u64("my_number").unwrap(); - assert_eq!(fast_field.get_val(0), 10u64); - assert_eq!(fast_field.get_val(1), 20u64); - assert_eq!(fast_field.get_val(2), 30u64); + // let searcher = index.reader()?.searcher(); + // assert_eq!(searcher.segment_readers().len(), 1); + // let segment_reader = searcher.segment_reader(0); + // let fast_fields = segment_reader.fast_fields(); + // let my_number = index.schema().get_field("my_number").unwrap(); - let multifield = fast_fields.u64s("multi_numbers").unwrap(); - let mut vals = vec![]; - multifield.get_vals(0u32, &mut vals); - assert_eq!(vals, &[] as &[u64]); - let mut vals = vec![]; - multifield.get_vals(1u32, &mut vals); - assert_eq!(vals, &[5, 6]); + // let fast_field = fast_fields.u64(my_number).unwrap(); + // assert_eq!(fast_field.get_val(0), 10u64); + // assert_eq!(fast_field.get_val(1), 20u64); + // assert_eq!(fast_field.get_val(2), 30u64); - let mut vals = vec![]; - multifield.get_vals(2u32, &mut vals); - assert_eq!(vals, &[3]); - Ok(()) - } + // let multi_numbers = index.schema().get_field("multi_numbers").unwrap(); + // let multifield = fast_fields.u64s(multi_numbers).unwrap(); + // let mut vals = vec![]; + // multifield.get_vals(0u32, &mut vals); + // assert_eq!(vals, &[] as &[u64]); + // let mut vals = vec![]; + // multifield.get_vals(1u32, &mut vals); + // assert_eq!(vals, &[5, 6]); + + // let mut vals = vec![]; + // multifield.get_vals(2u32, &mut vals); + // assert_eq!(vals, &[3]); + // Ok(()) + // } #[test] fn test_doc_mapping() { diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index bb7156578..a1220d1eb 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -813,7 +813,7 @@ mod tests { use crate::indexer::NoMergePolicy; use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery}; use crate::schema::{ - self, Cardinality, Facet, FacetOptions, IndexRecordOption, IpAddrOptions, NumericOptions, + self, IndexRecordOption, IpAddrOptions, NumericOptions, TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT, }; use crate::store::DOCSTORE_CACHE_CAPACITY; @@ -1606,812 +1606,812 @@ mod tests { id_list } - fn test_operation_strategy( - ops: &[IndexingOp], - sort_index: bool, - force_end_merge: bool, - ) -> crate::Result<()> { - let mut schema_builder = schema::Schema::builder(); - let ip_field = schema_builder.add_ip_addr_field("ip", FAST | INDEXED | STORED); - let ips_field = schema_builder.add_ip_addr_field( - "ips", - IpAddrOptions::default() - .set_fast() - .set_indexed(), - ); - let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED); - let i64_field = schema_builder.add_i64_field("i64", INDEXED); - let f64_field = schema_builder.add_f64_field("f64", INDEXED); - let date_field = schema_builder.add_date_field("date", INDEXED); - let bytes_field = schema_builder.add_bytes_field("bytes", FAST | INDEXED | STORED); - let bool_field = schema_builder.add_bool_field("bool", FAST | INDEXED | STORED); - let text_field = schema_builder.add_text_field( - "text_field", - TextOptions::default() - .set_indexing_options( - TextFieldIndexing::default() - .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions), - ) - .set_stored(), - ); - - let large_text_field = schema_builder.add_text_field("large_text_field", TEXT | STORED); - let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT | STORED); - - let multi_numbers = schema_builder.add_u64_field( - "multi_numbers", - NumericOptions::default() - .set_fast() - .set_stored(), - ); - let multi_bools = schema_builder.add_bool_field( - "multi_bools", - NumericOptions::default() - .set_fast() - .set_stored(), - ); - let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); - let schema = schema_builder.build(); - let settings = if sort_index { - IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "id".to_string(), - order: Order::Asc, - }), - ..Default::default() - } - } else { - IndexSettings { - ..Default::default() - } - }; - let index = Index::builder() - .schema(schema) - .settings(settings) - .create_in_ram()?; - let mut index_writer = index.writer_for_tests()?; - index_writer.set_merge_policy(Box::new(NoMergePolicy)); - - let old_reader = index.reader()?; - - let ip_exists = |id| id % 3 != 0; // 0 does not exist - - let multi_text_field_text1 = "test1 test2 test3 test1 test2 test3"; - // rotate left - let multi_text_field_text2 = "test2 test3 test1 test2 test3 test1"; - // rotate right - let multi_text_field_text3 = "test3 test1 test2 test3 test1 test2"; - - let ip_from_id = |id| Ipv6Addr::from_u128(id as u128); - - for &op in ops { - match op { - IndexingOp::AddDoc { id } => { - let facet = Facet::from(&("/cola/".to_string() + &id.to_string())); - let ip = ip_from_id(id); - - if !ip_exists(id) { - // every 3rd doc has no ip field - index_writer.add_document(doc!(id_field=>id, - bytes_field => id.to_le_bytes().as_slice(), - multi_numbers=> id, - multi_numbers => id, - bool_field => (id % 2u64) != 0, - i64_field => id as i64, - f64_field => id as f64, - date_field => DateTime::from_timestamp_secs(id as i64), - multi_bools => (id % 2u64) != 0, - multi_bools => (id % 2u64) == 0, - text_field => id.to_string(), - facet_field => facet, - large_text_field => LOREM, - multi_text_fields => multi_text_field_text1, - multi_text_fields => multi_text_field_text2, - multi_text_fields => multi_text_field_text3, - ))?; - } else { - index_writer.add_document(doc!(id_field=>id, - bytes_field => id.to_le_bytes().as_slice(), - ip_field => ip, - ips_field => ip, - ips_field => ip, - multi_numbers=> id, - multi_numbers => id, - bool_field => (id % 2u64) != 0, - i64_field => id as i64, - f64_field => id as f64, - date_field => DateTime::from_timestamp_secs(id as i64), - multi_bools => (id % 2u64) != 0, - multi_bools => (id % 2u64) == 0, - text_field => id.to_string(), - facet_field => facet, - large_text_field => LOREM, - multi_text_fields => multi_text_field_text1, - multi_text_fields => multi_text_field_text2, - multi_text_fields => multi_text_field_text3, - ))?; - } - } - IndexingOp::DeleteDoc { id } => { - index_writer.delete_term(Term::from_field_u64(id_field, id)); - } - IndexingOp::DeleteDocQuery { id } => { - let term = Term::from_field_u64(id_field, id); - let query = TermQuery::new(term, Default::default()); - index_writer.delete_query(Box::new(query))?; - } - IndexingOp::Commit => { - index_writer.commit()?; - } - IndexingOp::Merge => { - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - if segment_ids.len() >= 2 { - index_writer.merge(&segment_ids).wait().unwrap(); - assert!(index_writer.segment_updater().wait_merging_thread().is_ok()); - } - } - } - } - index_writer.commit()?; - - let searcher = index.reader()?.searcher(); - let num_segments_before_merge = searcher.segment_readers().len(); - if force_end_merge { - index_writer.wait_merging_threads()?; - let mut index_writer = index.writer_for_tests()?; - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - if segment_ids.len() >= 2 { - index_writer.merge(&segment_ids).wait().unwrap(); - assert!(index_writer.wait_merging_threads().is_ok()); - } - } - let num_segments_after_merge = searcher.segment_readers().len(); - - old_reader.reload()?; - let old_searcher = old_reader.searcher(); - - let ids_old_searcher: HashSet = old_searcher - .segment_readers() - .iter() - .flat_map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().u64("id").unwrap(); - segment_reader - .doc_ids_alive() - .map(move |doc| ff_reader.get_val(doc)) - }) - .collect(); - - let ids: HashSet = searcher - .segment_readers() - .iter() - .flat_map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().u64("id").unwrap(); - segment_reader - .doc_ids_alive() - .map(move |doc| ff_reader.get_val(doc)) - }) - .collect(); - - let (expected_ids_and_num_occurrences, deleted_ids) = expected_ids(ops); - - let id_list = get_id_list(ops); - - // multivalue fast field content - let mut all_ips = Vec::new(); - let mut num_ips = 0; - for segment_reader in searcher.segment_readers().iter() { - let ip_reader = segment_reader.fast_fields().ip_addrs("ips").unwrap(); - for doc in segment_reader.doc_ids_alive() { - let mut vals = vec![]; - ip_reader.get_vals(doc, &mut vals); - all_ips.extend_from_slice(&vals); - } - num_ips += ip_reader.total_num_vals(); - } - - let num_docs_expected = expected_ids_and_num_occurrences - .values() - .map(|id_occurrences| *id_occurrences as usize) - .sum::(); - assert_eq!(searcher.num_docs() as usize, num_docs_expected); - assert_eq!(old_searcher.num_docs() as usize, num_docs_expected); - assert_eq!( - ids_old_searcher, - expected_ids_and_num_occurrences - .keys() - .cloned() - .collect::>() - ); - assert_eq!( - ids, - expected_ids_and_num_occurrences - .keys() - .cloned() - .collect::>() - ); - - if force_end_merge && num_segments_before_merge > 1 && num_segments_after_merge == 1 { - let mut expected_multi_ips: Vec<_> = id_list - .iter() - .filter(|id| ip_exists(**id)) - .flat_map(|id| vec![ip_from_id(*id), ip_from_id(*id)]) - .collect(); - assert_eq!(num_ips, expected_multi_ips.len() as u32); - - expected_multi_ips.sort(); - all_ips.sort(); - assert_eq!(expected_multi_ips, all_ips); - - // Test fastfield num_docs - let num_docs: usize = searcher - .segment_readers() - .iter() - .map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().ip_addrs("ips").unwrap(); - ff_reader.get_index_reader().num_docs() as usize - }) - .sum(); - assert_eq!(num_docs, num_docs_expected); - } - - // Load all ips addr - let ips: HashSet = searcher - .segment_readers() - .iter() - .flat_map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().ip_addr("ip").unwrap(); - segment_reader.doc_ids_alive().flat_map(move |doc| { - let val = ff_reader.get_val(doc); - if val == Ipv6Addr::from_u128(0) { - // TODO Fix null handling - None - } else { - Some(val) - } - }) - }) - .collect(); - - let expected_ips = expected_ids_and_num_occurrences - .keys() - .flat_map(|id| { - if !ip_exists(*id) { - None - } else { - Some(Ipv6Addr::from_u128(*id as u128)) - } - }) - .collect::>(); - assert_eq!(ips, expected_ips); - - let expected_ips = expected_ids_and_num_occurrences - .keys() - .filter_map(|id| { - if !ip_exists(*id) { - None - } else { - Some(Ipv6Addr::from_u128(*id as u128)) - } - }) - .collect::>(); - let ips: HashSet = searcher - .segment_readers() - .iter() - .flat_map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().ip_addrs("ips").unwrap(); - segment_reader.doc_ids_alive().flat_map(move |doc| { - let mut vals = vec![]; - ff_reader.get_vals(doc, &mut vals); - vals.into_iter().filter(|val| val.to_u128() != 0) // TODO Fix null handling - }) - }) - .collect(); - assert_eq!(ips, expected_ips); - - // multivalue fast field tests - for segment_reader in searcher.segment_readers().iter() { - let id_reader = segment_reader.fast_fields().u64("id").unwrap(); - let ff_reader = segment_reader.fast_fields().u64s("multi_numbers").unwrap(); - let bool_ff_reader = segment_reader.fast_fields().bools("multi_bools").unwrap(); - for doc in segment_reader.doc_ids_alive() { - let mut vals = vec![]; - ff_reader.get_vals(doc, &mut vals); - assert_eq!(vals.len(), 2); - assert_eq!(vals[0], vals[1]); - assert_eq!(id_reader.get_val(doc), vals[0]); - - let mut bool_vals = vec![]; - bool_ff_reader.get_vals(doc, &mut bool_vals); - assert_eq!(bool_vals.len(), 2); - assert_ne!(bool_vals[0], bool_vals[1]); - - assert!(expected_ids_and_num_occurrences.contains_key(&vals[0])); - } - } - - // doc store tests - for segment_reader in searcher.segment_readers().iter() { - let store_reader = segment_reader - .get_store_reader(DOCSTORE_CACHE_CAPACITY) - .unwrap(); - // test store iterator - for doc in store_reader.iter(segment_reader.alive_bitset()) { - let id = doc.unwrap().get_first(id_field).unwrap().as_u64().unwrap(); - assert!(expected_ids_and_num_occurrences.contains_key(&id)); - } - // test store random access - for doc_id in segment_reader.doc_ids_alive() { - let id = store_reader - .get(doc_id) - .unwrap() - .get_first(id_field) - .unwrap() - .as_u64() - .unwrap(); - assert!(expected_ids_and_num_occurrences.contains_key(&id)); - let id2 = store_reader - .get(doc_id) - .unwrap() - .get_first(multi_numbers) - .unwrap() - .as_u64() - .unwrap(); - assert_eq!(id, id2); - let bool = store_reader - .get(doc_id) - .unwrap() - .get_first(bool_field) - .unwrap() - .as_bool() - .unwrap(); - let doc = store_reader.get(doc_id).unwrap(); - let mut bool2 = doc.get_all(multi_bools); - assert_eq!(bool, bool2.next().unwrap().as_bool().unwrap()); - assert_ne!(bool, bool2.next().unwrap().as_bool().unwrap()); - assert_eq!(None, bool2.next()) - } - } - // test search - let do_search = |term: &str, field| { - let query = QueryParser::for_index(&index, vec![field]) - .parse_query(term) - .unwrap(); - let top_docs: Vec<(f32, DocAddress)> = - searcher.search(&query, &TopDocs::with_limit(1000)).unwrap(); - - top_docs.iter().map(|el| el.1).collect::>() - }; - - let do_search2 = |term: Term| { - let query = TermQuery::new(term, IndexRecordOption::Basic); - let top_docs: Vec<(f32, DocAddress)> = - searcher.search(&query, &TopDocs::with_limit(1000)).unwrap(); - - top_docs.iter().map(|el| el.1).collect::>() - }; - - for (existing_id, count) in &expected_ids_and_num_occurrences { - let (existing_id, count) = (*existing_id, *count); - let get_num_hits = |field| do_search(&existing_id.to_string(), field).len() as u64; - assert_eq!(get_num_hits(text_field), count); - assert_eq!(get_num_hits(i64_field), count); - assert_eq!(get_num_hits(f64_field), count); - assert_eq!(get_num_hits(id_field), count); - - // Test multi text - assert_eq!( - do_search("\"test1 test2\"", multi_text_fields).len(), - num_docs_expected - ); - assert_eq!( - do_search("\"test2 test3\"", multi_text_fields).len(), - num_docs_expected - ); - - // Test bytes - let term = Term::from_field_bytes(bytes_field, existing_id.to_le_bytes().as_slice()); - assert_eq!(do_search2(term).len() as u64, count); - - // Test date - let term = Term::from_field_date( - date_field, - DateTime::from_timestamp_secs(existing_id as i64), - ); - assert_eq!(do_search2(term).len() as u64, count); - } - for deleted_id in deleted_ids { - let assert_field = |field| { - assert_eq!(do_search(&deleted_id.to_string(), field).len() as u64, 0); - }; - assert_field(text_field); - assert_field(f64_field); - assert_field(i64_field); - assert_field(id_field); - - // Test bytes - let term = Term::from_field_bytes(bytes_field, deleted_id.to_le_bytes().as_slice()); - assert_eq!(do_search2(term).len() as u64, 0); - - // Test date - let term = - Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as i64)); - assert_eq!(do_search2(term).len() as u64, 0); - } - // search ip address - // - for (existing_id, count) in &expected_ids_and_num_occurrences { - let (existing_id, count) = (*existing_id, *count); - if !ip_exists(existing_id) { - continue; - } - let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64; - let ip_addr = Ipv6Addr::from_u128(existing_id as u128); - // Test incoming ip as ipv6 - assert_eq!(do_search_ip_field(&format!("\"{}\"", ip_addr)), count); - - let term = Term::from_field_ip_addr(ip_field, ip_addr); - assert_eq!(do_search2(term).len() as u64, count); - - // Test incoming ip as ipv4 - if let Some(ip_addr) = ip_addr.to_ipv4_mapped() { - assert_eq!(do_search_ip_field(&format!("\"{}\"", ip_addr)), count); - } - } - - // assert data is like expected - // - for (existing_id, count) in expected_ids_and_num_occurrences.iter().take(10) { - let (existing_id, count) = (*existing_id, *count); - if !ip_exists(existing_id) { - continue; - } - let gen_query_inclusive = |field: &str, from: Ipv6Addr, to: Ipv6Addr| { - format!("{}:[{} TO {}]", field, &from.to_string(), &to.to_string()) - }; - let ip = ip_from_id(existing_id); - - let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64; - // Range query on single value field - // let query = gen_query_inclusive("ip", ip, ip); - // assert_eq!(do_search_ip_field(&query), count); - - // Range query on multi value field - let query = gen_query_inclusive("ips", ip, ip); - assert_eq!(do_search_ip_field(&query), count); - } - - // ip range query on fast field - // - for (existing_id, count) in expected_ids_and_num_occurrences.iter().take(10) { - let (existing_id, count) = (*existing_id, *count); - if !ip_exists(existing_id) { - continue; - } - let gen_query_inclusive = |field: &str, from: Ipv6Addr, to: Ipv6Addr| { - format!("{}:[{} TO {}]", field, &from.to_string(), &to.to_string()) - }; - let ip = ip_from_id(existing_id); - - let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64; - // Range query on single value field - // let query = gen_query_inclusive("ip", ip, ip); - // assert_eq!(do_search_ip_field(&query), count); - - // Range query on multi value field - let query = gen_query_inclusive("ips", ip, ip); - assert_eq!(do_search_ip_field(&query), count); - } - - // test facets - for segment_reader in searcher.segment_readers().iter() { - let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap(); - let ff_reader = segment_reader.fast_fields().u64("id").unwrap(); - for doc_id in segment_reader.doc_ids_alive() { - let mut facet_ords = Vec::new(); - facet_reader.facet_ords(doc_id, &mut facet_ords); - assert_eq!(facet_ords.len(), 1); - let mut facet = Facet::default(); - facet_reader - .facet_from_ord(facet_ords[0], &mut facet) - .unwrap(); - let id = ff_reader.get_val(doc_id); - let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string())); - - assert_eq!(facet, facet_expected); - } - } - Ok(()) - } - - #[test] - fn test_ip_range_query_multivalue_bug() { - assert!(test_operation_strategy( - &[ - IndexingOp::AddDoc { id: 2 }, - IndexingOp::Commit, - IndexingOp::AddDoc { id: 1 }, - IndexingOp::AddDoc { id: 1 }, - IndexingOp::Commit, - IndexingOp::Merge - ], - true, - false - ) - .is_ok()); - } - - #[test] - fn test_ff_num_ips_regression() { - assert!(test_operation_strategy( - &[ - IndexingOp::AddDoc { id: 13 }, - IndexingOp::AddDoc { id: 1 }, - IndexingOp::Commit, - IndexingOp::DeleteDocQuery { id: 13 }, - IndexingOp::AddDoc { id: 1 }, - IndexingOp::Commit, - ], - false, - true - ) - .is_ok()); - } - - #[test] - fn test_minimal() { - assert!(test_operation_strategy( - &[ - IndexingOp::AddDoc { id: 23 }, - IndexingOp::AddDoc { id: 13 }, - IndexingOp::DeleteDoc { id: 13 } - ], - true, - true - ) - .is_ok()); - - assert!(test_operation_strategy( - &[ - IndexingOp::AddDoc { id: 23 }, - IndexingOp::AddDoc { id: 13 }, - IndexingOp::DeleteDoc { id: 13 } - ], - false, - false - ) - .is_ok()); - } - - #[test] - fn test_minimal_sort_merge() { - assert!(test_operation_strategy(&[IndexingOp::AddDoc { id: 3 },], true, true).is_ok()); - } - - proptest! { - #![proptest_config(ProptestConfig::with_cases(20))] - #[test] - fn test_delete_with_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - assert!(test_operation_strategy(&ops[..], true, false).is_ok()); - } - #[test] - fn test_delete_without_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - assert!(test_operation_strategy(&ops[..], false, false).is_ok()); - } - #[test] - fn test_delete_with_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - assert!(test_operation_strategy(&ops[..], true, true).is_ok()); - } - #[test] - fn test_delete_without_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - assert!(test_operation_strategy(&ops[..], false, true).is_ok()); - } - - #[test] - fn test_delete_with_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { - assert!(test_operation_strategy(&ops[..], true, false).is_ok()); - } - #[test] - fn test_delete_without_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { - assert!(test_operation_strategy(&ops[..], false, false).is_ok()); - } - #[test] - fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { - assert!(test_operation_strategy(&ops[..], true, true).is_ok()); - } - #[test] - fn test_delete_without_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..100)) { - assert!(test_operation_strategy(&ops[..], false, true).is_ok()); - } - - - } - - #[test] - fn test_delete_with_sort_by_field_last_opstamp_is_not_max() -> crate::Result<()> { - let mut schema_builder = schema::Schema::builder(); - let sort_by_field = schema_builder.add_u64_field("sort_by", FAST); - let id_field = schema_builder.add_u64_field("id", INDEXED); - let schema = schema_builder.build(); - - let settings = IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "sort_by".to_string(), - order: Order::Asc, - }), - ..Default::default() - }; - - let index = Index::builder() - .schema(schema) - .settings(settings) - .create_in_ram()?; - let mut index_writer = index.writer_for_tests()?; - - // We add a doc... - index_writer.add_document(doc!(sort_by_field => 2u64, id_field => 0u64))?; - // And remove it. - index_writer.delete_term(Term::from_field_u64(id_field, 0u64)); - // We add another doc. - index_writer.add_document(doc!(sort_by_field=>1u64, id_field => 0u64))?; - - // The expected result is a segment with - // maxdoc = 2 - // numdoc = 1. - index_writer.commit()?; - - let searcher = index.reader()?.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); - - let segment_reader = searcher.segment_reader(0); - assert_eq!(segment_reader.max_doc(), 2); - assert_eq!(segment_reader.num_docs(), 1); - Ok(()) - } - - #[test] - fn test_index_doc_missing_field() -> crate::Result<()> { - let mut schema_builder = schema::Schema::builder(); - let idfield = schema_builder.add_text_field("id", STRING); - schema_builder.add_text_field("optfield", STRING); - let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_for_tests()?; - index_writer.add_document(doc!(idfield=>"myid"))?; - index_writer.commit()?; - Ok(()) - } - - #[test] - fn test_bug_1617_3() { - assert!(test_operation_strategy( - &[ - IndexingOp::DeleteDoc { id: 0 }, - IndexingOp::AddDoc { id: 6 }, - IndexingOp::DeleteDocQuery { id: 11 }, - IndexingOp::Commit, - IndexingOp::Merge, - IndexingOp::Commit, - IndexingOp::Commit - ], - false, - false - ) - .is_ok()); - } - - #[test] - fn test_bug_1617_2() { - assert!(test_operation_strategy( - &[ - IndexingOp::AddDoc { id: 13 }, - IndexingOp::DeleteDoc { id: 13 }, - IndexingOp::Commit, - IndexingOp::AddDoc { id: 30 }, - IndexingOp::Commit, - IndexingOp::Merge, - ], - false, - true - ) - .is_ok()); - } - - #[test] - fn test_bug_1617() -> crate::Result<()> { - let mut schema_builder = schema::Schema::builder(); - let id_field = schema_builder.add_u64_field("id", INDEXED); - - let schema = schema_builder.build(); - let index = Index::builder().schema(schema).create_in_ram()?; - let mut index_writer = index.writer_for_tests()?; - index_writer.set_merge_policy(Box::new(NoMergePolicy)); - - let existing_id = 16u64; - let deleted_id = 13u64; - index_writer.add_document(doc!( - id_field=>existing_id, - ))?; - index_writer.add_document(doc!( - id_field=>deleted_id, - ))?; - index_writer.delete_term(Term::from_field_u64(id_field, deleted_id)); - index_writer.commit()?; - - // Merge - { - assert!(index_writer.wait_merging_threads().is_ok()); - let mut index_writer = index.writer_for_tests()?; - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - index_writer.merge(&segment_ids).wait().unwrap(); - assert!(index_writer.wait_merging_threads().is_ok()); - } - let searcher = index.reader()?.searcher(); - - let query = TermQuery::new( - Term::from_field_u64(id_field, existing_id), - IndexRecordOption::Basic, - ); - let top_docs: Vec<(f32, DocAddress)> = - searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); - - assert_eq!(top_docs.len(), 1); // Fails - - Ok(()) - } - - #[test] - fn test_bug_1618() -> crate::Result<()> { - let mut schema_builder = schema::Schema::builder(); - let id_field = schema_builder.add_i64_field("id", INDEXED); - - let schema = schema_builder.build(); - let index = Index::builder().schema(schema).create_in_ram()?; - let mut index_writer = index.writer_for_tests()?; - index_writer.set_merge_policy(Box::new(NoMergePolicy)); - - index_writer.add_document(doc!( - id_field=>10i64, - ))?; - index_writer.add_document(doc!( - id_field=>30i64, - ))?; - index_writer.commit()?; - - // Merge - { - assert!(index_writer.wait_merging_threads().is_ok()); - let mut index_writer = index.writer_for_tests()?; - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - index_writer.merge(&segment_ids).wait().unwrap(); - assert!(index_writer.wait_merging_threads().is_ok()); - } - let searcher = index.reader()?.searcher(); - - let query = TermQuery::new( - Term::from_field_i64(id_field, 10i64), - IndexRecordOption::Basic, - ); - let top_docs: Vec<(f32, DocAddress)> = - searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); - - assert_eq!(top_docs.len(), 1); // Fails - - let query = TermQuery::new( - Term::from_field_i64(id_field, 30i64), - IndexRecordOption::Basic, - ); - let top_docs: Vec<(f32, DocAddress)> = - searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); - - assert_eq!(top_docs.len(), 1); // Fails - - Ok(()) - } + // fn test_operation_strategy( + // ops: &[IndexingOp], + // sort_index: bool, + // force_end_merge: bool, + // ) -> crate::Result<()> { + // let mut schema_builder = schema::Schema::builder(); + // let ip_field = schema_builder.add_ip_addr_field("ip", FAST | INDEXED | STORED); + // let ips_field = schema_builder.add_ip_addr_field( + // "ips", + // IpAddrOptions::default() + // .set_fast() + // .set_indexed(), + // ); + // let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED); + // let i64_field = schema_builder.add_i64_field("i64", INDEXED); + // let f64_field = schema_builder.add_f64_field("f64", INDEXED); + // let date_field = schema_builder.add_date_field("date", INDEXED); + // let bytes_field = schema_builder.add_bytes_field("bytes", FAST | INDEXED | STORED); + // let bool_field = schema_builder.add_bool_field("bool", FAST | INDEXED | STORED); + // let text_field = schema_builder.add_text_field( + // "text_field", + // TextOptions::default() + // .set_indexing_options( + // TextFieldIndexing::default() + // .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions), + // ) + // .set_stored(), + // ); + + // let large_text_field = schema_builder.add_text_field("large_text_field", TEXT | STORED); + // let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT | STORED); + + // let multi_numbers = schema_builder.add_u64_field( + // "multi_numbers", + // NumericOptions::default() + // .set_fast() + // .set_stored(), + // ); + // let multi_bools = schema_builder.add_bool_field( + // "multi_bools", + // NumericOptions::default() + // .set_fast() + // .set_stored(), + // ); + // let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); + // let schema = schema_builder.build(); + // let settings = if sort_index { + // IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "id".to_string(), + // order: Order::Asc, + // }), + // ..Default::default() + // } + // } else { + // IndexSettings { + // ..Default::default() + // } + // }; + // let index = Index::builder() + // .schema(schema) + // .settings(settings) + // .create_in_ram()?; + // let mut index_writer = index.writer_for_tests()?; + // index_writer.set_merge_policy(Box::new(NoMergePolicy)); + + // let old_reader = index.reader()?; + + // let ip_exists = |id| id % 3 != 0; // 0 does not exist + + // let multi_text_field_text1 = "test1 test2 test3 test1 test2 test3"; + // // rotate left + // let multi_text_field_text2 = "test2 test3 test1 test2 test3 test1"; + // // rotate right + // let multi_text_field_text3 = "test3 test1 test2 test3 test1 test2"; + + // let ip_from_id = |id| Ipv6Addr::from_u128(id as u128); + + // for &op in ops { + // match op { + // IndexingOp::AddDoc { id } => { + // let facet = Facet::from(&("/cola/".to_string() + &id.to_string())); + // let ip = ip_from_id(id); + + // if !ip_exists(id) { + // // every 3rd doc has no ip field + // index_writer.add_document(doc!(id_field=>id, + // bytes_field => id.to_le_bytes().as_slice(), + // multi_numbers=> id, + // multi_numbers => id, + // bool_field => (id % 2u64) != 0, + // i64_field => id as i64, + // f64_field => id as f64, + // date_field => DateTime::from_timestamp_secs(id as i64), + // multi_bools => (id % 2u64) != 0, + // multi_bools => (id % 2u64) == 0, + // text_field => id.to_string(), + // facet_field => facet, + // large_text_field => LOREM, + // multi_text_fields => multi_text_field_text1, + // multi_text_fields => multi_text_field_text2, + // multi_text_fields => multi_text_field_text3, + // ))?; + // } else { + // index_writer.add_document(doc!(id_field=>id, + // bytes_field => id.to_le_bytes().as_slice(), + // ip_field => ip, + // ips_field => ip, + // ips_field => ip, + // multi_numbers=> id, + // multi_numbers => id, + // bool_field => (id % 2u64) != 0, + // i64_field => id as i64, + // f64_field => id as f64, + // date_field => DateTime::from_timestamp_secs(id as i64), + // multi_bools => (id % 2u64) != 0, + // multi_bools => (id % 2u64) == 0, + // text_field => id.to_string(), + // facet_field => facet, + // large_text_field => LOREM, + // multi_text_fields => multi_text_field_text1, + // multi_text_fields => multi_text_field_text2, + // multi_text_fields => multi_text_field_text3, + // ))?; + // } + // } + // IndexingOp::DeleteDoc { id } => { + // index_writer.delete_term(Term::from_field_u64(id_field, id)); + // } + // IndexingOp::DeleteDocQuery { id } => { + // let term = Term::from_field_u64(id_field, id); + // let query = TermQuery::new(term, Default::default()); + // index_writer.delete_query(Box::new(query))?; + // } + // IndexingOp::Commit => { + // index_writer.commit()?; + // } + // IndexingOp::Merge => { + // let segment_ids = index + // .searchable_segment_ids() + // .expect("Searchable segments failed."); + // if segment_ids.len() >= 2 { + // index_writer.merge(&segment_ids).wait().unwrap(); + // assert!(index_writer.segment_updater().wait_merging_thread().is_ok()); + // } + // } + // } + // } + // index_writer.commit()?; + + // let searcher = index.reader()?.searcher(); + // let num_segments_before_merge = searcher.segment_readers().len(); + // if force_end_merge { + // index_writer.wait_merging_threads()?; + // let mut index_writer = index.writer_for_tests()?; + // let segment_ids = index + // .searchable_segment_ids() + // .expect("Searchable segments failed."); + // if segment_ids.len() >= 2 { + // index_writer.merge(&segment_ids).wait().unwrap(); + // assert!(index_writer.wait_merging_threads().is_ok()); + // } + // } + // let num_segments_after_merge = searcher.segment_readers().len(); + + // old_reader.reload()?; + // let old_searcher = old_reader.searcher(); + + // let ids_old_searcher: HashSet = old_searcher + // .segment_readers() + // .iter() + // .flat_map(|segment_reader| { + // let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); + // segment_reader + // .doc_ids_alive() + // .map(move |doc| ff_reader.get_val(doc)) + // }) + // .collect(); + + // let ids: HashSet = searcher + // .segment_readers() + // .iter() + // .flat_map(|segment_reader| { + // let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); + // segment_reader + // .doc_ids_alive() + // .map(move |doc| ff_reader.get_val(doc)) + // }) + // .collect(); + + // let (expected_ids_and_num_occurrences, deleted_ids) = expected_ids(ops); + + // let id_list = get_id_list(ops); + + // // multivalue fast field content + // let mut all_ips = Vec::new(); + // let mut num_ips = 0; + // for segment_reader in searcher.segment_readers().iter() { + // let ip_reader = segment_reader.fast_fields().ip_addrs(ips_field).unwrap(); + // for doc in segment_reader.doc_ids_alive() { + // let mut vals = vec![]; + // ip_reader.get_vals(doc, &mut vals); + // all_ips.extend_from_slice(&vals); + // } + // num_ips += ip_reader.total_num_vals(); + // } + + // let num_docs_expected = expected_ids_and_num_occurrences + // .values() + // .map(|id_occurrences| *id_occurrences as usize) + // .sum::(); + // assert_eq!(searcher.num_docs() as usize, num_docs_expected); + // assert_eq!(old_searcher.num_docs() as usize, num_docs_expected); + // assert_eq!( + // ids_old_searcher, + // expected_ids_and_num_occurrences + // .keys() + // .cloned() + // .collect::>() + // ); + // assert_eq!( + // ids, + // expected_ids_and_num_occurrences + // .keys() + // .cloned() + // .collect::>() + // ); + + // if force_end_merge && num_segments_before_merge > 1 && num_segments_after_merge == 1 { + // let mut expected_multi_ips: Vec<_> = id_list + // .iter() + // .filter(|id| ip_exists(**id)) + // .flat_map(|id| vec![ip_from_id(*id), ip_from_id(*id)]) + // .collect(); + // assert_eq!(num_ips, expected_multi_ips.len() as u32); + + // expected_multi_ips.sort(); + // all_ips.sort(); + // assert_eq!(expected_multi_ips, all_ips); + + // // Test fastfield num_docs + // let num_docs: usize = searcher + // .segment_readers() + // .iter() + // .map(|segment_reader| { + // let ff_reader = segment_reader.fast_fields().ip_addrs(ips_field).unwrap(); + // ff_reader.get_index_reader().num_docs() as usize + // }) + // .sum(); + // assert_eq!(num_docs, num_docs_expected); + // } + + // // Load all ips addr + // let ips: HashSet = searcher + // .segment_readers() + // .iter() + // .flat_map(|segment_reader| { + // let ff_reader = segment_reader.fast_fields().ip_addr(ip_field).unwrap(); + // segment_reader.doc_ids_alive().flat_map(move |doc| { + // let val = ff_reader.get_val(doc); + // if val == Ipv6Addr::from_u128(0) { + // // TODO Fix null handling + // None + // } else { + // Some(val) + // } + // }) + // }) + // .collect(); + + // let expected_ips = expected_ids_and_num_occurrences + // .keys() + // .flat_map(|id| { + // if !ip_exists(*id) { + // None + // } else { + // Some(Ipv6Addr::from_u128(*id as u128)) + // } + // }) + // .collect::>(); + // assert_eq!(ips, expected_ips); + + // let expected_ips = expected_ids_and_num_occurrences + // .keys() + // .filter_map(|id| { + // if !ip_exists(*id) { + // None + // } else { + // Some(Ipv6Addr::from_u128(*id as u128)) + // } + // }) + // .collect::>(); + // let ips: HashSet = searcher + // .segment_readers() + // .iter() + // .flat_map(|segment_reader| { + // let ff_reader = segment_reader.fast_fields().ip_addr(ips_field).unwrap(); + // segment_reader.doc_ids_alive().flat_map(move |doc| { + // let mut vals = vec![]; + // ff_reader.get_vals(doc, &mut vals); + // vals.into_iter().filter(|val| val.to_u128() != 0) // TODO Fix null handling + // }) + // }) + // .collect(); + // assert_eq!(ips, expected_ips); + + // // multivalue fast field tests + // for segment_reader in searcher.segment_readers().iter() { + // let id_reader = segment_reader.fast_fields().u64(id_field).unwrap(); + // let ff_reader = segment_reader.fast_fields().u64s(multi_numbers).unwrap(); + // let bool_ff_reader = segment_reader.fast_fields().bools(multi_bools).unwrap(); + // for doc in segment_reader.doc_ids_alive() { + // let mut vals = vec![]; + // ff_reader.get_vals(doc, &mut vals); + // assert_eq!(vals.len(), 2); + // assert_eq!(vals[0], vals[1]); + // assert_eq!(id_reader.get_val(doc), vals[0]); + + // let mut bool_vals = vec![]; + // bool_ff_reader.get_vals(doc, &mut bool_vals); + // assert_eq!(bool_vals.len(), 2); + // assert_ne!(bool_vals[0], bool_vals[1]); + + // assert!(expected_ids_and_num_occurrences.contains_key(&vals[0])); + // } + // } + + // // doc store tests + // for segment_reader in searcher.segment_readers().iter() { + // let store_reader = segment_reader + // .get_store_reader(DOCSTORE_CACHE_CAPACITY) + // .unwrap(); + // // test store iterator + // for doc in store_reader.iter(segment_reader.alive_bitset()) { + // let id = doc.unwrap().get_first(id_field).unwrap().as_u64().unwrap(); + // assert!(expected_ids_and_num_occurrences.contains_key(&id)); + // } + // // test store random access + // for doc_id in segment_reader.doc_ids_alive() { + // let id = store_reader + // .get(doc_id) + // .unwrap() + // .get_first(id_field) + // .unwrap() + // .as_u64() + // .unwrap(); + // assert!(expected_ids_and_num_occurrences.contains_key(&id)); + // let id2 = store_reader + // .get(doc_id) + // .unwrap() + // .get_first(multi_numbers) + // .unwrap() + // .as_u64() + // .unwrap(); + // assert_eq!(id, id2); + // let bool = store_reader + // .get(doc_id) + // .unwrap() + // .get_first(bool_field) + // .unwrap() + // .as_bool() + // .unwrap(); + // let doc = store_reader.get(doc_id).unwrap(); + // let mut bool2 = doc.get_all(multi_bools); + // assert_eq!(bool, bool2.next().unwrap().as_bool().unwrap()); + // assert_ne!(bool, bool2.next().unwrap().as_bool().unwrap()); + // assert_eq!(None, bool2.next()) + // } + // } + // // test search + // let do_search = |term: &str, field| { + // let query = QueryParser::for_index(&index, vec![field]) + // .parse_query(term) + // .unwrap(); + // let top_docs: Vec<(f32, DocAddress)> = + // searcher.search(&query, &TopDocs::with_limit(1000)).unwrap(); + + // top_docs.iter().map(|el| el.1).collect::>() + // }; + + // let do_search2 = |term: Term| { + // let query = TermQuery::new(term, IndexRecordOption::Basic); + // let top_docs: Vec<(f32, DocAddress)> = + // searcher.search(&query, &TopDocs::with_limit(1000)).unwrap(); + + // top_docs.iter().map(|el| el.1).collect::>() + // }; + + // for (existing_id, count) in &expected_ids_and_num_occurrences { + // let (existing_id, count) = (*existing_id, *count); + // let get_num_hits = |field| do_search(&existing_id.to_string(), field).len() as u64; + // assert_eq!(get_num_hits(text_field), count); + // assert_eq!(get_num_hits(i64_field), count); + // assert_eq!(get_num_hits(f64_field), count); + // assert_eq!(get_num_hits(id_field), count); + + // // Test multi text + // assert_eq!( + // do_search("\"test1 test2\"", multi_text_fields).len(), + // num_docs_expected + // ); + // assert_eq!( + // do_search("\"test2 test3\"", multi_text_fields).len(), + // num_docs_expected + // ); + + // // Test bytes + // let term = Term::from_field_bytes(bytes_field, existing_id.to_le_bytes().as_slice()); + // assert_eq!(do_search2(term).len() as u64, count); + + // // Test date + // let term = Term::from_field_date( + // date_field, + // DateTime::from_timestamp_secs(existing_id as i64), + // ); + // assert_eq!(do_search2(term).len() as u64, count); + // } + // for deleted_id in deleted_ids { + // let assert_field = |field| { + // assert_eq!(do_search(&deleted_id.to_string(), field).len() as u64, 0); + // }; + // assert_field(text_field); + // assert_field(f64_field); + // assert_field(i64_field); + // assert_field(id_field); + + // // Test bytes + // let term = Term::from_field_bytes(bytes_field, deleted_id.to_le_bytes().as_slice()); + // assert_eq!(do_search2(term).len() as u64, 0); + + // // Test date + // let term = + // Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as i64)); + // assert_eq!(do_search2(term).len() as u64, 0); + // } + // // search ip address + // // + // for (existing_id, count) in &expected_ids_and_num_occurrences { + // let (existing_id, count) = (*existing_id, *count); + // if !ip_exists(existing_id) { + // continue; + // } + // let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64; + // let ip_addr = Ipv6Addr::from_u128(existing_id as u128); + // // Test incoming ip as ipv6 + // assert_eq!(do_search_ip_field(&format!("\"{}\"", ip_addr)), count); + + // let term = Term::from_field_ip_addr(ip_field, ip_addr); + // assert_eq!(do_search2(term).len() as u64, count); + + // // Test incoming ip as ipv4 + // if let Some(ip_addr) = ip_addr.to_ipv4_mapped() { + // assert_eq!(do_search_ip_field(&format!("\"{}\"", ip_addr)), count); + // } + // } + + // // assert data is like expected + // // + // for (existing_id, count) in expected_ids_and_num_occurrences.iter().take(10) { + // let (existing_id, count) = (*existing_id, *count); + // if !ip_exists(existing_id) { + // continue; + // } + // let gen_query_inclusive = |field: &str, from: Ipv6Addr, to: Ipv6Addr| { + // format!("{}:[{} TO {}]", field, &from.to_string(), &to.to_string()) + // }; + // let ip = ip_from_id(existing_id); + + // let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64; + // // Range query on single value field + // // let query = gen_query_inclusive("ip", ip, ip); + // // assert_eq!(do_search_ip_field(&query), count); + + // // Range query on multi value field + // let query = gen_query_inclusive("ips", ip, ip); + // assert_eq!(do_search_ip_field(&query), count); + // } + + // // ip range query on fast field + // // + // for (existing_id, count) in expected_ids_and_num_occurrences.iter().take(10) { + // let (existing_id, count) = (*existing_id, *count); + // if !ip_exists(existing_id) { + // continue; + // } + // let gen_query_inclusive = |field: &str, from: Ipv6Addr, to: Ipv6Addr| { + // format!("{}:[{} TO {}]", field, &from.to_string(), &to.to_string()) + // }; + // let ip = ip_from_id(existing_id); + + // let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64; + // // Range query on single value field + // // let query = gen_query_inclusive("ip", ip, ip); + // // assert_eq!(do_search_ip_field(&query), count); + + // // Range query on multi value field + // let query = gen_query_inclusive("ips", ip, ip); + // assert_eq!(do_search_ip_field(&query), count); + // } + + // // test facets + // for segment_reader in searcher.segment_readers().iter() { + // let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap(); + // let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); + // for doc_id in segment_reader.doc_ids_alive() { + // let mut facet_ords = Vec::new(); + // facet_reader.facet_ords(doc_id, &mut facet_ords); + // assert_eq!(facet_ords.len(), 1); + // let mut facet = Facet::default(); + // facet_reader + // .facet_from_ord(facet_ords[0], &mut facet) + // .unwrap(); + // let id = ff_reader.get_val(doc_id); + // let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string())); + + // assert_eq!(facet, facet_expected); + // } + // } + // Ok(()) + // } + + // #[test] + // fn test_ip_range_query_multivalue_bug() { + // assert!(test_operation_strategy( + // &[ + // IndexingOp::AddDoc { id: 2 }, + // IndexingOp::Commit, + // IndexingOp::AddDoc { id: 1 }, + // IndexingOp::AddDoc { id: 1 }, + // IndexingOp::Commit, + // IndexingOp::Merge + // ], + // true, + // false + // ) + // .is_ok()); + // } + + // #[test] + // fn test_ff_num_ips_regression() { + // assert!(test_operation_strategy( + // &[ + // IndexingOp::AddDoc { id: 13 }, + // IndexingOp::AddDoc { id: 1 }, + // IndexingOp::Commit, + // IndexingOp::DeleteDocQuery { id: 13 }, + // IndexingOp::AddDoc { id: 1 }, + // IndexingOp::Commit, + // ], + // false, + // true + // ) + // .is_ok()); + // } + + // #[test] + // fn test_minimal() { + // assert!(test_operation_strategy( + // &[ + // IndexingOp::AddDoc { id: 23 }, + // IndexingOp::AddDoc { id: 13 }, + // IndexingOp::DeleteDoc { id: 13 } + // ], + // true, + // true + // ) + // .is_ok()); + + // assert!(test_operation_strategy( + // &[ + // IndexingOp::AddDoc { id: 23 }, + // IndexingOp::AddDoc { id: 13 }, + // IndexingOp::DeleteDoc { id: 13 } + // ], + // false, + // false + // ) + // .is_ok()); + // } + + // #[test] + // fn test_minimal_sort_merge() { + // assert!(test_operation_strategy(&[IndexingOp::AddDoc { id: 3 },], true, true).is_ok()); + // } + + // proptest! { + // #![proptest_config(ProptestConfig::with_cases(20))] + // #[test] + // fn test_delete_with_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { + // assert!(test_operation_strategy(&ops[..], true, false).is_ok()); + // } + // #[test] + // fn test_delete_without_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { + // assert!(test_operation_strategy(&ops[..], false, false).is_ok()); + // } + // #[test] + // fn test_delete_with_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { + // assert!(test_operation_strategy(&ops[..], true, true).is_ok()); + // } + // #[test] + // fn test_delete_without_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { + // assert!(test_operation_strategy(&ops[..], false, true).is_ok()); + // } + + // #[test] + // fn test_delete_with_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { + // assert!(test_operation_strategy(&ops[..], true, false).is_ok()); + // } + // #[test] + // fn test_delete_without_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { + // assert!(test_operation_strategy(&ops[..], false, false).is_ok()); + // } + // #[test] + // fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { + // assert!(test_operation_strategy(&ops[..], true, true).is_ok()); + // } + // #[test] + // fn test_delete_without_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..100)) { + // assert!(test_operation_strategy(&ops[..], false, true).is_ok()); + // } + + + // } + + // #[test] + // fn test_delete_with_sort_by_field_last_opstamp_is_not_max() -> crate::Result<()> { + // let mut schema_builder = schema::Schema::builder(); + // let sort_by_field = schema_builder.add_u64_field("sort_by", FAST); + // let id_field = schema_builder.add_u64_field("id", INDEXED); + // let schema = schema_builder.build(); + + // let settings = IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "sort_by".to_string(), + // order: Order::Asc, + // }), + // ..Default::default() + // }; + + // let index = Index::builder() + // .schema(schema) + // .settings(settings) + // .create_in_ram()?; + // let mut index_writer = index.writer_for_tests()?; + + // // We add a doc... + // index_writer.add_document(doc!(sort_by_field => 2u64, id_field => 0u64))?; + // // And remove it. + // index_writer.delete_term(Term::from_field_u64(id_field, 0u64)); + // // We add another doc. + // index_writer.add_document(doc!(sort_by_field=>1u64, id_field => 0u64))?; + + // // The expected result is a segment with + // // maxdoc = 2 + // // numdoc = 1. + // index_writer.commit()?; + + // let searcher = index.reader()?.searcher(); + // assert_eq!(searcher.segment_readers().len(), 1); + + // let segment_reader = searcher.segment_reader(0); + // assert_eq!(segment_reader.max_doc(), 2); + // assert_eq!(segment_reader.num_docs(), 1); + // Ok(()) + // } + + // #[test] + // fn test_index_doc_missing_field() -> crate::Result<()> { + // let mut schema_builder = schema::Schema::builder(); + // let idfield = schema_builder.add_text_field("id", STRING); + // schema_builder.add_text_field("optfield", STRING); + // let index = Index::create_in_ram(schema_builder.build()); + // let mut index_writer = index.writer_for_tests()?; + // index_writer.add_document(doc!(idfield=>"myid"))?; + // index_writer.commit()?; + // Ok(()) + // } + + // #[test] + // fn test_bug_1617_3() { + // assert!(test_operation_strategy( + // &[ + // IndexingOp::DeleteDoc { id: 0 }, + // IndexingOp::AddDoc { id: 6 }, + // IndexingOp::DeleteDocQuery { id: 11 }, + // IndexingOp::Commit, + // IndexingOp::Merge, + // IndexingOp::Commit, + // IndexingOp::Commit + // ], + // false, + // false + // ) + // .is_ok()); + // } + + // #[test] + // fn test_bug_1617_2() { + // assert!(test_operation_strategy( + // &[ + // IndexingOp::AddDoc { id: 13 }, + // IndexingOp::DeleteDoc { id: 13 }, + // IndexingOp::Commit, + // IndexingOp::AddDoc { id: 30 }, + // IndexingOp::Commit, + // IndexingOp::Merge, + // ], + // false, + // true + // ) + // .is_ok()); + // } + + // #[test] + // fn test_bug_1617() -> crate::Result<()> { + // let mut schema_builder = schema::Schema::builder(); + // let id_field = schema_builder.add_u64_field("id", INDEXED); + + // let schema = schema_builder.build(); + // let index = Index::builder().schema(schema).create_in_ram()?; + // let mut index_writer = index.writer_for_tests()?; + // index_writer.set_merge_policy(Box::new(NoMergePolicy)); + + // let existing_id = 16u64; + // let deleted_id = 13u64; + // index_writer.add_document(doc!( + // id_field=>existing_id, + // ))?; + // index_writer.add_document(doc!( + // id_field=>deleted_id, + // ))?; + // index_writer.delete_term(Term::from_field_u64(id_field, deleted_id)); + // index_writer.commit()?; + + // // Merge + // { + // assert!(index_writer.wait_merging_threads().is_ok()); + // let mut index_writer = index.writer_for_tests()?; + // let segment_ids = index + // .searchable_segment_ids() + // .expect("Searchable segments failed."); + // index_writer.merge(&segment_ids).wait().unwrap(); + // assert!(index_writer.wait_merging_threads().is_ok()); + // } + // let searcher = index.reader()?.searcher(); + + // let query = TermQuery::new( + // Term::from_field_u64(id_field, existing_id), + // IndexRecordOption::Basic, + // ); + // let top_docs: Vec<(f32, DocAddress)> = + // searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); + + // assert_eq!(top_docs.len(), 1); // Fails + + // Ok(()) + // } + + // #[test] + // fn test_bug_1618() -> crate::Result<()> { + // let mut schema_builder = schema::Schema::builder(); + // let id_field = schema_builder.add_i64_field("id", INDEXED); + + // let schema = schema_builder.build(); + // let index = Index::builder().schema(schema).create_in_ram()?; + // let mut index_writer = index.writer_for_tests()?; + // index_writer.set_merge_policy(Box::new(NoMergePolicy)); + + // index_writer.add_document(doc!( + // id_field=>10i64, + // ))?; + // index_writer.add_document(doc!( + // id_field=>30i64, + // ))?; + // index_writer.commit()?; + + // // Merge + // { + // assert!(index_writer.wait_merging_threads().is_ok()); + // let mut index_writer = index.writer_for_tests()?; + // let segment_ids = index + // .searchable_segment_ids() + // .expect("Searchable segments failed."); + // index_writer.merge(&segment_ids).wait().unwrap(); + // assert!(index_writer.wait_merging_threads().is_ok()); + // } + // let searcher = index.reader()?.searcher(); + + // let query = TermQuery::new( + // Term::from_field_i64(id_field, 10i64), + // IndexRecordOption::Basic, + // ); + // let top_docs: Vec<(f32, DocAddress)> = + // searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); + + // assert_eq!(top_docs.len(), 1); // Fails + + // let query = TermQuery::new( + // Term::from_field_i64(id_field, 30i64), + // IndexRecordOption::Basic, + // ); + // let top_docs: Vec<(f32, DocAddress)> = + // searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); + + // assert_eq!(top_docs.len(), 1); // Fails + + // Ok(()) + // } } diff --git a/src/indexer/json_term_writer.rs b/src/indexer/json_term_writer.rs index 94487dd7e..be3c56bb8 100644 --- a/src/indexer/json_term_writer.rs +++ b/src/indexer/json_term_writer.rs @@ -150,7 +150,6 @@ fn index_json_value( json_term_writer.term_buffer, ctx, indexing_position, - None, ); } TextOrDateTime::DateTime(dt) => { diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index af7312aed..c34cb2917 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -7,21 +7,21 @@ use itertools::Itertools; use measure_time::debug_time; use super::flat_map_with_buffer::FlatMapWithBufferIter; -use super::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueIndexColumn; +// use super::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueIndexColumn; use crate::core::{Segment, SegmentReader}; +use crate::directory::WritePtr; use crate::docset::{DocSet, TERMINATED}; use crate::error::DataCorruption; use crate::fastfield::{ - get_fastfield_codecs_for_multivalue, AliveBitSet, Column, CompositeFastFieldSerializer, - MultiValueIndex, MultiValuedFastFieldReader, + AliveBitSet, Column, CompositeFastFieldSerializer, }; use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter}; use crate::indexer::doc_id_mapping::SegmentDocIdMapping; use crate::indexer::sorted_doc_id_column::RemappedDocIdColumn; -use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn; +// use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn; use crate::indexer::SegmentSerializer; use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings}; -use crate::schema::{Cardinality, Field, FieldType, Schema}; +use crate::schema::{Field, FieldType, Schema}; use crate::store::StoreWriter; use crate::termdict::{TermMerger, TermOrdinal}; use crate::{ @@ -249,11 +249,13 @@ impl IndexMerger { fn write_fast_fields( &self, - fast_field_serializer: &mut CompositeFastFieldSerializer, + fast_field_wrt: &mut WritePtr, mut term_ord_mappings: HashMap, doc_id_mapping: &SegmentDocIdMapping, ) -> crate::Result<()> { - debug_time!("write-fast-fields"); + debug_time!("wrie-fast-fields"); + todo!(); + /* for (field, field_entry) in self.schema.fields() { let field_type = field_entry.field_type(); @@ -306,74 +308,7 @@ impl IndexMerger { } } } - Ok(()) - } - - // used to merge `u128` single fast fields. - fn write_u128_multi_fast_field( - &self, - field: Field, - fast_field_serializer: &mut CompositeFastFieldSerializer, - doc_id_mapping: &SegmentDocIdMapping, - ) -> crate::Result<()> { - let segment_and_ff_readers: Vec<(&SegmentReader, MultiValuedFastFieldReader)> = self - .readers - .iter() - .map(|segment_reader| { - let ff_reader: MultiValuedFastFieldReader = segment_reader - .fast_fields() - .u128s(self.schema.get_field_name(field)) - .expect( - "Failed to find index for multivalued field. This is a bug in tantivy, \ - please report.", - ); - (segment_reader, ff_reader) - }) - .collect::>(); - - Self::write_1_n_fast_field_idx_generic( - field, - fast_field_serializer, - doc_id_mapping, - &segment_and_ff_readers - .iter() - .map(|(segment_reader, u64s_reader)| { - (*segment_reader, u64s_reader.get_index_reader()) - }) - .collect::>(), - )?; - - let num_vals = segment_and_ff_readers - .iter() - .map(|(segment_reader, reader)| { - // TODO implement generic version, implement reverse scan, all - deletes - if let Some(alive_bitset) = segment_reader.alive_bitset() { - alive_bitset - .iter_alive() - .map(|doc| reader.num_vals(doc)) - .sum() - } else { - reader.total_num_vals() - } - }) - .sum(); - - let fast_field_readers = segment_and_ff_readers - .into_iter() - .map(|(_, ff_reader)| ff_reader) - .collect::>(); - - let iter_gen = || { - doc_id_mapping - .iter_old_doc_addrs() - .flat_map_with_buffer(|doc_addr, buffer| { - let fast_field_reader = &fast_field_readers[doc_addr.segment_ord as usize]; - fast_field_reader.get_vals(doc_addr.doc_id, buffer); - }) - }; - - fast_field_serializer.create_u128_fast_field_with_idx(field, iter_gen, num_vals, 1)?; - + */ Ok(()) } @@ -535,111 +470,6 @@ impl IndexMerger { Ok(SegmentDocIdMapping::new(sorted_doc_ids, false)) } - // Creating the index file to point into the data, generic over `BytesFastFieldReader` and - // `MultiValuedFastFieldReader` - // - fn write_1_n_fast_field_idx_generic( - field: Field, - fast_field_serializer: &mut CompositeFastFieldSerializer, - doc_id_mapping: &SegmentDocIdMapping, - segment_and_ff_readers: &[(&SegmentReader, &MultiValueIndex)], - ) -> crate::Result<()> { - let column = - RemappedDocIdMultiValueIndexColumn::new(segment_and_ff_readers, doc_id_mapping); - - fast_field_serializer.create_auto_detect_u64_fast_field(field, column)?; - Ok(()) - } - /// Returns the fastfield index (index for the data, not the data). - fn write_multi_value_fast_field_idx( - &self, - field: Field, - fast_field_serializer: &mut CompositeFastFieldSerializer, - doc_id_mapping: &SegmentDocIdMapping, - ) -> crate::Result<()> { - let segment_and_ff_readers = self - .readers - .iter() - .map(|reader| { - let u64s_reader: MultiValuedFastFieldReader = reader - .fast_fields() - .typed_fast_field_multi_reader::(self.schema.get_field_name(field)) - .expect( - "Failed to find index for multivalued field. This is a bug in tantivy, \ - please report.", - ); - (reader, u64s_reader) - }) - .collect::>(); - - Self::write_1_n_fast_field_idx_generic( - field, - fast_field_serializer, - doc_id_mapping, - &segment_and_ff_readers - .iter() - .map(|(segment_reader, u64s_reader)| { - (*segment_reader, u64s_reader.get_index_reader()) - }) - .collect::>(), - ) - } - - fn write_term_id_fast_field( - &self, - field: Field, - term_ordinal_mappings: &TermOrdinalMapping, - fast_field_serializer: &mut CompositeFastFieldSerializer, - doc_id_mapping: &SegmentDocIdMapping, - ) -> crate::Result<()> { - debug_time!("write-term-id-fast-field"); - - // Multifastfield consists of 2 fastfields. - // The first serves as an index into the second one and is strictly increasing. - // The second contains the actual values. - - // First we merge the idx fast field. - self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?; - - let fast_field_reader = self - .readers - .iter() - .map(|reader| { - let ff_reader: MultiValuedFastFieldReader = reader - .fast_fields() - .u64s(self.schema.get_field_name(field)) - .expect("Could not find multivalued u64 fast value reader."); - ff_reader - }) - .collect::>(); - // We can now write the actual fast field values. - // In the case of hierarchical facets, they are actually term ordinals. - { - let mut vals = Vec::new(); - let mut buffer = Vec::new(); - for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() { - let term_ordinal_mapping: &[TermOrdinal] = - term_ordinal_mappings.get_segment(old_doc_addr.segment_ord as usize); - - let ff_reader = &fast_field_reader[old_doc_addr.segment_ord as usize]; - ff_reader.get_vals(old_doc_addr.doc_id, &mut buffer); - for &prev_term_ord in &buffer { - let new_term_ord = term_ordinal_mapping[prev_term_ord as usize]; - vals.push(new_term_ord); - } - } - - let col = VecColumn::from(&vals[..]); - fast_field_serializer.create_auto_detect_u64_fast_field_with_idx_and_codecs( - field, - col, - 1, - &get_fastfield_codecs_for_multivalue(), - )?; - } - Ok(()) - } - /// Creates a mapping if the segments are stacked. this is helpful to merge codelines between /// index sorting and the others pub(crate) fn get_doc_id_from_concatenated_data(&self) -> crate::Result { @@ -664,78 +494,6 @@ impl IndexMerger { ); Ok(SegmentDocIdMapping::new(mapping, true)) } - fn write_multi_fast_field( - &self, - field: Field, - fast_field_serializer: &mut CompositeFastFieldSerializer, - doc_id_mapping: &SegmentDocIdMapping, - ) -> crate::Result<()> { - // Multifastfield consists of 2 fastfields. - // The first serves as an index into the second one and is strictly increasing. - // The second contains the actual values. - - // First we merge the idx fast field. - - self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?; - - let fastfield_accessor = RemappedDocIdMultiValueColumn::new( - &self.readers, - doc_id_mapping, - self.schema.get_field_name(field), - ); - fast_field_serializer.create_auto_detect_u64_fast_field_with_idx_and_codecs( - field, - fastfield_accessor, - 1, - &get_fastfield_codecs_for_multivalue(), - )?; - - Ok(()) - } - - fn write_bytes_fast_field( - &self, - field: Field, - fast_field_serializer: &mut CompositeFastFieldSerializer, - doc_id_mapping: &SegmentDocIdMapping, - ) -> crate::Result<()> { - let segment_and_ff_readers = self - .readers - .iter() - .map(|reader| { - let bytes_reader = reader - .fast_fields() - .bytes(self.schema.get_field_name(field)) - .expect( - "Failed to find index for bytes field. This is a bug in tantivy, please \ - report.", - ); - (reader, bytes_reader) - }) - .collect::>(); - Self::write_1_n_fast_field_idx_generic( - field, - fast_field_serializer, - doc_id_mapping, - &segment_and_ff_readers - .iter() - .map(|(segment_reader, u64s_reader)| { - (*segment_reader, u64s_reader.get_index_reader()) - }) - .collect::>(), - )?; - - let mut serialize_vals = fast_field_serializer.new_bytes_fast_field(field); - - for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() { - let bytes_reader = &segment_and_ff_readers[old_doc_addr.segment_ord as usize].1; - let val = bytes_reader.get_bytes(old_doc_addr.doc_id); - serialize_vals.write_all(val)?; - } - - serialize_vals.flush()?; - Ok(()) - } fn write_postings_for_field( &self, @@ -1042,7 +800,7 @@ impl IndexMerger { )?; debug!("write-fastfields"); self.write_fast_fields( - serializer.get_fast_field_serializer(), + serializer.get_fast_field_write(), term_ord_mappings, &doc_id_mapping, )?; @@ -1060,13 +818,13 @@ mod tests { use schema::FAST; use crate::collector::tests::{ - BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE, + FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE, }; - use crate::collector::{Count, FacetCollector}; + use crate::collector::Count; use crate::core::Index; use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery}; use crate::schema::{ - Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, + Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, TextFieldIndexing, INDEXED, TEXT, }; use crate::time::OffsetDateTime; @@ -1203,30 +961,28 @@ mod tests { Some("a b c g") ); } - { - let get_fast_vals = |terms: Vec| { - let query = BooleanQuery::new_multiterms_query(terms); - searcher.search( - &query, - &FastFieldTestCollector::for_field("score".to_string()), - ) - }; - let get_fast_vals_bytes = |terms: Vec| { - let query = BooleanQuery::new_multiterms_query(terms); - searcher.search( - &query, - &BytesFastFieldTestCollector::for_field(bytes_score_field), - ) - }; - assert_eq!( - get_fast_vals(vec![Term::from_field_text(text_field, "a")])?, - vec![5, 7, 13] - ); - assert_eq!( - get_fast_vals_bytes(vec![Term::from_field_text(text_field, "a")])?, - vec![0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0, 13] - ); - } + + // { + // let get_fast_vals = |terms: Vec| { + // let query = BooleanQuery::new_multiterms_query(terms); + // searcher.search(&query, &FastFieldTestCollector::for_field(score_field)) + // }; + // let get_fast_vals_bytes = |terms: Vec| { + // let query = BooleanQuery::new_multiterms_query(terms); + // searcher.search( + // &query, + // &BytesFastFieldTestCollector::for_field(bytes_score_field), + // ) + // }; + // assert_eq!( + // get_fast_vals(vec![Term::from_field_text(text_field, "a")])?, + // vec![5, 7, 13] + // ); + // assert_eq!( + // get_fast_vals_bytes(vec![Term::from_field_text(text_field, "a")])?, + // vec![0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0, 13] + // ); + // } } Ok(()) } @@ -1247,18 +1003,20 @@ mod tests { let mut index_writer = index.writer_for_tests()?; let reader = index.reader().unwrap(); let search_term = |searcher: &Searcher, term: Term| { - let collector = FastFieldTestCollector::for_field("score".to_string()); - let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field); + let collector = FastFieldTestCollector::for_field("score"); + // let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field); let term_query = TermQuery::new(term, IndexRecordOption::Basic); + // searcher + // .search(&term_query, &(collector, bytes_collector)) + // .map(|(scores, bytes)| { + // let mut score_bytes = &bytes[..]; + // for &score in &scores { + // assert_eq!(score as u32, score_bytes.read_u32::().unwrap()); + // } + // scores + // }) searcher - .search(&term_query, &(collector, bytes_collector)) - .map(|(scores, bytes)| { - let mut score_bytes = &bytes[..]; - for &score in &scores { - assert_eq!(score as u32, score_bytes.read_u32::().unwrap()); - } - scores - }) + .search(&term_query, &collector) }; let empty_vec = Vec::::new(); @@ -1537,207 +1295,211 @@ mod tests { } Ok(()) } - #[test] - fn test_merge_facets_sort_none() { - test_merge_facets(None, true) - } - #[test] - fn test_merge_facets_sort_asc() { - // In the merge case this will go through the doc_id mapping code - test_merge_facets( - Some(IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "intval".to_string(), - order: Order::Desc, - }), - ..Default::default() - }), - true, - ); - // In the merge case this will not go through the doc_id mapping code, because the data is - // sorted and disjunct - test_merge_facets( - Some(IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "intval".to_string(), - order: Order::Desc, - }), - ..Default::default() - }), - false, - ); - } - #[test] - fn test_merge_facets_sort_desc() { - // In the merge case this will go through the doc_id mapping code - test_merge_facets( - Some(IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "intval".to_string(), - order: Order::Desc, - }), - ..Default::default() - }), - true, - ); - // In the merge case this will not go through the doc_id mapping code, because the data is - // sorted and disjunct - test_merge_facets( - Some(IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "intval".to_string(), - order: Order::Desc, - }), - ..Default::default() - }), - false, - ); - } + // TODO re-enable + // #[test] + // fn test_merge_facets_sort_none() { + // test_merge_facets(None, true) + // } + + // #[test] + // fn test_merge_facets_sort_asc() { + // // In the merge case this will go through the doc_id mapping code + // test_merge_facets( + // Some(IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "intval".to_string(), + // order: Order::Desc, + // }), + // ..Default::default() + // }), + // true, + // ); + // // In the merge case this will not go through the doc_id mapping code, because the data is + // // sorted and disjunct + // test_merge_facets( + // Some(IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "intval".to_string(), + // order: Order::Desc, + // }), + // ..Default::default() + // }), + // false, + // ); + // } + + // #[test] + // fn test_merge_facets_sort_desc() { + // // In the merge case this will go through the doc_id mapping code + // test_merge_facets( + // Some(IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "intval".to_string(), + // order: Order::Desc, + // }), + // ..Default::default() + // }), + // true, + // ); + // // In the merge case this will not go through the doc_id mapping code, because the data is + // // sorted and disjunct + // test_merge_facets( + // Some(IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "intval".to_string(), + // order: Order::Desc, + // }), + // ..Default::default() + // }), + // false, + // ); + // } + // force_segment_value_overlap forces the int value for sorting to have overlapping min and max // ranges between segments so that merge algorithm can't apply certain optimizations - fn test_merge_facets(index_settings: Option, force_segment_value_overlap: bool) { - let mut schema_builder = schema::Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); - let int_options = NumericOptions::default() - .set_fast() - .set_indexed(); - let int_field = schema_builder.add_u64_field("intval", int_options); - let mut index_builder = Index::builder().schema(schema_builder.build()); - if let Some(settings) = index_settings { - index_builder = index_builder.settings(settings); - } - let index = index_builder.create_in_ram().unwrap(); - // let index = Index::create_in_ram(schema_builder.build()); - let reader = index.reader().unwrap(); - let mut int_val = 0; - { - let mut index_writer = index.writer_for_tests().unwrap(); - let index_doc = - |index_writer: &mut IndexWriter, doc_facets: &[&str], int_val: &mut u64| { - let mut doc = Document::default(); - for facet in doc_facets { - doc.add_facet(facet_field, Facet::from(facet)); - } - doc.add_u64(int_field, *int_val); - *int_val += 1; - index_writer.add_document(doc).unwrap(); - }; + // fn test_merge_facets(index_settings: Option, force_segment_value_overlap: bool) { + // let mut schema_builder = schema::Schema::builder(); + // let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); + // let int_options = NumericOptions::default() + // .set_fast() + // .set_indexed(); + // let int_field = schema_builder.add_u64_field("intval", int_options); + // let mut index_builder = Index::builder().schema(schema_builder.build()); + // if let Some(settings) = index_settings { + // index_builder = index_builder.settings(settings); + // } + // let index = index_builder.create_in_ram().unwrap(); + // // let index = Index::create_in_ram(schema_builder.build()); + // let reader = index.reader().unwrap(); + // let mut int_val = 0; + // { + // let mut index_writer = index.writer_for_tests().unwrap(); + // let index_doc = + // |index_writer: &mut IndexWriter, doc_facets: &[&str], int_val: &mut u64| { + // let mut doc = Document::default(); + // for facet in doc_facets { + // doc.add_facet(facet_field, Facet::from(facet)); + // } + // doc.add_u64(int_field, *int_val); + // *int_val += 1; + // index_writer.add_document(doc).unwrap(); + // }; - index_doc( - &mut index_writer, - &["/top/a/firstdoc", "/top/b"], - &mut int_val, - ); - index_doc( - &mut index_writer, - &["/top/a/firstdoc", "/top/b", "/top/c"], - &mut int_val, - ); - index_doc(&mut index_writer, &["/top/a", "/top/b"], &mut int_val); - index_doc(&mut index_writer, &["/top/a"], &mut int_val); + // index_doc( + // &mut index_writer, + // &["/top/a/firstdoc", "/top/b"], + // &mut int_val, + // ); + // index_doc( + // &mut index_writer, + // &["/top/a/firstdoc", "/top/b", "/top/c"], + // &mut int_val, + // ); + // index_doc(&mut index_writer, &["/top/a", "/top/b"], &mut int_val); + // index_doc(&mut index_writer, &["/top/a"], &mut int_val); - index_doc(&mut index_writer, &["/top/b", "/top/d"], &mut int_val); - if force_segment_value_overlap { - index_doc(&mut index_writer, &["/top/d"], &mut 0); - index_doc(&mut index_writer, &["/top/e"], &mut 10); - index_writer.commit().expect("committed"); - index_doc(&mut index_writer, &["/top/a"], &mut 5); // 5 is between 0 - 10 so the - // segments don' have disjunct - // ranges - } else { - index_doc(&mut index_writer, &["/top/d"], &mut int_val); - index_doc(&mut index_writer, &["/top/e"], &mut int_val); - index_writer.commit().expect("committed"); - index_doc(&mut index_writer, &["/top/a"], &mut int_val); - } - index_doc(&mut index_writer, &["/top/b"], &mut int_val); - index_doc(&mut index_writer, &["/top/c"], &mut int_val); - index_writer.commit().expect("committed"); + // index_doc(&mut index_writer, &["/top/b", "/top/d"], &mut int_val); + // if force_segment_value_overlap { + // index_doc(&mut index_writer, &["/top/d"], &mut 0); + // index_doc(&mut index_writer, &["/top/e"], &mut 10); + // index_writer.commit().expect("committed"); + // index_doc(&mut index_writer, &["/top/a"], &mut 5); // 5 is between 0 - 10 so the + // // segments don' have disjunct + // // ranges + // } else { + // index_doc(&mut index_writer, &["/top/d"], &mut int_val); + // index_doc(&mut index_writer, &["/top/e"], &mut int_val); + // index_writer.commit().expect("committed"); + // index_doc(&mut index_writer, &["/top/a"], &mut int_val); + // } + // index_doc(&mut index_writer, &["/top/b"], &mut int_val); + // index_doc(&mut index_writer, &["/top/c"], &mut int_val); + // index_writer.commit().expect("committed"); - index_doc(&mut index_writer, &["/top/e", "/top/f"], &mut int_val); - index_writer.commit().expect("committed"); - } + // index_doc(&mut index_writer, &["/top/e", "/top/f"], &mut int_val); + // index_writer.commit().expect("committed"); + // } - reader.reload().unwrap(); - let test_searcher = |expected_num_docs: usize, expected: &[(&str, u64)]| { - let searcher = reader.searcher(); - let mut facet_collector = FacetCollector::for_field(facet_field); - facet_collector.add_facet(Facet::from("/top")); - let (count, facet_counts) = searcher - .search(&AllQuery, &(Count, facet_collector)) - .unwrap(); - assert_eq!(count, expected_num_docs); - let facets: Vec<(String, u64)> = facet_counts - .get("/top") - .map(|(facet, count)| (facet.to_string(), count)) - .collect(); - assert_eq!( - facets, - expected - .iter() - .map(|&(facet_str, count)| (String::from(facet_str), count)) - .collect::>() - ); - }; - test_searcher( - 11, - &[ - ("/top/a", 5), - ("/top/b", 5), - ("/top/c", 2), - ("/top/d", 2), - ("/top/e", 2), - ("/top/f", 1), - ], - ); - // Merging the segments - { - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - let mut index_writer = index.writer_for_tests().unwrap(); - index_writer - .merge(&segment_ids) - .wait() - .expect("Merging failed"); - index_writer.wait_merging_threads().unwrap(); - reader.reload().unwrap(); - test_searcher( - 11, - &[ - ("/top/a", 5), - ("/top/b", 5), - ("/top/c", 2), - ("/top/d", 2), - ("/top/e", 2), - ("/top/f", 1), - ], - ); - } + // reader.reload().unwrap(); + // let test_searcher = |expected_num_docs: usize, expected: &[(&str, u64)]| { + // let searcher = reader.searcher(); + // let mut facet_collector = FacetCollector::for_field(facet_field); + // facet_collector.add_facet(Facet::from("/top")); + // let (count, facet_counts) = searcher + // .search(&AllQuery, &(Count, facet_collector)) + // .unwrap(); + // assert_eq!(count, expected_num_docs); + // let facets: Vec<(String, u64)> = facet_counts + // .get("/top") + // .map(|(facet, count)| (facet.to_string(), count)) + // .collect(); + // assert_eq!( + // facets, + // expected + // .iter() + // .map(|&(facet_str, count)| (String::from(facet_str), count)) + // .collect::>() + // ); + // }; + // test_searcher( + // 11, + // &[ + // ("/top/a", 5), + // ("/top/b", 5), + // ("/top/c", 2), + // ("/top/d", 2), + // ("/top/e", 2), + // ("/top/f", 1), + // ], + // ); + // // Merging the segments + // { + // let segment_ids = index + // .searchable_segment_ids() + // .expect("Searchable segments failed."); + // let mut index_writer = index.writer_for_tests().unwrap(); + // index_writer + // .merge(&segment_ids) + // .wait() + // .expect("Merging failed"); + // index_writer.wait_merging_threads().unwrap(); + // reader.reload().unwrap(); + // test_searcher( + // 11, + // &[ + // ("/top/a", 5), + // ("/top/b", 5), + // ("/top/c", 2), + // ("/top/d", 2), + // ("/top/e", 2), + // ("/top/f", 1), + // ], + // ); + // } - // Deleting one term - { - let mut index_writer = index.writer_for_tests().unwrap(); - let facet = Facet::from_path(vec!["top", "a", "firstdoc"]); - let facet_term = Term::from_facet(facet_field, &facet); - index_writer.delete_term(facet_term); - index_writer.commit().unwrap(); - reader.reload().unwrap(); - test_searcher( - 9, - &[ - ("/top/a", 3), - ("/top/b", 3), - ("/top/c", 1), - ("/top/d", 2), - ("/top/e", 2), - ("/top/f", 1), - ], - ); - } - } + // // Deleting one term + // { + // let mut index_writer = index.writer_for_tests().unwrap(); + // let facet = Facet::from_path(vec!["top", "a", "firstdoc"]); + // let facet_term = Term::from_facet(facet_field, &facet); + // index_writer.delete_term(facet_term); + // index_writer.commit().unwrap(); + // reader.reload().unwrap(); + // test_searcher( + // 9, + // &[ + // ("/top/a", 3), + // ("/top/b", 3), + // ("/top/c", 1), + // ("/top/d", 2), + // ("/top/e", 2), + // ("/top/f", 1), + // ], + // ); + // } + // } #[test] fn test_bug_merge() -> crate::Result<()> { @@ -1839,45 +1601,45 @@ mod tests { { let segment = searcher.segment_reader(0u32); - let ff_reader = segment.fast_fields().u64s("intvals").unwrap(); + // let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); - ff_reader.get_vals(0, &mut vals); - assert_eq!(&vals, &[1, 2]); + // ff_reader.get_vals(0, &mut vals); + // assert_eq!(&vals, &[1, 2]); - ff_reader.get_vals(1, &mut vals); - assert_eq!(&vals, &[1, 2, 3]); + // ff_reader.get_vals(1, &mut vals); + // assert_eq!(&vals, &[1, 2, 3]); - ff_reader.get_vals(2, &mut vals); - assert_eq!(&vals, &[4, 5]); + // ff_reader.get_vals(2, &mut vals); + // assert_eq!(&vals, &[4, 5]); - ff_reader.get_vals(3, &mut vals); - assert_eq!(&vals, &[1, 2]); + // ff_reader.get_vals(3, &mut vals); + // assert_eq!(&vals, &[1, 2]); - ff_reader.get_vals(4, &mut vals); - assert_eq!(&vals, &[1, 5]); + // ff_reader.get_vals(4, &mut vals); + // assert_eq!(&vals, &[1, 5]); - ff_reader.get_vals(5, &mut vals); - assert_eq!(&vals, &[3]); + // ff_reader.get_vals(5, &mut vals); + // assert_eq!(&vals, &[3]); - ff_reader.get_vals(6, &mut vals); - assert_eq!(&vals, &[17]); + // ff_reader.get_vals(6, &mut vals); + // assert_eq!(&vals, &[17]); } { let segment = searcher.segment_reader(1u32); - let ff_reader = segment.fast_fields().u64s("intvals").unwrap(); - ff_reader.get_vals(0, &mut vals); - assert_eq!(&vals, &[28, 27]); + // let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); + // ff_reader.get_vals(0, &mut vals); + // assert_eq!(&vals, &[28, 27]); - ff_reader.get_vals(1, &mut vals); - assert_eq!(&vals, &[1_000]); + // ff_reader.get_vals(1, &mut vals); + // assert_eq!(&vals, &[1_000]); } { let segment = searcher.segment_reader(2u32); - let ff_reader = segment.fast_fields().u64s("intvals").unwrap(); - ff_reader.get_vals(0, &mut vals); - assert_eq!(&vals, &[20]); + // let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); + // ff_reader.get_vals(0, &mut vals); + // assert_eq!(&vals, &[20]); } // Merging the segments @@ -1892,37 +1654,37 @@ mod tests { { let searcher = reader.searcher(); let segment = searcher.segment_reader(0u32); - let ff_reader = segment.fast_fields().u64s("intvals").unwrap(); + // let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); - ff_reader.get_vals(0, &mut vals); - assert_eq!(&vals, &[1, 2]); + // ff_reader.get_vals(0, &mut vals); + // assert_eq!(&vals, &[1, 2]); - ff_reader.get_vals(1, &mut vals); - assert_eq!(&vals, &[1, 2, 3]); + // ff_reader.get_vals(1, &mut vals); + // assert_eq!(&vals, &[1, 2, 3]); - ff_reader.get_vals(2, &mut vals); - assert_eq!(&vals, &[4, 5]); + // ff_reader.get_vals(2, &mut vals); + // assert_eq!(&vals, &[4, 5]); - ff_reader.get_vals(3, &mut vals); - assert_eq!(&vals, &[1, 2]); + // ff_reader.get_vals(3, &mut vals); + // assert_eq!(&vals, &[1, 2]); - ff_reader.get_vals(4, &mut vals); - assert_eq!(&vals, &[1, 5]); + // ff_reader.get_vals(4, &mut vals); + // assert_eq!(&vals, &[1, 5]); - ff_reader.get_vals(5, &mut vals); - assert_eq!(&vals, &[3]); + // ff_reader.get_vals(5, &mut vals); + // assert_eq!(&vals, &[3]); - ff_reader.get_vals(6, &mut vals); - assert_eq!(&vals, &[17]); + // ff_reader.get_vals(6, &mut vals); + // assert_eq!(&vals, &[17]); - ff_reader.get_vals(7, &mut vals); - assert_eq!(&vals, &[28, 27]); + // ff_reader.get_vals(7, &mut vals); + // assert_eq!(&vals, &[28, 27]); - ff_reader.get_vals(8, &mut vals); - assert_eq!(&vals, &[1_000]); + // ff_reader.get_vals(8, &mut vals); + // assert_eq!(&vals, &[1_000]); - ff_reader.get_vals(9, &mut vals); - assert_eq!(&vals, &[20]); + // ff_reader.get_vals(9, &mut vals); + // assert_eq!(&vals, &[20]); } Ok(()) } diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 26d3e9a24..8616c2773 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -2,10 +2,10 @@ mod tests { use crate::collector::TopDocs; use crate::core::Index; - use crate::fastfield::{AliveBitSet, MultiValuedFastFieldReader}; + use crate::fastfield::AliveBitSet; use crate::query::QueryParser; use crate::schema::{ - self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, NumericOptions, + self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions, TextFieldIndexing, TextOptions, }; use crate::{DocAddress, DocSet, IndexSettings, IndexSortByField, Order, Postings, Term}; @@ -349,128 +349,131 @@ mod tests { } } - #[test] - fn test_merge_sorted_index_asc() { - let index = create_test_index( - Some(IndexSettings { - sort_by_field: Some(IndexSortByField { - field: "intval".to_string(), - order: Order::Asc, - }), - ..Default::default() - }), - false, - ) - .unwrap(); + // #[test] + // fn test_merge_sorted_index_asc() { + // let index = create_test_index( + // Some(IndexSettings { + // sort_by_field: Some(IndexSortByField { + // field: "intval".to_string(), + // order: Order::Asc, + // }), + // ..Default::default() + // }), + // false, + // ) + // .unwrap(); - let int_field = index.schema().get_field("intval").unwrap(); - let reader = index.reader().unwrap(); - let searcher = reader.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); - let segment_reader = searcher.segment_readers().last().unwrap(); - let fast_fields = segment_reader.fast_fields(); - let fast_field = fast_fields.u64("intval").unwrap(); - assert_eq!(fast_field.get_val(0), 1u64); - assert_eq!(fast_field.get_val(1), 2u64); - assert_eq!(fast_field.get_val(2), 3u64); - assert_eq!(fast_field.get_val(3), 10u64); - assert_eq!(fast_field.get_val(4), 20u64); - assert_eq!(fast_field.get_val(5), 1_000u64); + // let int_field = index.schema().get_field("intval").unwrap(); + // let multi_numbers = index.schema().get_field("multi_numbers").unwrap(); + // let bytes_field = index.schema().get_field("bytes").unwrap(); + // let reader = index.reader().unwrap(); + // let searcher = reader.searcher(); + // assert_eq!(searcher.segment_readers().len(), 1); + // let segment_reader = searcher.segment_readers().last().unwrap(); - let get_vals = |fast_field: &MultiValuedFastFieldReader, doc_id: u32| -> Vec { - let mut vals = vec![]; - fast_field.get_vals(doc_id, &mut vals); - vals - }; - let fast_fields = segment_reader.fast_fields(); - let fast_field = fast_fields.u64s("multi_numbers").unwrap(); - assert_eq!(&get_vals(&fast_field, 0), &[] as &[u64]); - assert_eq!(&get_vals(&fast_field, 1), &[2, 3]); - assert_eq!(&get_vals(&fast_field, 2), &[3, 4]); - assert_eq!(&get_vals(&fast_field, 3), &[10, 11]); - assert_eq!(&get_vals(&fast_field, 4), &[20]); - assert_eq!(&get_vals(&fast_field, 5), &[1001, 1002]); + // let fast_fields = segment_reader.fast_fields(); + // let fast_field = fast_fields.u64(int_field).unwrap(); + // assert_eq!(fast_field.get_val(0), 1u64); + // assert_eq!(fast_field.get_val(1), 2u64); + // assert_eq!(fast_field.get_val(2), 3u64); + // assert_eq!(fast_field.get_val(3), 10u64); + // assert_eq!(fast_field.get_val(4), 20u64); + // assert_eq!(fast_field.get_val(5), 1_000u64); - let fast_field = fast_fields.bytes("bytes").unwrap(); - assert_eq!(fast_field.get_bytes(0), &[] as &[u8]); - assert_eq!(fast_field.get_bytes(2), &[1, 2, 3]); - assert_eq!(fast_field.get_bytes(5), &[5, 5]); + // let get_vals = |fast_field: &MultiValuedFastFieldReader, doc_id: u32| -> Vec { + // let mut vals = vec![]; + // fast_field.get_vals(doc_id, &mut vals); + // vals + // }; + // let fast_fields = segment_reader.fast_fields(); + // let fast_field = fast_fields.u64s(multi_numbers).unwrap(); + // assert_eq!(&get_vals(&fast_field, 0), &[] as &[u64]); + // assert_eq!(&get_vals(&fast_field, 1), &[2, 3]); + // assert_eq!(&get_vals(&fast_field, 2), &[3, 4]); + // assert_eq!(&get_vals(&fast_field, 3), &[10, 11]); + // assert_eq!(&get_vals(&fast_field, 4), &[20]); + // assert_eq!(&get_vals(&fast_field, 5), &[1001, 1002]); - // test new field norm mapping - { - let my_text_field = index.schema().get_field("text_field").unwrap(); - let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap(); - assert_eq!(fieldnorm_reader.fieldnorm(0), 0); - assert_eq!(fieldnorm_reader.fieldnorm(1), 4); - assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text - assert_eq!(fieldnorm_reader.fieldnorm(3), 1); - assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num - } + // let fast_field = fast_fields.bytes(bytes_field).unwrap(); + // assert_eq!(fast_field.get_bytes(0), &[] as &[u8]); + // assert_eq!(fast_field.get_bytes(2), &[1, 2, 3]); + // assert_eq!(fast_field.get_bytes(5), &[5, 5]); - let searcher = index.reader().unwrap().searcher(); - { - let my_text_field = index.schema().get_field("text_field").unwrap(); + // // test new field norm mapping + // { + // let my_text_field = index.schema().get_field("text_field").unwrap(); + // let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap(); + // assert_eq!(fieldnorm_reader.fieldnorm(0), 0); + // assert_eq!(fieldnorm_reader.fieldnorm(1), 4); + // assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text + // assert_eq!(fieldnorm_reader.fieldnorm(3), 1); + // assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num + // } - let do_search = |term: &str| { - let query = QueryParser::for_index(&index, vec![my_text_field]) - .parse_query(term) - .unwrap(); - let top_docs: Vec<(f32, DocAddress)> = - searcher.search(&query, &TopDocs::with_limit(3)).unwrap(); + // let searcher = index.reader().unwrap().searcher(); + // { + // let my_text_field = index.schema().get_field("text_field").unwrap(); - top_docs.iter().map(|el| el.1.doc_id).collect::>() - }; + // let do_search = |term: &str| { + // let query = QueryParser::for_index(&index, vec![my_text_field]) + // .parse_query(term) + // .unwrap(); + // let top_docs: Vec<(f32, DocAddress)> = + // searcher.search(&query, &TopDocs::with_limit(3)).unwrap(); - assert_eq!(do_search("some"), vec![2]); - assert_eq!(do_search("blubber"), vec![3]); - assert_eq!(do_search("biggest"), vec![5]); - } + // top_docs.iter().map(|el| el.1.doc_id).collect::>() + // }; - // postings file - { - let my_text_field = index.schema().get_field("text_field").unwrap(); - let term_a = Term::from_field_text(my_text_field, "text"); - let inverted_index = segment_reader.inverted_index(my_text_field).unwrap(); - let mut postings = inverted_index - .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) - .unwrap() - .unwrap(); + // assert_eq!(do_search("some"), vec![2]); + // assert_eq!(do_search("blubber"), vec![3]); + // assert_eq!(do_search("biggest"), vec![5]); + // } - assert_eq!(postings.doc_freq(), 2); - let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100); - assert_eq!( - postings.doc_freq_given_deletes( - segment_reader.alive_bitset().unwrap_or(&fallback_bitset) - ), - 2 - ); + // // postings file + // { + // let my_text_field = index.schema().get_field("text_field").unwrap(); + // let term_a = Term::from_field_text(my_text_field, "text"); + // let inverted_index = segment_reader.inverted_index(my_text_field).unwrap(); + // let mut postings = inverted_index + // .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) + // .unwrap() + // .unwrap(); - let mut output = vec![]; - postings.positions(&mut output); - assert_eq!(output, vec![1, 3]); - postings.advance(); + // assert_eq!(postings.doc_freq(), 2); + // let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100); + // assert_eq!( + // postings.doc_freq_given_deletes( + // segment_reader.alive_bitset().unwrap_or(&fallback_bitset) + // ), + // 2 + // ); - postings.positions(&mut output); - assert_eq!(output, vec![1]); - } + // let mut output = vec![]; + // postings.positions(&mut output); + // assert_eq!(output, vec![1, 3]); + // postings.advance(); - // access doc store - { - let doc = searcher.doc(DocAddress::new(0, 0)).unwrap(); - assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1)); - let doc = searcher.doc(DocAddress::new(0, 1)).unwrap(); - assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(2)); - let doc = searcher.doc(DocAddress::new(0, 2)).unwrap(); - assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(3)); - let doc = searcher.doc(DocAddress::new(0, 3)).unwrap(); - assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(10)); - let doc = searcher.doc(DocAddress::new(0, 4)).unwrap(); - assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(20)); - let doc = searcher.doc(DocAddress::new(0, 5)).unwrap(); - assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1_000)); - } - } + // postings.positions(&mut output); + // assert_eq!(output, vec![1]); + // } + + // // access doc store + // { + // let doc = searcher.doc(DocAddress::new(0, 0)).unwrap(); + // assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1)); + // let doc = searcher.doc(DocAddress::new(0, 1)).unwrap(); + // assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(2)); + // let doc = searcher.doc(DocAddress::new(0, 2)).unwrap(); + // assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(3)); + // let doc = searcher.doc(DocAddress::new(0, 3)).unwrap(); + // assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(10)); + // let doc = searcher.doc(DocAddress::new(0, 4)).unwrap(); + // assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(20)); + // let doc = searcher.doc(DocAddress::new(0, 5)).unwrap(); + // assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1_000)); + // } + // } } #[cfg(all(test, feature = "unstable"))] diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index 88861d0a4..f2c361da5 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -20,7 +20,7 @@ pub mod segment_serializer; pub mod segment_updater; mod segment_writer; mod sorted_doc_id_column; -mod sorted_doc_id_multivalue_column; +// mod sorted_doc_id_multivalue_column; mod stamper; use crossbeam_channel as channel; @@ -58,7 +58,7 @@ type AddBatchReceiver = channel::Receiver; #[cfg(test)] mod tests_mmap { use crate::collector::Count; - use crate::query::QueryParser; + // use crate::query::QueryParser; use crate::schema::{JsonObjectOptions, Schema, TEXT}; use crate::{Index, Term}; @@ -79,45 +79,45 @@ mod tests_mmap { Ok(()) } - #[test] - fn test_json_field_expand_dots_disabled_dot_escaped_required() { - let mut schema_builder = Schema::builder(); - let json_field = schema_builder.add_json_field("json", TEXT); - let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_for_tests().unwrap(); - let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"}); - index_writer.add_document(doc!(json_field=>json)).unwrap(); - index_writer.commit().unwrap(); - let reader = index.reader().unwrap(); - let searcher = reader.searcher(); - assert_eq!(searcher.num_docs(), 1); - let parse_query = QueryParser::for_index(&index, Vec::new()); - let query = parse_query - .parse_query(r#"json.k8s\.container\.name:prometheus"#) - .unwrap(); - let num_docs = searcher.search(&query, &Count).unwrap(); - assert_eq!(num_docs, 1); - } + // #[test] + // fn test_json_field_expand_dots_disabled_dot_escaped_required() { + // let mut schema_builder = Schema::builder(); + // let json_field = schema_builder.add_json_field("json", TEXT); + // let index = Index::create_in_ram(schema_builder.build()); + // let mut index_writer = index.writer_for_tests().unwrap(); + // let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"}); + // index_writer.add_document(doc!(json_field=>json)).unwrap(); + // index_writer.commit().unwrap(); + // let reader = index.reader().unwrap(); + // let searcher = reader.searcher(); + // assert_eq!(searcher.num_docs(), 1); + // let parse_query = QueryParser::for_index(&index, Vec::new()); + // let query = parse_query + // .parse_query(r#"json.k8s\.container\.name:prometheus"#) + // .unwrap(); + // let num_docs = searcher.search(&query, &Count).unwrap(); + // assert_eq!(num_docs, 1); + // } - #[test] - fn test_json_field_expand_dots_enabled_dot_escape_not_required() { - let mut schema_builder = Schema::builder(); - let json_options: JsonObjectOptions = - JsonObjectOptions::from(TEXT).set_expand_dots_enabled(); - let json_field = schema_builder.add_json_field("json", json_options); - let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_for_tests().unwrap(); - let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"}); - index_writer.add_document(doc!(json_field=>json)).unwrap(); - index_writer.commit().unwrap(); - let reader = index.reader().unwrap(); - let searcher = reader.searcher(); - assert_eq!(searcher.num_docs(), 1); - let parse_query = QueryParser::for_index(&index, Vec::new()); - let query = parse_query - .parse_query(r#"json.k8s.container.name:prometheus"#) - .unwrap(); - let num_docs = searcher.search(&query, &Count).unwrap(); - assert_eq!(num_docs, 1); - } + // #[test] + // fn test_json_field_expand_dots_enabled_dot_escape_not_required() { + // let mut schema_builder = Schema::builder(); + // let json_options: JsonObjectOptions = + // JsonObjectOptions::from(TEXT).set_expand_dots_enabled(); + // let json_field = schema_builder.add_json_field("json", json_options); + // let index = Index::create_in_ram(schema_builder.build()); + // let mut index_writer = index.writer_for_tests().unwrap(); + // let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"}); + // index_writer.add_document(doc!(json_field=>json)).unwrap(); + // index_writer.commit().unwrap(); + // let reader = index.reader().unwrap(); + // let searcher = reader.searcher(); + // assert_eq!(searcher.num_docs(), 1); + // let parse_query = QueryParser::for_index(&index, Vec::new()); + // let query = parse_query + // .parse_query(r#"json.k8s.container.name:prometheus"#) + // .unwrap(); + // let num_docs = searcher.search(&query, &Count).unwrap(); + // assert_eq!(num_docs, 1); + // } } diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index d82cf47e9..f96a9a594 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -1,4 +1,7 @@ +use common::TerminatingWrite; + use crate::core::{Segment, SegmentComponent}; +use crate::directory::WritePtr; use crate::fastfield::CompositeFastFieldSerializer; use crate::fieldnorm::FieldNormsSerializer; use crate::postings::InvertedIndexSerializer; @@ -9,7 +12,7 @@ use crate::store::StoreWriter; pub struct SegmentSerializer { segment: Segment, pub(crate) store_writer: StoreWriter, - fast_field_serializer: CompositeFastFieldSerializer, + fast_field_write: WritePtr, fieldnorms_serializer: Option, postings_serializer: InvertedIndexSerializer, } @@ -47,7 +50,6 @@ impl SegmentSerializer { }; let fast_field_write = segment.open_write(SegmentComponent::FastFields)?; - let fast_field_serializer = CompositeFastFieldSerializer::from_write(fast_field_write)?; let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?; let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?; @@ -56,7 +58,7 @@ impl SegmentSerializer { Ok(SegmentSerializer { segment, store_writer, - fast_field_serializer, + fast_field_write, fieldnorms_serializer: Some(fieldnorms_serializer), postings_serializer, }) @@ -81,8 +83,8 @@ impl SegmentSerializer { } /// Accessor to the `FastFieldSerializer`. - pub fn get_fast_field_serializer(&mut self) -> &mut CompositeFastFieldSerializer { - &mut self.fast_field_serializer + pub fn get_fast_field_write(&mut self) -> &mut WritePtr { + &mut self.fast_field_write } /// Extract the field norm serializer. @@ -102,7 +104,7 @@ impl SegmentSerializer { if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() { fieldnorms_serializer.close()?; } - self.fast_field_serializer.close()?; + self.fast_field_write.terminate()?; self.postings_serializer.close()?; self.store_writer.close()?; Ok(()) diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 6f2e680b6..f1b087148 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -348,8 +348,7 @@ impl SegmentWriter { pub fn add_document(&mut self, add_operation: AddOperation) -> crate::Result<()> { let doc = add_operation.document; self.doc_opstamps.push(add_operation.opstamp); - let doc_id = self.max_doc; - self.fast_field_writers.add_document(doc_id, &doc)?; + self.fast_field_writers.add_document(&doc)?; self.index_document(&doc)?; let doc_writer = self.segment_serializer.get_store_writer(); doc_writer.store(&doc, &self.schema)?; @@ -410,8 +409,7 @@ fn remap_and_write( )?; debug!("fastfield-serialize"); fast_field_writers.serialize( - serializer.get_fast_field_serializer(), - &term_ord_map, + serializer.get_fast_field_write(), doc_id_map, )?; diff --git a/src/lib.rs b/src/lib.rs index 2929c2f1c..6b457894e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -263,7 +263,7 @@ mod indexer; pub mod error; pub mod tokenizer; -pub mod aggregation; +// pub mod aggregation; pub mod collector; pub mod directory; pub mod fastfield; @@ -1166,4 +1166,5 @@ pub mod tests { ); assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro()); } + } diff --git a/src/postings/json_postings_writer.rs b/src/postings/json_postings_writer.rs index 2e29aa0a0..af4655c6e 100644 --- a/src/postings/json_postings_writer.rs +++ b/src/postings/json_postings_writer.rs @@ -2,7 +2,6 @@ use std::io; use stacker::Addr; -use crate::fastfield::MultiValuedFastFieldWriter; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::postings_writer::SpecializedPostingsWriter; use crate::postings::recorder::{BufferLender, DocIdRecorder, Recorder}; @@ -44,7 +43,6 @@ impl PostingsWriter for JsonPostingsWriter { term_buffer: &mut Term, ctx: &mut IndexingContext, indexing_position: &mut IndexingPosition, - _fast_field_writer: Option<&mut MultiValuedFastFieldWriter>, ) { self.str_posting_writer.index_text( doc_id, @@ -52,7 +50,6 @@ impl PostingsWriter for JsonPostingsWriter { term_buffer, ctx, indexing_position, - None, ); } diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 19c127475..10bf3ae17 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -6,7 +6,6 @@ use std::ops::Range; use rustc_hash::FxHashMap; use stacker::Addr; -use crate::fastfield::MultiValuedFastFieldWriter; use crate::fieldnorm::FieldNormReaders; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::recorder::{BufferLender, Recorder}; @@ -135,7 +134,7 @@ pub(crate) trait PostingsWriter: Send + Sync { pos: u32, term: &Term, ctx: &mut IndexingContext, - ) -> UnorderedTermId; + ) -> UnorderedTermId; // TODO remove UnorderedTermId /// Serializes the postings on disk. /// The actual serialization format is handled by the `PostingsSerializer`. @@ -155,7 +154,6 @@ pub(crate) trait PostingsWriter: Send + Sync { term_buffer: &mut Term, ctx: &mut IndexingContext, indexing_position: &mut IndexingPosition, - mut term_id_fast_field_writer_opt: Option<&mut MultiValuedFastFieldWriter>, ) { let end_of_path_idx = term_buffer.len_bytes(); let mut num_tokens = 0; @@ -175,11 +173,7 @@ pub(crate) trait PostingsWriter: Send + Sync { term_buffer.append_bytes(token.text.as_bytes()); let start_position = indexing_position.end_position + token.position as u32; end_position = end_position.max(start_position + token.position_length as u32); - let unordered_term_id = self.subscribe(doc_id, start_position, term_buffer, ctx); - if let Some(term_id_fast_field_writer) = term_id_fast_field_writer_opt.as_mut() { - term_id_fast_field_writer.add_val(unordered_term_id); - } - + self.subscribe(doc_id, start_position, term_buffer, ctx); num_tokens += 1; }); diff --git a/src/query/mod.rs b/src/query/mod.rs index ed0672070..df096ef72 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -15,7 +15,7 @@ mod more_like_this; mod phrase_query; mod query; mod query_parser; -mod range_query; +// mod range_query; mod regex_query; mod reqopt_scorer; mod scorer; @@ -50,7 +50,7 @@ pub use self::more_like_this::{MoreLikeThisQuery, MoreLikeThisQueryBuilder}; pub use self::phrase_query::PhraseQuery; pub use self::query::{EnableScoring, Query, QueryClone}; pub use self::query_parser::{QueryParser, QueryParserError}; -pub use self::range_query::RangeQuery; +// pub use self::range_query::RangeQuery; pub use self::regex_query::RegexQuery; pub use self::reqopt_scorer::RequiredOptionalScorer; pub use self::score_combiner::{ diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 63388312a..9ecfd44d8 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -13,10 +13,11 @@ use crate::core::Index; use crate::indexer::{ convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter, }; -use crate::query::range_query::is_type_valid_for_fastfield_range_query; +// use crate::query::range_query::is_type_valid_for_fastfield_range_query; use crate::query::{ AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, - RangeQuery, TermQuery, TermSetQuery, + // RangeQuery, + TermQuery, TermSetQuery, }; use crate::schema::{ Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions, @@ -334,6 +335,8 @@ impl QueryParser { json_path: &str, phrase: &str, ) -> Result { + todo!(); + /* let field_entry = self.schema.get_field_entry(field); let field_type = field_entry.field_type(); let field_supports_ff_range_queries = field_type.is_fast() @@ -417,6 +420,7 @@ impl QueryParser { Ok(Term::from_field_ip_addr(field, ip_v6)) } } + */ } fn compute_logical_ast_for_leaf( @@ -740,9 +744,11 @@ fn convert_literal_to_query( value_type, lower, upper, - } => Box::new(RangeQuery::new_term_bounds( - field, value_type, &lower, &upper, - )), + } => { todo!(); +// Box::new(RangeQuery::new_term_bounds( +// field, value_type, &lower, &upper, +// )) + } , LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)), LogicalLiteral::All => Box::new(AllQuery), } diff --git a/src/query/range_query/fast_field_range_query.rs b/src/query/range_query/fast_field_range_query.rs index b7ef2cc53..c822d5ab4 100644 --- a/src/query/range_query/fast_field_range_query.rs +++ b/src/query/range_query/fast_field_range_query.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use fastfield_codecs::Column; -use crate::fastfield::{MakeZero, MultiValuedFastFieldReader}; +use crate::fastfield::MakeZero; use crate::{DocId, DocSet, TERMINATED}; /// Helper to have a cursor over a vec of docids diff --git a/src/query/range_query/range_query_ip_fastfield.rs b/src/query/range_query/range_query_ip_fastfield.rs index eb2bebc29..2aee8a047 100644 --- a/src/query/range_query/range_query_ip_fastfield.rs +++ b/src/query/range_query/range_query_ip_fastfield.rs @@ -8,10 +8,13 @@ use std::ops::{Bound, RangeInclusive}; use common::BinarySerializable; use fastfield_codecs::MonotonicallyMappableToU128; -use super::fast_field_range_query::{FastFieldCardinality, RangeDocSet}; use super::range_query::map_bound; use crate::query::{ConstScorer, Explanation, Scorer, Weight}; +<<<<<<< HEAD use crate::schema::Cardinality; +======= +use crate::schema::Field; +>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples) use crate::{DocId, DocSet, Score, SegmentReader, TantivyError}; /// `IPFastFieldRangeWeight` uses the ip address fast field to execute range queries. @@ -40,6 +43,7 @@ impl IPFastFieldRangeWeight { impl Weight for IPFastFieldRangeWeight { fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { +<<<<<<< HEAD let field_type = reader .schema() .get_field_entry(reader.schema().get_field(&self.field)?) @@ -74,6 +78,40 @@ impl Weight for IPFastFieldRangeWeight { Ok(Box::new(ConstScorer::new(docset, boost))) } } +======= + todo!(); + // let field_type = reader.schema().get_field_entry(self.field).field_type(); + // match field_type.fastfield_cardinality().unwrap() { + // Cardinality::SingleValue => { + // let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?; + // let value_range = bound_to_value_range( + // &self.left_bound, + // &self.right_bound, + // ip_addr_fast_field.min_value(), + // ip_addr_fast_field.max_value(), + // ); + // let docset = RangeDocSet::new( + // value_range, + // FastFieldCardinality::SingleValue(ip_addr_fast_field), + // ); + // Ok(Box::new(ConstScorer::new(docset, boost))) + // } + // Cardinality::MultiValues => { + // let ip_addr_fast_field = reader.fast_fields().ip_addrs(self.field)?; + // let value_range = bound_to_value_range( + // &self.left_bound, + // &self.right_bound, + // ip_addr_fast_field.min_value(), + // ip_addr_fast_field.max_value(), + // ); + // let docset = RangeDocSet::new( + // value_range, + // FastFieldCardinality::MultiValue(ip_addr_fast_field), + // ); + // Ok(Box::new(ConstScorer::new(docset, boost))) + // } + // } +>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples) } fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { diff --git a/src/query/range_query/range_query_u64_fastfield.rs b/src/query/range_query/range_query_u64_fastfield.rs index 5cdcf15ba..2cb766bc8 100644 --- a/src/query/range_query/range_query_u64_fastfield.rs +++ b/src/query/range_query/range_query_u64_fastfield.rs @@ -6,10 +6,14 @@ use std::ops::{Bound, RangeInclusive}; use fastfield_codecs::MonotonicallyMappableToU64; -use super::fast_field_range_query::{FastFieldCardinality, RangeDocSet}; +use super::fast_field_range_query::RangeDocSet; use super::range_query::map_bound; use crate::query::{ConstScorer, Explanation, Scorer, Weight}; +<<<<<<< HEAD use crate::schema::Cardinality; +======= +use crate::schema::Field; +>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples) use crate::{DocId, DocSet, Score, SegmentReader, TantivyError}; /// `FastFieldRangeWeight` uses the fast field to execute range queries. @@ -33,6 +37,7 @@ impl FastFieldRangeWeight { impl Weight for FastFieldRangeWeight { fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { +<<<<<<< HEAD let field_type = reader .schema() .get_field_entry(reader.schema().get_field(&self.field)?) @@ -63,6 +68,36 @@ impl Weight for FastFieldRangeWeight { Ok(Box::new(ConstScorer::new(docset, boost))) } } +======= + todo!(); + // let field_type = reader.schema().get_field_entry(self.field).field_type(); + // match field_type.fastfield_cardinality().unwrap() { + // Cardinality::SingleValue => { + // let fast_field = reader.fast_fields().u64_lenient(self.field)?; + // let value_range = bound_to_value_range( + // &self.left_bound, + // &self.right_bound, + // fast_field.min_value(), + // fast_field.max_value(), + // ); + // let docset = + // RangeDocSet::new(value_range, FastFieldCardinality::SingleValue(fast_field)); + // Ok(Box::new(ConstScorer::new(docset, boost))) + // } + // Cardinality::MultiValues => { + // let fast_field = reader.fast_fields().u64s_lenient(self.field)?; + // let value_range = bound_to_value_range( + // &self.left_bound, + // &self.right_bound, + // fast_field.min_value(), + // fast_field.max_value(), + // ); + // let docset = + // RangeDocSet::new(value_range, FastFieldCardinality::MultiValue(fast_field)); + // Ok(Box::new(ConstScorer::new(docset, boost))) + // } + // } +>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples) } fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index fb7ae9a5a..fd3b9c01b 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -8,7 +8,7 @@ use serde_json::Value as JsonValue; use thiserror::Error; use super::ip_options::IpAddrOptions; -use super::{Cardinality, IntoIpv6Addr}; +use super:: IntoIpv6Addr; use crate::schema::bytes_options::BytesOptions; use crate::schema::facet_options::FacetOptions; use crate::schema::{ @@ -241,27 +241,6 @@ impl FieldType { } } - /// returns true if the field is fast. - pub fn fastfield_cardinality(&self) -> Option { - todo!(); - // match *self { - // FieldType::Bytes(ref bytes_options) => { - // bytes_options.is_fast().then_some(Cardinality::SingleValue) - // } - // FieldType::Str(ref text_options) => { - // text_options.is_fast().then_some(Cardinality::MultiValues) - // } - // FieldType::U64(ref int_options) - // | FieldType::I64(ref int_options) - // | FieldType::F64(ref int_options) - // | FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality(), - // FieldType::Date(ref date_options) => date_options.get_fastfield_cardinality(), - // FieldType::Facet(_) => Some(Cardinality::MultiValues), - // FieldType::JsonObject(_) => None, - // FieldType::IpAddr(ref ip_addr_options) => - // ip_addr_options.get_fastfield_cardinality(), } - } - /// returns true if the field is normed (see [fieldnorms](crate::fieldnorm)). pub fn has_fieldnorms(&self) -> bool { match *self { diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 2d2612500..8ee6f4b88 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -143,7 +143,7 @@ pub use self::json_object_options::JsonObjectOptions; pub use self::named_field_document::NamedFieldDocument; pub use self::numeric_options::NumericOptions; #[allow(deprecated)] -pub use self::numeric_options::{Cardinality, IntOptions}; +pub use self::numeric_options::IntOptions; pub use self::schema::{DocParsingError, Schema, SchemaBuilder}; pub use self::term::Term; pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT}; diff --git a/src/schema/numeric_options.rs b/src/schema/numeric_options.rs index d8d1947f8..e5352e717 100644 --- a/src/schema/numeric_options.rs +++ b/src/schema/numeric_options.rs @@ -4,18 +4,6 @@ use serde::{Deserialize, Serialize}; use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag}; -/// Express whether a field is single-value or multi-valued. -#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)] -pub enum Cardinality { - /// The document must have exactly one value associated with the document. - #[serde(rename = "single")] - SingleValue, - /// The document can have any number of values associated with the document. - /// This is more memory and CPU expensive than the `SingleValue` solution. - #[serde(rename = "multi")] - MultiValues, -} - #[deprecated(since = "0.17.0", note = "Use NumericOptions instead.")] /// Deprecated use [`NumericOptions`] instead. pub type IntOptions = NumericOptions;