mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-27 13:40:49 +00:00
Disconnected facet / fast field merges / examples
This commit is contained in:
@@ -15,7 +15,7 @@ use super::metric::{
|
||||
use super::segment_agg_result::BucketCount;
|
||||
use super::VecWithNames;
|
||||
use crate::fastfield::{type_and_cardinality, MultiValuedFastFieldReader};
|
||||
use crate::schema::{Cardinality, Type};
|
||||
use crate::schema::Type;
|
||||
use crate::{InvertedIndexReader, SegmentReader, TantivyError};
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
|
||||
@@ -105,8 +105,8 @@ pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer};
|
||||
mod tweak_score_top_collector;
|
||||
pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker};
|
||||
|
||||
mod facet_collector;
|
||||
pub use self::facet_collector::{FacetCollector, FacetCounts};
|
||||
// mod facet_collector;
|
||||
// pub use self::facet_collector::{FacetCollector, FacetCounts};
|
||||
use crate::query::Weight;
|
||||
|
||||
mod docset_collector;
|
||||
|
||||
@@ -5,7 +5,6 @@ use fastfield_codecs::Column;
|
||||
use super::*;
|
||||
use crate::collector::{Count, FilterCollector, TopDocs};
|
||||
use crate::core::SegmentReader;
|
||||
use crate::fastfield::BytesFastFieldReader;
|
||||
use crate::query::{AllQuery, QueryParser};
|
||||
use crate::schema::{Field, Schema, FAST, TEXT};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
@@ -164,8 +163,8 @@ pub struct FastFieldSegmentCollector {
|
||||
}
|
||||
|
||||
impl FastFieldTestCollector {
|
||||
pub fn for_field(field: String) -> FastFieldTestCollector {
|
||||
FastFieldTestCollector { field }
|
||||
pub fn for_field(field: impl ToString) -> FastFieldTestCollector {
|
||||
FastFieldTestCollector { field: field.to_string() }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -210,64 +209,62 @@ impl SegmentCollector for FastFieldSegmentCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects in order all of the fast field bytes for all of the
|
||||
/// docs in the `DocSet`
|
||||
///
|
||||
/// This collector is mainly useful for tests.
|
||||
pub struct BytesFastFieldTestCollector {
|
||||
field: Field,
|
||||
}
|
||||
// /// Collects in order all of the fast field bytes for all of the
|
||||
// /// docs in the `DocSet`
|
||||
// ///
|
||||
// /// This collector is mainly useful for tests.
|
||||
// pub struct BytesFastFieldTestCollector {
|
||||
// field: Field,
|
||||
// }
|
||||
|
||||
pub struct BytesFastFieldSegmentCollector {
|
||||
vals: Vec<u8>,
|
||||
reader: BytesFastFieldReader,
|
||||
}
|
||||
// pub struct BytesFastFieldSegmentCollector {
|
||||
// vals: Vec<u8>,
|
||||
// reader: BytesFastFieldReader,
|
||||
// }
|
||||
|
||||
impl BytesFastFieldTestCollector {
|
||||
pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
|
||||
BytesFastFieldTestCollector { field }
|
||||
}
|
||||
}
|
||||
// impl BytesFastFieldTestCollector {
|
||||
// pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
|
||||
// BytesFastFieldTestCollector { field }
|
||||
// }
|
||||
// }
|
||||
|
||||
impl Collector for BytesFastFieldTestCollector {
|
||||
type Fruit = Vec<u8>;
|
||||
type Child = BytesFastFieldSegmentCollector;
|
||||
// impl Collector for BytesFastFieldTestCollector {
|
||||
// type Fruit = Vec<u8>;
|
||||
// type Child = BytesFastFieldSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
_segment_local_id: u32,
|
||||
segment_reader: &SegmentReader,
|
||||
) -> crate::Result<BytesFastFieldSegmentCollector> {
|
||||
let reader = segment_reader
|
||||
.fast_fields()
|
||||
.bytes(segment_reader.schema().get_field_name(self.field))?;
|
||||
Ok(BytesFastFieldSegmentCollector {
|
||||
vals: Vec::new(),
|
||||
reader,
|
||||
})
|
||||
}
|
||||
// fn for_segment(
|
||||
// &self,
|
||||
// _segment_local_id: u32,
|
||||
// segment_reader: &SegmentReader,
|
||||
// ) -> crate::Result<BytesFastFieldSegmentCollector> {
|
||||
// let reader = segment_reader.fast_fields().bytes(self.field)?;
|
||||
// Ok(BytesFastFieldSegmentCollector {
|
||||
// vals: Vec::new(),
|
||||
// reader,
|
||||
// })
|
||||
// }
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
// fn requires_scoring(&self) -> bool {
|
||||
// false
|
||||
// }
|
||||
|
||||
fn merge_fruits(&self, children: Vec<Vec<u8>>) -> crate::Result<Vec<u8>> {
|
||||
Ok(children.into_iter().flat_map(|c| c.into_iter()).collect())
|
||||
}
|
||||
}
|
||||
// fn merge_fruits(&self, children: Vec<Vec<u8>>) -> crate::Result<Vec<u8>> {
|
||||
// Ok(children.into_iter().flat_map(|c| c.into_iter()).collect())
|
||||
// }
|
||||
// }
|
||||
|
||||
impl SegmentCollector for BytesFastFieldSegmentCollector {
|
||||
type Fruit = Vec<u8>;
|
||||
// impl SegmentCollector for BytesFastFieldSegmentCollector {
|
||||
// type Fruit = Vec<u8>;
|
||||
|
||||
fn collect(&mut self, doc: u32, _score: Score) {
|
||||
let data = self.reader.get_bytes(doc);
|
||||
self.vals.extend(data);
|
||||
}
|
||||
// fn collect(&mut self, doc: u32, _score: Score) {
|
||||
// let data = self.reader.get_bytes(doc);
|
||||
// self.vals.extend(data);
|
||||
// }
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
self.vals
|
||||
}
|
||||
}
|
||||
// fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
// self.vals
|
||||
// }
|
||||
// }
|
||||
|
||||
fn make_test_searcher() -> crate::Result<Searcher> {
|
||||
let schema = Schema::builder().build();
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::error::{DataCorruption, TantivyError};
|
||||
use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN};
|
||||
use crate::indexer::segment_updater::save_metas;
|
||||
use crate::reader::{IndexReader, IndexReaderBuilder};
|
||||
use crate::schema::{Cardinality, Field, FieldType, Schema};
|
||||
use crate::schema::{Field, FieldType, Schema};
|
||||
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
|
||||
use crate::IndexWriter;
|
||||
|
||||
@@ -245,12 +245,6 @@ impl IndexBuilder {
|
||||
sort_by_field.field
|
||||
)));
|
||||
}
|
||||
if entry.field_type().fastfield_cardinality() != Some(Cardinality::SingleValue) {
|
||||
return Err(TantivyError::InvalidArgument(format!(
|
||||
"Only single value fast field Cardinality supported for sorting index {}",
|
||||
sort_by_field.field
|
||||
)));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
|
||||
@@ -7,7 +7,7 @@ use fail::fail_point;
|
||||
use crate::core::{InvertedIndexReader, Segment, SegmentComponent, SegmentId};
|
||||
use crate::directory::{CompositeFile, FileSlice};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
|
||||
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FastFieldReaders};
|
||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
|
||||
use crate::schema::{Field, FieldType, IndexRecordOption, Schema};
|
||||
use crate::space_usage::SegmentSpaceUsage;
|
||||
@@ -90,25 +90,8 @@ impl SegmentReader {
|
||||
}
|
||||
|
||||
/// Accessor to the `FacetReader` associated with a given `Field`.
|
||||
pub fn facet_reader(&self, field: Field) -> crate::Result<FacetReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
|
||||
match field_entry.field_type() {
|
||||
FieldType::Facet(_) => {
|
||||
let term_ords_reader =
|
||||
self.fast_fields().u64s(self.schema.get_field_name(field))?;
|
||||
let termdict = self
|
||||
.termdict_composite
|
||||
.open_read(field)
|
||||
.map(TermDictionary::open)
|
||||
.unwrap_or_else(|| Ok(TermDictionary::empty()))?;
|
||||
Ok(FacetReader::new(term_ords_reader, termdict))
|
||||
}
|
||||
_ => Err(crate::TantivyError::InvalidArgument(format!(
|
||||
"Field {:?} is not a facet field.",
|
||||
field_entry.name()
|
||||
))),
|
||||
}
|
||||
pub fn facet_reader(&self, field: Field) -> crate::Result<()> {
|
||||
todo!();
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `Field norms`'s reader.
|
||||
|
||||
@@ -24,15 +24,10 @@ use std::net::Ipv6Addr;
|
||||
use fastfield_codecs::MonotonicallyMappableToU64;
|
||||
|
||||
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
|
||||
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
|
||||
// pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub(crate) use self::multivalued::{get_fastfield_codecs_for_multivalue, MultivalueStartIndex};
|
||||
pub use self::multivalued::{
|
||||
MultiValueIndex, MultiValueU128FastFieldWriter, MultiValuedFastFieldReader,
|
||||
MultiValuedFastFieldWriter,
|
||||
};
|
||||
pub(crate) use self::readers::type_and_cardinality;
|
||||
// pub use self::facet_reader::FacetReader;
|
||||
|
||||
pub use self::readers::FastFieldReaders;
|
||||
pub use self::serializer::{Column, CompositeFastFieldSerializer};
|
||||
use self::writer::unexpected_value;
|
||||
@@ -41,10 +36,10 @@ use crate::schema::{Type, Value};
|
||||
use crate::DateTime;
|
||||
|
||||
mod alive_bitset;
|
||||
mod bytes;
|
||||
// mod bytes;
|
||||
mod error;
|
||||
mod facet_reader;
|
||||
mod multivalued;
|
||||
// mod facet_reader;
|
||||
// mod multivalued;
|
||||
mod readers;
|
||||
mod serializer;
|
||||
mod writer;
|
||||
@@ -166,7 +161,7 @@ mod tests {
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::HasLen;
|
||||
use common::{HasLen, TerminatingWrite};
|
||||
use fastfield_codecs::{open, FastFieldCodecType};
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::prelude::SliceRandom;
|
||||
@@ -189,16 +184,9 @@ mod tests {
|
||||
});
|
||||
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield() {
|
||||
let test_fastfield = fastfield_codecs::serialize_and_load(&[100u64, 200u64, 300u64][..]);
|
||||
assert_eq!(test_fastfield.get_val(0), 100);
|
||||
assert_eq!(test_fastfield.get_val(1), 200);
|
||||
assert_eq!(test_fastfield.get_val(2), 300);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield_i64_u64() {
|
||||
pub fn test_convert_i64_u64() {
|
||||
let datetime = DateTime::from_utc(OffsetDateTime::UNIX_EPOCH);
|
||||
assert_eq!(i64::from_u64(datetime.to_u64()), 0i64);
|
||||
}
|
||||
@@ -208,22 +196,21 @@ mod tests {
|
||||
let path = Path::new("test");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
fast_field_writers
|
||||
.add_document(0, &doc!(*FIELD=>13u64))
|
||||
.add_document(&doc!(*FIELD=>13u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(1,&doc!(*FIELD=>14u64))
|
||||
.add_document(&doc!(*FIELD=>14u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(2,&doc!(*FIELD=>2u64))
|
||||
.add_document(&doc!(*FIELD=>2u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 34);
|
||||
@@ -241,38 +228,37 @@ mod tests {
|
||||
let path = Path::new("test");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test"))?;
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test"))?;
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
fast_field_writers
|
||||
.add_document(0, &doc!(*FIELD=>4u64))
|
||||
.add_document(&doc!(*FIELD=>4u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(1, &doc!(*FIELD=>14_082_001u64))
|
||||
.add_document(&doc!(*FIELD=>14_082_001u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(2, &doc!(*FIELD=>3_052u64))
|
||||
.add_document(&doc!(*FIELD=>3_052u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(3, &doc!(*FIELD=>9_002u64))
|
||||
.add_document(&doc!(*FIELD=>9_002u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(4, &doc!(*FIELD=>15_001u64))
|
||||
.add_document(&doc!(*FIELD=>15_001u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(5, &doc!(*FIELD=>777u64))
|
||||
.add_document(&doc!(*FIELD=>777u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(6, &doc!(*FIELD=>1_002u64))
|
||||
.add_document(&doc!(*FIELD=>1_002u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(7, &doc!(*FIELD=>1_501u64))
|
||||
.add_document(&doc!(*FIELD=>1_501u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(8, &doc!(*FIELD=>215u64))
|
||||
.add_document(&doc!(*FIELD=>215u64))
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
|
||||
serializer.close()?;
|
||||
fast_field_writers.serialize(&mut write, None)?;
|
||||
write.terminate()?;
|
||||
}
|
||||
let file = directory.open_read(path)?;
|
||||
assert_eq!(file.len(), 62);
|
||||
@@ -302,18 +288,17 @@ mod tests {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for doc_id in 0..10_000 {
|
||||
for _ in 0..10_000 {
|
||||
fast_field_writers
|
||||
.add_document(doc_id, &doc!(*FIELD=>100_000u64))
|
||||
.add_document(&doc!(*FIELD=>100_000u64))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 35);
|
||||
@@ -337,22 +322,21 @@ mod tests {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
// forcing the amplitude to be high
|
||||
fast_field_writers
|
||||
.add_document(0, &doc!(*FIELD=>0u64))
|
||||
.add_document(&doc!(*FIELD=>0u64))
|
||||
.unwrap();
|
||||
for doc_id in 1u64..10_001u64 {
|
||||
fast_field_writers
|
||||
.add_document(doc_id as u32, &doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64))
|
||||
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 80049);
|
||||
@@ -383,20 +367,17 @@ mod tests {
|
||||
let i64_field = schema_builder.add_i64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let mut doc_id = 0;
|
||||
for i in -100i64..10_000i64 {
|
||||
let mut doc = Document::default();
|
||||
doc.add_i64(i64_field, i);
|
||||
fast_field_writers.add_document(doc_id, &doc).unwrap();
|
||||
doc_id += 1;
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 49_usize);
|
||||
@@ -477,14 +458,13 @@ mod tests {
|
||||
let n = permutation.len();
|
||||
let directory = RamDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test"))?;
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test"))?;
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for (doc_id, &x) in permutation.iter().enumerate() {
|
||||
fast_field_writers.add_document(doc_id as u32, &doc!(*FIELD=>x)).unwrap();
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap();
|
||||
}
|
||||
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
|
||||
serializer.close()?;
|
||||
fast_field_writers.serialize(&mut write, None)?;
|
||||
write.terminate()?;
|
||||
}
|
||||
let file = directory.open_read(path)?;
|
||||
{
|
||||
@@ -543,17 +523,18 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_vals_for_docs(ff: &MultiValuedFastFieldReader<u64>, docs: Range<u32>) -> Vec<u64> {
|
||||
let mut all = vec![];
|
||||
// fn get_vals_for_docs(ff: &MultiValuedFastFieldReader<u64>, docs: Range<u32>) -> Vec<u64> {
|
||||
// let mut all = vec![];
|
||||
|
||||
for doc in docs {
|
||||
let mut out: Vec<u64> = vec![];
|
||||
ff.get_vals(doc, &mut out);
|
||||
all.extend(out);
|
||||
}
|
||||
all
|
||||
}
|
||||
// for doc in docs {
|
||||
// let mut out: Vec<u64> = vec![];
|
||||
// ff.get_vals(doc, &mut out);
|
||||
// all.extend(out);
|
||||
// }
|
||||
// all
|
||||
// }
|
||||
|
||||
/*
|
||||
#[test]
|
||||
fn test_text_fastfield() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
@@ -651,156 +632,159 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
*/
|
||||
|
||||
#[test]
|
||||
fn test_string_fastfield() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", STRING | FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
// #[test]
|
||||
// fn test_string_fastfield() -> crate::Result<()> {
|
||||
// let mut schema_builder = Schema::builder();
|
||||
// let text_field = schema_builder.add_text_field("text", STRING | FAST);
|
||||
// let schema = schema_builder.build();
|
||||
// let index = Index::create_in_ram(schema);
|
||||
|
||||
{
|
||||
// first segment
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "BBBBB", // term_ord 1
|
||||
))?;
|
||||
index_writer.add_document(doc!())?;
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "AAAAA", // term_ord 0
|
||||
))?;
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "AAAAA", // term_ord 0
|
||||
))?;
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "zumberthree", // term_ord 2, after merge term_ord 3
|
||||
))?;
|
||||
// {
|
||||
// // first segment
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
// index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "BBBBB", // term_ord 1
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "AAAAA", // term_ord 0
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "AAAAA", // term_ord 0
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "zumberthree", // term_ord 2, after merge term_ord 3
|
||||
// ))?;
|
||||
|
||||
index_writer.add_document(doc!())?;
|
||||
index_writer.commit()?;
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.commit()?;
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
|
||||
assert_eq!(get_vals_for_docs(&text_fast_field, 0..6), vec![1, 0, 0, 2]);
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let text_fast_field = fast_fields.u64s(text_field).unwrap();
|
||||
|
||||
let inverted_index = segment_reader.inverted_index(text_field)?;
|
||||
assert_eq!(inverted_index.terms().num_terms(), 3);
|
||||
let mut bytes = vec![];
|
||||
assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
|
||||
assert_eq!(bytes, "AAAAA".as_bytes());
|
||||
}
|
||||
// assert_eq!(get_vals_for_docs(&text_fast_field, 0..6), vec![1, 0, 0, 2]);
|
||||
|
||||
{
|
||||
// second segment
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
// let inverted_index = segment_reader.inverted_index(text_field)?;
|
||||
// assert_eq!(inverted_index.terms().num_terms(), 3);
|
||||
// let mut bytes = vec![];
|
||||
// assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?);
|
||||
// assert_eq!(bytes, "AAAAA".as_bytes());
|
||||
// }
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "AAAAA", // term_ord 0
|
||||
))?;
|
||||
// {
|
||||
// // second segment
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
text_field => "CCCCC", // term_ord 1, after merge 2
|
||||
))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "AAAAA", // term_ord 0
|
||||
// ))?;
|
||||
|
||||
index_writer.add_document(doc!())?;
|
||||
index_writer.commit()?;
|
||||
// index_writer.add_document(doc!(
|
||||
// text_field => "CCCCC", // term_ord 1, after merge 2
|
||||
// ))?;
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 2);
|
||||
let segment_reader = searcher.segment_reader(1);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
// index_writer.add_document(doc!())?;
|
||||
// index_writer.commit()?;
|
||||
|
||||
assert_eq!(get_vals_for_docs(&text_fast_field, 0..2), vec![0, 1]);
|
||||
}
|
||||
// Merging the segments
|
||||
{
|
||||
let segment_ids = index.searchable_segment_ids()?;
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.merge(&segment_ids).wait()?;
|
||||
index_writer.wait_merging_threads()?;
|
||||
}
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let text_fast_field = fast_fields.u64s("text").unwrap();
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 2);
|
||||
// let segment_reader = searcher.segment_reader(1);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let text_fast_field = fast_fields.u64s(text_field).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
get_vals_for_docs(&text_fast_field, 0..9),
|
||||
vec![1, 0, 0, 3 /* next segment */, 0, 2]
|
||||
);
|
||||
// assert_eq!(get_vals_for_docs(&text_fast_field, 0..2), vec![0, 1]);
|
||||
// }
|
||||
// // Merging the segments
|
||||
// {
|
||||
// let segment_ids = index.searchable_segment_ids()?;
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
// index_writer.merge(&segment_ids).wait()?;
|
||||
// index_writer.wait_merging_threads()?;
|
||||
// }
|
||||
|
||||
Ok(())
|
||||
}
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let text_fast_field = fast_fields.u64s(text_field).unwrap();
|
||||
|
||||
#[test]
|
||||
fn test_datefastfield() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let date_field = schema_builder.add_date_field(
|
||||
"date",
|
||||
DateOptions::from(FAST).set_precision(DatePrecision::Microseconds),
|
||||
);
|
||||
let multi_date_field = schema_builder.add_date_field(
|
||||
"multi_date",
|
||||
DateOptions::default()
|
||||
.set_precision(DatePrecision::Microseconds)
|
||||
.set_fast(),
|
||||
);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
index_writer.add_document(doc!(
|
||||
date_field => DateTime::from_u64(1i64.to_u64()),
|
||||
multi_date_field => DateTime::from_u64(2i64.to_u64()),
|
||||
multi_date_field => DateTime::from_u64(3i64.to_u64())
|
||||
))?;
|
||||
index_writer.add_document(doc!(
|
||||
date_field => DateTime::from_u64(4i64.to_u64())
|
||||
))?;
|
||||
index_writer.add_document(doc!(
|
||||
multi_date_field => DateTime::from_u64(5i64.to_u64()),
|
||||
multi_date_field => DateTime::from_u64(6i64.to_u64())
|
||||
))?;
|
||||
index_writer.commit()?;
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let date_fast_field = fast_fields.date("date").unwrap();
|
||||
let dates_fast_field = fast_fields.dates("multi_date").unwrap();
|
||||
let mut dates = vec![];
|
||||
{
|
||||
assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64);
|
||||
dates_fast_field.get_vals(0u32, &mut dates);
|
||||
assert_eq!(dates.len(), 2);
|
||||
assert_eq!(dates[0].into_timestamp_micros(), 2i64);
|
||||
assert_eq!(dates[1].into_timestamp_micros(), 3i64);
|
||||
}
|
||||
{
|
||||
assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64);
|
||||
dates_fast_field.get_vals(1u32, &mut dates);
|
||||
assert!(dates.is_empty());
|
||||
}
|
||||
{
|
||||
assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64);
|
||||
dates_fast_field.get_vals(2u32, &mut dates);
|
||||
assert_eq!(dates.len(), 2);
|
||||
assert_eq!(dates[0].into_timestamp_micros(), 5i64);
|
||||
assert_eq!(dates[1].into_timestamp_micros(), 6i64);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// assert_eq!(
|
||||
// get_vals_for_docs(&text_fast_field, 0..9),
|
||||
// vec![1, 0, 0, 3 /* next segment */, 0, 2]
|
||||
// );
|
||||
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_datefastfield() -> crate::Result<()> {
|
||||
// let mut schema_builder = Schema::builder();
|
||||
// let date_field = schema_builder.add_date_field(
|
||||
// "date",
|
||||
// DateOptions::from(FAST).set_precision(DatePrecision::Microseconds),
|
||||
// );
|
||||
// let multi_date_field = schema_builder.add_date_field(
|
||||
// "multi_date",
|
||||
// DateOptions::default()
|
||||
// .set_precision(DatePrecision::Microseconds)
|
||||
// .set_fast(),
|
||||
// );
|
||||
// let schema = schema_builder.build();
|
||||
// let index = Index::create_in_ram(schema);
|
||||
// let mut index_writer = index.writer_for_tests()?;
|
||||
// index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||
// index_writer.add_document(doc!(
|
||||
// date_field => DateTime::from_u64(1i64.to_u64()),
|
||||
// multi_date_field => DateTime::from_u64(2i64.to_u64()),
|
||||
// multi_date_field => DateTime::from_u64(3i64.to_u64())
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// date_field => DateTime::from_u64(4i64.to_u64())
|
||||
// ))?;
|
||||
// index_writer.add_document(doc!(
|
||||
// multi_date_field => DateTime::from_u64(5i64.to_u64()),
|
||||
// multi_date_field => DateTime::from_u64(6i64.to_u64())
|
||||
// ))?;
|
||||
// index_writer.commit()?;
|
||||
// let reader = index.reader()?;
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let date_fast_field = fast_fields.date(date_field).unwrap();
|
||||
// let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
|
||||
// let mut dates = vec![];
|
||||
// {
|
||||
// assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64);
|
||||
// dates_fast_field.get_vals(0u32, &mut dates);
|
||||
// assert_eq!(dates.len(), 2);
|
||||
// assert_eq!(dates[0].into_timestamp_micros(), 2i64);
|
||||
// assert_eq!(dates[1].into_timestamp_micros(), 3i64);
|
||||
// }
|
||||
// {
|
||||
// assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64);
|
||||
// dates_fast_field.get_vals(1u32, &mut dates);
|
||||
// assert!(dates.is_empty());
|
||||
// }
|
||||
// {
|
||||
// assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64);
|
||||
// dates_fast_field.get_vals(2u32, &mut dates);
|
||||
// assert_eq!(dates.len(), 2);
|
||||
// assert_eq!(dates[0].into_timestamp_micros(), 5i64);
|
||||
// assert_eq!(dates[1].into_timestamp_micros(), 6i64);
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield_bool() {
|
||||
@@ -823,21 +807,20 @@ mod tests {
|
||||
let field = schema.get_field("field_bool").unwrap();
|
||||
|
||||
{
|
||||
let write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
fast_field_writers.add_document(0u32, &doc!(field=>true)).unwrap();
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(1u32, &doc!(field=>false))
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
fast_field_writers.add_document(2u32, &doc!(field=>true)).unwrap();
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(3u32, &doc!(field=>false))
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 33);
|
||||
@@ -863,19 +846,18 @@ mod tests {
|
||||
let field = schema.get_field("field_bool").unwrap();
|
||||
|
||||
{
|
||||
let write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
for doc_id in 0..50 {
|
||||
fast_field_writers.add_document(doc_id * 2, &doc!(field=>true)).unwrap();
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(doc_id * 2 + 1, &doc!(field=>false))
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 45);
|
||||
@@ -900,13 +882,12 @@ mod tests {
|
||||
let schema = schema_builder.build();
|
||||
|
||||
{
|
||||
let write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
|
||||
let mut write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let doc = Document::default();
|
||||
fast_field_writers.add_document(0, &doc).unwrap();
|
||||
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
|
||||
serializer.close()?;
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
fast_field_writers.serialize(&mut write, None)?;
|
||||
write.terminate()?;
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
@@ -921,21 +902,18 @@ mod tests {
|
||||
fn get_index(
|
||||
docs: &[crate::Document],
|
||||
schema: &Schema,
|
||||
codec_types: &[FastFieldCodecType],
|
||||
) -> crate::Result<RamDirectory> {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer =
|
||||
CompositeFastFieldSerializer::from_write_with_codec(write, codec_types).unwrap();
|
||||
let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(schema);
|
||||
for (doc_id, doc) in docs.into_iter().enumerate() {
|
||||
fast_field_writers.add_document(doc_id as u32, doc).unwrap();
|
||||
fast_field_writers.add_document(doc).unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.serialize(&mut write, None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
Ok(directory)
|
||||
}
|
||||
@@ -974,7 +952,7 @@ mod tests {
|
||||
|
||||
let docs: Vec<Document> = times.iter().map(|time| doc!(field=>*time)).collect();
|
||||
|
||||
let directory = get_index(&docs[..], &schema, &[codec_type])?;
|
||||
let directory = get_index(&docs[..], &schema)?;
|
||||
let path = Path::new("test");
|
||||
let file = directory.open_read(path).unwrap();
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
|
||||
@@ -3,10 +3,9 @@ use std::sync::Arc;
|
||||
|
||||
use fastfield_codecs::{open, open_u128, Column};
|
||||
|
||||
use super::multivalued::MultiValuedFastFieldReader;
|
||||
use crate::directory::{CompositeFile, FileSlice};
|
||||
use crate::fastfield::{BytesFastFieldReader, FastFieldNotAvailableError, FastValue};
|
||||
use crate::schema::{Cardinality, Field, FieldType, Schema};
|
||||
use crate::fastfield::{FastFieldNotAvailableError, FastValue};
|
||||
use crate::schema::{Field, FieldType, Schema};
|
||||
use crate::space_usage::PerFieldSpaceUsage;
|
||||
use crate::{DateTime, TantivyError};
|
||||
|
||||
@@ -29,7 +28,7 @@ pub(crate) enum FastType {
|
||||
Date,
|
||||
}
|
||||
|
||||
pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> {
|
||||
pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option<FastType> {
|
||||
todo!();
|
||||
// match field_type {
|
||||
// FieldType::U64(options) => options
|
||||
@@ -80,39 +79,6 @@ impl FastFieldReaders {
|
||||
})
|
||||
}
|
||||
|
||||
fn check_type(
|
||||
&self,
|
||||
field: Field,
|
||||
expected_fast_type: FastType,
|
||||
expected_cardinality: Cardinality,
|
||||
) -> crate::Result<()> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let (fast_type, cardinality) =
|
||||
type_and_cardinality(field_entry.field_type()).ok_or_else(|| {
|
||||
crate::TantivyError::SchemaError(format!(
|
||||
"Field {:?} is not a fast field.",
|
||||
field_entry.name()
|
||||
))
|
||||
})?;
|
||||
if fast_type != expected_fast_type {
|
||||
return Err(crate::TantivyError::SchemaError(format!(
|
||||
"Field {:?} is of type {:?}, expected {:?}.",
|
||||
field_entry.name(),
|
||||
fast_type,
|
||||
expected_fast_type
|
||||
)));
|
||||
}
|
||||
if cardinality != expected_cardinality {
|
||||
return Err(crate::TantivyError::SchemaError(format!(
|
||||
"Field {:?} is of cardinality {:?}, expected {:?}.",
|
||||
field_entry.name(),
|
||||
cardinality,
|
||||
expected_cardinality
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn typed_fast_field_reader_with_idx<TFastValue: FastValue>(
|
||||
&self,
|
||||
field_name: &str,
|
||||
@@ -130,80 +96,31 @@ impl FastFieldReaders {
|
||||
&self,
|
||||
field_name: &str,
|
||||
) -> crate::Result<Arc<dyn Column<TFastValue>>> {
|
||||
self.typed_fast_field_reader_with_idx(field_name, 0)
|
||||
}
|
||||
|
||||
pub(crate) fn typed_fast_field_multi_reader<TFastValue: FastValue>(
|
||||
&self,
|
||||
field_name: &str,
|
||||
) -> crate::Result<MultiValuedFastFieldReader<TFastValue>> {
|
||||
let idx_reader = self.typed_fast_field_reader(field_name)?;
|
||||
let vals_reader = self.typed_fast_field_reader_with_idx(field_name, 1)?;
|
||||
Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
|
||||
todo!();
|
||||
}
|
||||
|
||||
/// Returns the `u64` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a u64 fast field, this method returns an Error.
|
||||
pub fn u64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<u64>>> {
|
||||
self.check_type(
|
||||
self.schema.get_field(field_name)?,
|
||||
FastType::U64,
|
||||
Cardinality::SingleValue,
|
||||
)?;
|
||||
self.typed_fast_field_reader(field_name)
|
||||
pub fn u64(&self, field: &str) -> crate::Result<Arc<dyn Column<u64>>> {
|
||||
todo!();
|
||||
}
|
||||
|
||||
/// Returns the `ip` fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a u128 fast field, this method returns an Error.
|
||||
pub fn ip_addr(&self, field_name: &str) -> crate::Result<Arc<dyn Column<Ipv6Addr>>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::U128, Cardinality::SingleValue)?;
|
||||
let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
||||
Ok(open_u128::<Ipv6Addr>(bytes)?)
|
||||
}
|
||||
|
||||
/// Returns the `ip` fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a u128 fast field, this method returns an Error.
|
||||
pub fn ip_addrs(
|
||||
&self,
|
||||
field_name: &str,
|
||||
) -> crate::Result<MultiValuedFastFieldReader<Ipv6Addr>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
|
||||
let idx_reader: Arc<dyn Column<u64>> = self.typed_fast_field_reader(field_name)?;
|
||||
|
||||
let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
|
||||
let vals_reader = open_u128::<Ipv6Addr>(bytes)?;
|
||||
|
||||
Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
|
||||
pub fn ip_addr(&self, field: &str) -> crate::Result<Arc<dyn Column<Ipv6Addr>>> {
|
||||
todo!();
|
||||
// self.check_type(field, FastType::U128)?;
|
||||
// let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
||||
// Ok(open_u128::<Ipv6Addr>(bytes)?)
|
||||
}
|
||||
|
||||
/// Returns the `u128` fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a u128 fast field, this method returns an Error.
|
||||
pub(crate) fn u128(&self, field_name: &str) -> crate::Result<Arc<dyn Column<u128>>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::U128, Cardinality::SingleValue)?;
|
||||
let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
||||
Ok(open_u128::<u128>(bytes)?)
|
||||
}
|
||||
|
||||
/// Returns the `u128` multi-valued fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a u128 multi-valued fast field, this method returns an Error.
|
||||
pub fn u128s(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<u128>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
|
||||
let idx_reader: Arc<dyn Column<u64>> =
|
||||
self.typed_fast_field_reader(self.schema.get_field_name(field))?;
|
||||
|
||||
let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
|
||||
let vals_reader = open_u128::<u128>(bytes)?;
|
||||
|
||||
Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
|
||||
pub(crate) fn u128(&self, field: &str) -> crate::Result<Arc<dyn Column<u128>>> {
|
||||
todo!();
|
||||
}
|
||||
|
||||
/// Returns the `u64` fast field reader reader associated with `field`, regardless of whether
|
||||
@@ -219,113 +136,49 @@ impl FastFieldReaders {
|
||||
///
|
||||
/// If `field` is not a i64 fast field, this method returns an Error.
|
||||
pub fn i64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<i64>>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::I64, Cardinality::SingleValue)?;
|
||||
self.typed_fast_field_reader(self.schema.get_field_name(field))
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Returns the `date` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a date fast field, this method returns an Error.
|
||||
pub fn date(&self, field_name: &str) -> crate::Result<Arc<dyn Column<DateTime>>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::Date, Cardinality::SingleValue)?;
|
||||
self.typed_fast_field_reader(field_name)
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Returns the `f64` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a f64 fast field, this method returns an Error.
|
||||
pub fn f64(&self, field_name: &str) -> crate::Result<Arc<dyn Column<f64>>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::F64, Cardinality::SingleValue)?;
|
||||
self.typed_fast_field_reader(field_name)
|
||||
todo!();
|
||||
}
|
||||
|
||||
/// Returns the `bool` fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a bool fast field, this method returns an Error.
|
||||
pub fn bool(&self, field_name: &str) -> crate::Result<Arc<dyn Column<bool>>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::Bool, Cardinality::SingleValue)?;
|
||||
self.typed_fast_field_reader(field_name)
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Returns a `u64s` multi-valued fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a u64 multi-valued fast field, this method returns an Error.
|
||||
pub fn u64s(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<u64>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::U64, Cardinality::MultiValues)?;
|
||||
self.typed_fast_field_multi_reader(field_name)
|
||||
}
|
||||
|
||||
/// Returns a `u64s` multi-valued fast field reader reader associated with `field`, regardless
|
||||
/// of whether the given field is effectively of type `u64` or not.
|
||||
///
|
||||
/// If `field` is not a u64 multi-valued fast field, this method returns an Error.
|
||||
pub fn u64s_lenient(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<u64>> {
|
||||
self.typed_fast_field_multi_reader(field_name)
|
||||
}
|
||||
|
||||
/// Returns a `i64s` multi-valued fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a i64 multi-valued fast field, this method returns an Error.
|
||||
pub fn i64s(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<i64>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::I64, Cardinality::MultiValues)?;
|
||||
self.typed_fast_field_multi_reader(self.schema.get_field_name(field))
|
||||
}
|
||||
|
||||
/// Returns a `f64s` multi-valued fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a f64 multi-valued fast field, this method returns an Error.
|
||||
pub fn f64s(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<f64>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::F64, Cardinality::MultiValues)?;
|
||||
self.typed_fast_field_multi_reader(self.schema.get_field_name(field))
|
||||
}
|
||||
|
||||
/// Returns a `bools` multi-valued fast field reader reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a bool multi-valued fast field, this method returns an Error.
|
||||
pub fn bools(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<bool>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::Bool, Cardinality::MultiValues)?;
|
||||
self.typed_fast_field_multi_reader(self.schema.get_field_name(field))
|
||||
}
|
||||
|
||||
/// Returns a `time::OffsetDateTime` multi-valued fast field reader reader associated with
|
||||
/// `field`.
|
||||
///
|
||||
/// If `field` is not a `time::OffsetDateTime` multi-valued fast field, this method returns an
|
||||
/// Error.
|
||||
pub fn dates(&self, field_name: &str) -> crate::Result<MultiValuedFastFieldReader<DateTime>> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
self.check_type(field, FastType::Date, Cardinality::MultiValues)?;
|
||||
self.typed_fast_field_multi_reader(self.schema.get_field_name(field))
|
||||
}
|
||||
|
||||
/// Returns the `bytes` fast field reader associated with `field`.
|
||||
///
|
||||
/// If `field` is not a bytes fast field, returns an Error.
|
||||
pub fn bytes(&self, field_name: &str) -> crate::Result<BytesFastFieldReader> {
|
||||
let field = self.schema.get_field(field_name)?;
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if let FieldType::Bytes(bytes_option) = field_entry.field_type() {
|
||||
if !bytes_option.is_fast() {
|
||||
return Err(crate::TantivyError::SchemaError(format!(
|
||||
"Field {:?} is not a fast field.",
|
||||
field_entry.name()
|
||||
)));
|
||||
}
|
||||
let fast_field_idx_file = self.fast_field_data(field, 0)?;
|
||||
let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
|
||||
let idx_reader = open(fast_field_idx_bytes)?;
|
||||
let data = self.fast_field_data(field, 1)?;
|
||||
BytesFastFieldReader::open(idx_reader, data)
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry).into())
|
||||
}
|
||||
}
|
||||
// Returns the `bytes` fast field reader associated with `field`.
|
||||
//
|
||||
// If `field` is not a bytes fast field, returns an Error.
|
||||
// pub fn bytes(&self, field: Field) -> crate::Result<BytesFastFieldReader> {
|
||||
// let field_entry = self.schema.get_field_entry(field);
|
||||
// if let FieldType::Bytes(bytes_option) = field_entry.field_type() {
|
||||
// if !bytes_option.is_fast() {
|
||||
// return Err(crate::TantivyError::SchemaError(format!(
|
||||
// "Field {:?} is not a fast field.",
|
||||
// field_entry.name()
|
||||
// )));
|
||||
// }
|
||||
// let fast_field_idx_file = self.fast_field_data(field, 0)?;
|
||||
// let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
|
||||
// let idx_reader = open(fast_field_idx_bytes)?;
|
||||
// let data = self.fast_field_data(field, 1)?;
|
||||
// BytesFastFieldReader::open(idx_reader, data)
|
||||
// } else {
|
||||
// Err(FastFieldNotAvailableError::new(field_entry).into())
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use super::FastFieldType;
|
||||
use crate::fastfield::{CompositeFastFieldSerializer};
|
||||
use columnar::{ColumnarWriter, NumericalType, NumericalValue};
|
||||
use common;
|
||||
use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tantivy_bitpacker::BlockedBitpacker;
|
||||
|
||||
use super::multivalued::{MultiValueU128FastFieldWriter, MultiValuedFastFieldWriter};
|
||||
use super::FastFieldType;
|
||||
use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer};
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::UnorderedTermId;
|
||||
use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema, Value};
|
||||
use crate::schema::{Document, Field, FieldEntry, FieldType, Schema, Value};
|
||||
use crate::termdict::TermOrdinal;
|
||||
use crate::{DatePrecision, DocId};
|
||||
|
||||
@@ -20,6 +19,7 @@ use crate::{DatePrecision, DocId};
|
||||
pub struct FastFieldsWriter {
|
||||
columnar_writer: ColumnarWriter,
|
||||
fast_fields: Vec<Option<String>>, //< TODO see if we can cash the field name hash too.
|
||||
num_docs: DocId,
|
||||
// term_id_writers: Vec<MultiValuedFastFieldWriter>,
|
||||
// single_value_writers: Vec<IntFastFieldWriter>,
|
||||
// u128_value_writers: Vec<U128FastFieldWriter>,
|
||||
@@ -122,6 +122,7 @@ impl FastFieldsWriter {
|
||||
FastFieldsWriter {
|
||||
columnar_writer,
|
||||
fast_fields,
|
||||
num_docs: 0u32,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -131,7 +132,8 @@ impl FastFieldsWriter {
|
||||
}
|
||||
|
||||
/// Indexes all of the fastfields of a new document.
|
||||
pub fn add_document(&mut self, doc_id: DocId, doc: &Document) -> crate::Result<()> {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
let doc_id = self.num_docs;
|
||||
for field_value in doc.field_values() {
|
||||
if let Some(field_name) = self.fast_fields[field_value.field().field_id() as usize].as_ref() {
|
||||
match &field_value.value {
|
||||
@@ -155,40 +157,20 @@ impl FastFieldsWriter {
|
||||
}
|
||||
}
|
||||
}
|
||||
self.num_docs += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serializes all of the `FastFieldWriter`s by pushing them in
|
||||
/// order to the fast field serializer.
|
||||
pub fn serialize(
|
||||
self,
|
||||
serializer: &mut CompositeFastFieldSerializer,
|
||||
mapping: &HashMap<Field, FxHashMap<UnorderedTermId, TermOrdinal>>,
|
||||
mut self,
|
||||
wrt: &mut dyn io::Write,
|
||||
doc_id_map: Option<&DocIdMapping>,
|
||||
) -> io::Result<()> {
|
||||
todo!();
|
||||
// for field_writer in self.term_id_writers {
|
||||
// let field = field_writer.field();
|
||||
// field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
|
||||
// }
|
||||
// for field_writer in &self.single_value_writers {
|
||||
// field_writer.serialize(serializer, doc_id_map)?;
|
||||
// }
|
||||
|
||||
// for field_writer in self.multi_values_writers {
|
||||
// let field = field_writer.field();
|
||||
// field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
|
||||
// }
|
||||
// for field_writer in self.bytes_value_writers {
|
||||
// field_writer.serialize(serializer, doc_id_map)?;
|
||||
// }
|
||||
// for field_writer in self.u128_value_writers {
|
||||
// field_writer.serialize(serializer, doc_id_map)?;
|
||||
// }
|
||||
// for field_writer in self.u128_multi_value_writers {
|
||||
// field_writer.serialize(serializer, doc_id_map)?;
|
||||
// }
|
||||
|
||||
assert!(doc_id_map.is_none()); // TODO handle doc id map
|
||||
let num_docs = self.num_docs;
|
||||
self.columnar_writer.serialize(num_docs, wrt)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -442,47 +442,49 @@ mod tests_indexsorting {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sort_index_fast_field() -> crate::Result<()> {
|
||||
let index = create_test_index(
|
||||
Some(IndexSettings {
|
||||
sort_by_field: Some(IndexSortByField {
|
||||
field: "my_number".to_string(),
|
||||
order: Order::Asc,
|
||||
}),
|
||||
..Default::default()
|
||||
}),
|
||||
get_text_options(),
|
||||
)?;
|
||||
assert_eq!(
|
||||
index.settings().sort_by_field.as_ref().unwrap().field,
|
||||
"my_number".to_string()
|
||||
);
|
||||
// #[test]
|
||||
// fn test_sort_index_fast_field() -> crate::Result<()> {
|
||||
// let index = create_test_index(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
// field: "my_number".to_string(),
|
||||
// order: Order::Asc,
|
||||
// }),
|
||||
// ..Default::default()
|
||||
// }),
|
||||
// get_text_options(),
|
||||
// )?;
|
||||
// assert_eq!(
|
||||
// index.settings().sort_by_field.as_ref().unwrap().field,
|
||||
// "my_number".to_string()
|
||||
// );
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
index.schema().get_field("my_number").unwrap();
|
||||
|
||||
let fast_field = fast_fields.u64("my_number").unwrap();
|
||||
assert_eq!(fast_field.get_val(0), 10u64);
|
||||
assert_eq!(fast_field.get_val(1), 20u64);
|
||||
assert_eq!(fast_field.get_val(2), 30u64);
|
||||
// let searcher = index.reader()?.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
// let segment_reader = searcher.segment_reader(0);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let my_number = index.schema().get_field("my_number").unwrap();
|
||||
|
||||
let multifield = fast_fields.u64s("multi_numbers").unwrap();
|
||||
let mut vals = vec![];
|
||||
multifield.get_vals(0u32, &mut vals);
|
||||
assert_eq!(vals, &[] as &[u64]);
|
||||
let mut vals = vec![];
|
||||
multifield.get_vals(1u32, &mut vals);
|
||||
assert_eq!(vals, &[5, 6]);
|
||||
// let fast_field = fast_fields.u64(my_number).unwrap();
|
||||
// assert_eq!(fast_field.get_val(0), 10u64);
|
||||
// assert_eq!(fast_field.get_val(1), 20u64);
|
||||
// assert_eq!(fast_field.get_val(2), 30u64);
|
||||
|
||||
let mut vals = vec![];
|
||||
multifield.get_vals(2u32, &mut vals);
|
||||
assert_eq!(vals, &[3]);
|
||||
Ok(())
|
||||
}
|
||||
// let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
|
||||
// let multifield = fast_fields.u64s(multi_numbers).unwrap();
|
||||
// let mut vals = vec![];
|
||||
// multifield.get_vals(0u32, &mut vals);
|
||||
// assert_eq!(vals, &[] as &[u64]);
|
||||
// let mut vals = vec![];
|
||||
// multifield.get_vals(1u32, &mut vals);
|
||||
// assert_eq!(vals, &[5, 6]);
|
||||
|
||||
// let mut vals = vec![];
|
||||
// multifield.get_vals(2u32, &mut vals);
|
||||
// assert_eq!(vals, &[3]);
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_doc_mapping() {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -150,7 +150,6 @@ fn index_json_value(
|
||||
json_term_writer.term_buffer,
|
||||
ctx,
|
||||
indexing_position,
|
||||
None,
|
||||
);
|
||||
}
|
||||
TextOrDateTime::DateTime(dt) => {
|
||||
|
||||
@@ -7,21 +7,21 @@ use itertools::Itertools;
|
||||
use measure_time::debug_time;
|
||||
|
||||
use super::flat_map_with_buffer::FlatMapWithBufferIter;
|
||||
use super::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueIndexColumn;
|
||||
// use super::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueIndexColumn;
|
||||
use crate::core::{Segment, SegmentReader};
|
||||
use crate::directory::WritePtr;
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::{
|
||||
get_fastfield_codecs_for_multivalue, AliveBitSet, Column, CompositeFastFieldSerializer,
|
||||
MultiValueIndex, MultiValuedFastFieldReader,
|
||||
AliveBitSet, Column, CompositeFastFieldSerializer,
|
||||
};
|
||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
|
||||
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
|
||||
use crate::indexer::sorted_doc_id_column::RemappedDocIdColumn;
|
||||
use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn;
|
||||
// use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn;
|
||||
use crate::indexer::SegmentSerializer;
|
||||
use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
|
||||
use crate::schema::{Cardinality, Field, FieldType, Schema};
|
||||
use crate::schema::{Field, FieldType, Schema};
|
||||
use crate::store::StoreWriter;
|
||||
use crate::termdict::{TermMerger, TermOrdinal};
|
||||
use crate::{
|
||||
@@ -249,11 +249,13 @@ impl IndexMerger {
|
||||
|
||||
fn write_fast_fields(
|
||||
&self,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
fast_field_wrt: &mut WritePtr,
|
||||
mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
debug_time!("write-fast-fields");
|
||||
debug_time!("wrie-fast-fields");
|
||||
todo!();
|
||||
/*
|
||||
|
||||
for (field, field_entry) in self.schema.fields() {
|
||||
let field_type = field_entry.field_type();
|
||||
@@ -306,74 +308,7 @@ impl IndexMerger {
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// used to merge `u128` single fast fields.
|
||||
fn write_u128_multi_fast_field(
|
||||
&self,
|
||||
field: Field,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
let segment_and_ff_readers: Vec<(&SegmentReader, MultiValuedFastFieldReader<u128>)> = self
|
||||
.readers
|
||||
.iter()
|
||||
.map(|segment_reader| {
|
||||
let ff_reader: MultiValuedFastFieldReader<u128> = segment_reader
|
||||
.fast_fields()
|
||||
.u128s(self.schema.get_field_name(field))
|
||||
.expect(
|
||||
"Failed to find index for multivalued field. This is a bug in tantivy, \
|
||||
please report.",
|
||||
);
|
||||
(segment_reader, ff_reader)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self::write_1_n_fast_field_idx_generic(
|
||||
field,
|
||||
fast_field_serializer,
|
||||
doc_id_mapping,
|
||||
&segment_and_ff_readers
|
||||
.iter()
|
||||
.map(|(segment_reader, u64s_reader)| {
|
||||
(*segment_reader, u64s_reader.get_index_reader())
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)?;
|
||||
|
||||
let num_vals = segment_and_ff_readers
|
||||
.iter()
|
||||
.map(|(segment_reader, reader)| {
|
||||
// TODO implement generic version, implement reverse scan, all - deletes
|
||||
if let Some(alive_bitset) = segment_reader.alive_bitset() {
|
||||
alive_bitset
|
||||
.iter_alive()
|
||||
.map(|doc| reader.num_vals(doc))
|
||||
.sum()
|
||||
} else {
|
||||
reader.total_num_vals()
|
||||
}
|
||||
})
|
||||
.sum();
|
||||
|
||||
let fast_field_readers = segment_and_ff_readers
|
||||
.into_iter()
|
||||
.map(|(_, ff_reader)| ff_reader)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let iter_gen = || {
|
||||
doc_id_mapping
|
||||
.iter_old_doc_addrs()
|
||||
.flat_map_with_buffer(|doc_addr, buffer| {
|
||||
let fast_field_reader = &fast_field_readers[doc_addr.segment_ord as usize];
|
||||
fast_field_reader.get_vals(doc_addr.doc_id, buffer);
|
||||
})
|
||||
};
|
||||
|
||||
fast_field_serializer.create_u128_fast_field_with_idx(field, iter_gen, num_vals, 1)?;
|
||||
|
||||
*/
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -535,111 +470,6 @@ impl IndexMerger {
|
||||
Ok(SegmentDocIdMapping::new(sorted_doc_ids, false))
|
||||
}
|
||||
|
||||
// Creating the index file to point into the data, generic over `BytesFastFieldReader` and
|
||||
// `MultiValuedFastFieldReader`
|
||||
//
|
||||
fn write_1_n_fast_field_idx_generic(
|
||||
field: Field,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
segment_and_ff_readers: &[(&SegmentReader, &MultiValueIndex)],
|
||||
) -> crate::Result<()> {
|
||||
let column =
|
||||
RemappedDocIdMultiValueIndexColumn::new(segment_and_ff_readers, doc_id_mapping);
|
||||
|
||||
fast_field_serializer.create_auto_detect_u64_fast_field(field, column)?;
|
||||
Ok(())
|
||||
}
|
||||
/// Returns the fastfield index (index for the data, not the data).
|
||||
fn write_multi_value_fast_field_idx(
|
||||
&self,
|
||||
field: Field,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
let segment_and_ff_readers = self
|
||||
.readers
|
||||
.iter()
|
||||
.map(|reader| {
|
||||
let u64s_reader: MultiValuedFastFieldReader<u64> = reader
|
||||
.fast_fields()
|
||||
.typed_fast_field_multi_reader::<u64>(self.schema.get_field_name(field))
|
||||
.expect(
|
||||
"Failed to find index for multivalued field. This is a bug in tantivy, \
|
||||
please report.",
|
||||
);
|
||||
(reader, u64s_reader)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self::write_1_n_fast_field_idx_generic(
|
||||
field,
|
||||
fast_field_serializer,
|
||||
doc_id_mapping,
|
||||
&segment_and_ff_readers
|
||||
.iter()
|
||||
.map(|(segment_reader, u64s_reader)| {
|
||||
(*segment_reader, u64s_reader.get_index_reader())
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
}
|
||||
|
||||
fn write_term_id_fast_field(
|
||||
&self,
|
||||
field: Field,
|
||||
term_ordinal_mappings: &TermOrdinalMapping,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
debug_time!("write-term-id-fast-field");
|
||||
|
||||
// Multifastfield consists of 2 fastfields.
|
||||
// The first serves as an index into the second one and is strictly increasing.
|
||||
// The second contains the actual values.
|
||||
|
||||
// First we merge the idx fast field.
|
||||
self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?;
|
||||
|
||||
let fast_field_reader = self
|
||||
.readers
|
||||
.iter()
|
||||
.map(|reader| {
|
||||
let ff_reader: MultiValuedFastFieldReader<u64> = reader
|
||||
.fast_fields()
|
||||
.u64s(self.schema.get_field_name(field))
|
||||
.expect("Could not find multivalued u64 fast value reader.");
|
||||
ff_reader
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
// We can now write the actual fast field values.
|
||||
// In the case of hierarchical facets, they are actually term ordinals.
|
||||
{
|
||||
let mut vals = Vec::new();
|
||||
let mut buffer = Vec::new();
|
||||
for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() {
|
||||
let term_ordinal_mapping: &[TermOrdinal] =
|
||||
term_ordinal_mappings.get_segment(old_doc_addr.segment_ord as usize);
|
||||
|
||||
let ff_reader = &fast_field_reader[old_doc_addr.segment_ord as usize];
|
||||
ff_reader.get_vals(old_doc_addr.doc_id, &mut buffer);
|
||||
for &prev_term_ord in &buffer {
|
||||
let new_term_ord = term_ordinal_mapping[prev_term_ord as usize];
|
||||
vals.push(new_term_ord);
|
||||
}
|
||||
}
|
||||
|
||||
let col = VecColumn::from(&vals[..]);
|
||||
fast_field_serializer.create_auto_detect_u64_fast_field_with_idx_and_codecs(
|
||||
field,
|
||||
col,
|
||||
1,
|
||||
&get_fastfield_codecs_for_multivalue(),
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Creates a mapping if the segments are stacked. this is helpful to merge codelines between
|
||||
/// index sorting and the others
|
||||
pub(crate) fn get_doc_id_from_concatenated_data(&self) -> crate::Result<SegmentDocIdMapping> {
|
||||
@@ -664,78 +494,6 @@ impl IndexMerger {
|
||||
);
|
||||
Ok(SegmentDocIdMapping::new(mapping, true))
|
||||
}
|
||||
fn write_multi_fast_field(
|
||||
&self,
|
||||
field: Field,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
// Multifastfield consists of 2 fastfields.
|
||||
// The first serves as an index into the second one and is strictly increasing.
|
||||
// The second contains the actual values.
|
||||
|
||||
// First we merge the idx fast field.
|
||||
|
||||
self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?;
|
||||
|
||||
let fastfield_accessor = RemappedDocIdMultiValueColumn::new(
|
||||
&self.readers,
|
||||
doc_id_mapping,
|
||||
self.schema.get_field_name(field),
|
||||
);
|
||||
fast_field_serializer.create_auto_detect_u64_fast_field_with_idx_and_codecs(
|
||||
field,
|
||||
fastfield_accessor,
|
||||
1,
|
||||
&get_fastfield_codecs_for_multivalue(),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_bytes_fast_field(
|
||||
&self,
|
||||
field: Field,
|
||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||
doc_id_mapping: &SegmentDocIdMapping,
|
||||
) -> crate::Result<()> {
|
||||
let segment_and_ff_readers = self
|
||||
.readers
|
||||
.iter()
|
||||
.map(|reader| {
|
||||
let bytes_reader = reader
|
||||
.fast_fields()
|
||||
.bytes(self.schema.get_field_name(field))
|
||||
.expect(
|
||||
"Failed to find index for bytes field. This is a bug in tantivy, please \
|
||||
report.",
|
||||
);
|
||||
(reader, bytes_reader)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Self::write_1_n_fast_field_idx_generic(
|
||||
field,
|
||||
fast_field_serializer,
|
||||
doc_id_mapping,
|
||||
&segment_and_ff_readers
|
||||
.iter()
|
||||
.map(|(segment_reader, u64s_reader)| {
|
||||
(*segment_reader, u64s_reader.get_index_reader())
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)?;
|
||||
|
||||
let mut serialize_vals = fast_field_serializer.new_bytes_fast_field(field);
|
||||
|
||||
for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() {
|
||||
let bytes_reader = &segment_and_ff_readers[old_doc_addr.segment_ord as usize].1;
|
||||
let val = bytes_reader.get_bytes(old_doc_addr.doc_id);
|
||||
serialize_vals.write_all(val)?;
|
||||
}
|
||||
|
||||
serialize_vals.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_postings_for_field(
|
||||
&self,
|
||||
@@ -1042,7 +800,7 @@ impl IndexMerger {
|
||||
)?;
|
||||
debug!("write-fastfields");
|
||||
self.write_fast_fields(
|
||||
serializer.get_fast_field_serializer(),
|
||||
serializer.get_fast_field_write(),
|
||||
term_ord_mappings,
|
||||
&doc_id_mapping,
|
||||
)?;
|
||||
@@ -1060,13 +818,13 @@ mod tests {
|
||||
use schema::FAST;
|
||||
|
||||
use crate::collector::tests::{
|
||||
BytesFastFieldTestCollector, FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
|
||||
FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE,
|
||||
};
|
||||
use crate::collector::{Count, FacetCollector};
|
||||
use crate::collector::Count;
|
||||
use crate::core::Index;
|
||||
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
|
||||
use crate::schema::{
|
||||
Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
|
||||
Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
|
||||
TextFieldIndexing, INDEXED, TEXT,
|
||||
};
|
||||
use crate::time::OffsetDateTime;
|
||||
@@ -1203,30 +961,28 @@ mod tests {
|
||||
Some("a b c g")
|
||||
);
|
||||
}
|
||||
{
|
||||
let get_fast_vals = |terms: Vec<Term>| {
|
||||
let query = BooleanQuery::new_multiterms_query(terms);
|
||||
searcher.search(
|
||||
&query,
|
||||
&FastFieldTestCollector::for_field("score".to_string()),
|
||||
)
|
||||
};
|
||||
let get_fast_vals_bytes = |terms: Vec<Term>| {
|
||||
let query = BooleanQuery::new_multiterms_query(terms);
|
||||
searcher.search(
|
||||
&query,
|
||||
&BytesFastFieldTestCollector::for_field(bytes_score_field),
|
||||
)
|
||||
};
|
||||
assert_eq!(
|
||||
get_fast_vals(vec![Term::from_field_text(text_field, "a")])?,
|
||||
vec![5, 7, 13]
|
||||
);
|
||||
assert_eq!(
|
||||
get_fast_vals_bytes(vec![Term::from_field_text(text_field, "a")])?,
|
||||
vec![0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0, 13]
|
||||
);
|
||||
}
|
||||
|
||||
// {
|
||||
// let get_fast_vals = |terms: Vec<Term>| {
|
||||
// let query = BooleanQuery::new_multiterms_query(terms);
|
||||
// searcher.search(&query, &FastFieldTestCollector::for_field(score_field))
|
||||
// };
|
||||
// let get_fast_vals_bytes = |terms: Vec<Term>| {
|
||||
// let query = BooleanQuery::new_multiterms_query(terms);
|
||||
// searcher.search(
|
||||
// &query,
|
||||
// &BytesFastFieldTestCollector::for_field(bytes_score_field),
|
||||
// )
|
||||
// };
|
||||
// assert_eq!(
|
||||
// get_fast_vals(vec![Term::from_field_text(text_field, "a")])?,
|
||||
// vec![5, 7, 13]
|
||||
// );
|
||||
// assert_eq!(
|
||||
// get_fast_vals_bytes(vec![Term::from_field_text(text_field, "a")])?,
|
||||
// vec![0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0, 13]
|
||||
// );
|
||||
// }
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1247,18 +1003,20 @@ mod tests {
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
let reader = index.reader().unwrap();
|
||||
let search_term = |searcher: &Searcher, term: Term| {
|
||||
let collector = FastFieldTestCollector::for_field("score".to_string());
|
||||
let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field);
|
||||
let collector = FastFieldTestCollector::for_field("score");
|
||||
// let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field);
|
||||
let term_query = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
// searcher
|
||||
// .search(&term_query, &(collector, bytes_collector))
|
||||
// .map(|(scores, bytes)| {
|
||||
// let mut score_bytes = &bytes[..];
|
||||
// for &score in &scores {
|
||||
// assert_eq!(score as u32, score_bytes.read_u32::<BigEndian>().unwrap());
|
||||
// }
|
||||
// scores
|
||||
// })
|
||||
searcher
|
||||
.search(&term_query, &(collector, bytes_collector))
|
||||
.map(|(scores, bytes)| {
|
||||
let mut score_bytes = &bytes[..];
|
||||
for &score in &scores {
|
||||
assert_eq!(score as u32, score_bytes.read_u32::<BigEndian>().unwrap());
|
||||
}
|
||||
scores
|
||||
})
|
||||
.search(&term_query, &collector)
|
||||
};
|
||||
|
||||
let empty_vec = Vec::<u64>::new();
|
||||
@@ -1537,207 +1295,211 @@ mod tests {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
/// Runs the facet merge scenario without index settings, with overlapping `intval`
/// ranges forced between segments.
#[test]
fn test_merge_facets_sort_none() {
    let index_settings = None;
    let force_segment_value_overlap = true;
    test_merge_facets(index_settings, force_segment_value_overlap);
}
|
||||
|
||||
/// Runs the facet merge scenario on an index sorted by `intval` in ascending order,
/// once with overlapping value ranges between segments and once without.
#[test]
fn test_merge_facets_sort_asc() {
    // Builds fresh settings for each run. The order is `Asc` as the test name states;
    // using `Desc` here would only duplicate `test_merge_facets_sort_desc`.
    let sort_by_intval_asc = || IndexSettings {
        sort_by_field: Some(IndexSortByField {
            field: "intval".to_string(),
            order: Order::Asc,
        }),
        ..Default::default()
    };

    // In the merge case this will go through the doc_id mapping code
    test_merge_facets(Some(sort_by_intval_asc()), true);

    // In the merge case this will not go through the doc_id mapping code, because the data is
    // sorted and disjunct
    test_merge_facets(Some(sort_by_intval_asc()), false);
}
|
||||
|
||||
/// Runs the facet merge scenario on an index sorted by `intval` in descending order,
/// once with overlapping value ranges between segments and once without.
#[test]
fn test_merge_facets_sort_desc() {
    // Builds fresh settings for each run.
    let sort_by_intval_desc = || IndexSettings {
        sort_by_field: Some(IndexSortByField {
            field: "intval".to_string(),
            order: Order::Desc,
        }),
        ..Default::default()
    };

    // In the merge case this will go through the doc_id mapping code
    test_merge_facets(Some(sort_by_intval_desc()), true);

    // In the merge case this will not go through the doc_id mapping code, because the data is
    // sorted and disjunct
    test_merge_facets(Some(sort_by_intval_desc()), false);
}
|
||||
// TODO re-enable
|
||||
// #[test]
|
||||
// fn test_merge_facets_sort_none() {
|
||||
// test_merge_facets(None, true)
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_merge_facets_sort_asc() {
|
||||
// // In the merge case this will go through the doc_id mapping code
|
||||
// test_merge_facets(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
// field: "intval".to_string(),
|
||||
// order: Order::Desc,
|
||||
// }),
|
||||
// ..Default::default()
|
||||
// }),
|
||||
// true,
|
||||
// );
|
||||
// // In the merge case this will not go through the doc_id mapping code, because the data is
|
||||
// // sorted and disjunct
|
||||
// test_merge_facets(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
// field: "intval".to_string(),
|
||||
// order: Order::Desc,
|
||||
// }),
|
||||
// ..Default::default()
|
||||
// }),
|
||||
// false,
|
||||
// );
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_merge_facets_sort_desc() {
|
||||
// // In the merge case this will go through the doc_id mapping code
|
||||
// test_merge_facets(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
// field: "intval".to_string(),
|
||||
// order: Order::Desc,
|
||||
// }),
|
||||
// ..Default::default()
|
||||
// }),
|
||||
// true,
|
||||
// );
|
||||
// // In the merge case this will not go through the doc_id mapping code, because the data is
|
||||
// // sorted and disjunct
|
||||
// test_merge_facets(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
// field: "intval".to_string(),
|
||||
// order: Order::Desc,
|
||||
// }),
|
||||
// ..Default::default()
|
||||
// }),
|
||||
// false,
|
||||
// );
|
||||
// }
|
||||
|
||||
// force_segment_value_overlap forces the int value for sorting to have overlapping min and max
|
||||
// ranges between segments so that merge algorithm can't apply certain optimizations
|
||||
fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
|
||||
let int_options = NumericOptions::default()
|
||||
.set_fast()
|
||||
.set_indexed();
|
||||
let int_field = schema_builder.add_u64_field("intval", int_options);
|
||||
let mut index_builder = Index::builder().schema(schema_builder.build());
|
||||
if let Some(settings) = index_settings {
|
||||
index_builder = index_builder.settings(settings);
|
||||
}
|
||||
let index = index_builder.create_in_ram().unwrap();
|
||||
// let index = Index::create_in_ram(schema_builder.build());
|
||||
let reader = index.reader().unwrap();
|
||||
let mut int_val = 0;
|
||||
{
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
let index_doc =
|
||||
|index_writer: &mut IndexWriter, doc_facets: &[&str], int_val: &mut u64| {
|
||||
let mut doc = Document::default();
|
||||
for facet in doc_facets {
|
||||
doc.add_facet(facet_field, Facet::from(facet));
|
||||
}
|
||||
doc.add_u64(int_field, *int_val);
|
||||
*int_val += 1;
|
||||
index_writer.add_document(doc).unwrap();
|
||||
};
|
||||
// fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
|
||||
// let mut schema_builder = schema::Schema::builder();
|
||||
// let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
|
||||
// let int_options = NumericOptions::default()
|
||||
// .set_fast()
|
||||
// .set_indexed();
|
||||
// let int_field = schema_builder.add_u64_field("intval", int_options);
|
||||
// let mut index_builder = Index::builder().schema(schema_builder.build());
|
||||
// if let Some(settings) = index_settings {
|
||||
// index_builder = index_builder.settings(settings);
|
||||
// }
|
||||
// let index = index_builder.create_in_ram().unwrap();
|
||||
// // let index = Index::create_in_ram(schema_builder.build());
|
||||
// let reader = index.reader().unwrap();
|
||||
// let mut int_val = 0;
|
||||
// {
|
||||
// let mut index_writer = index.writer_for_tests().unwrap();
|
||||
// let index_doc =
|
||||
// |index_writer: &mut IndexWriter, doc_facets: &[&str], int_val: &mut u64| {
|
||||
// let mut doc = Document::default();
|
||||
// for facet in doc_facets {
|
||||
// doc.add_facet(facet_field, Facet::from(facet));
|
||||
// }
|
||||
// doc.add_u64(int_field, *int_val);
|
||||
// *int_val += 1;
|
||||
// index_writer.add_document(doc).unwrap();
|
||||
// };
|
||||
|
||||
index_doc(
|
||||
&mut index_writer,
|
||||
&["/top/a/firstdoc", "/top/b"],
|
||||
&mut int_val,
|
||||
);
|
||||
index_doc(
|
||||
&mut index_writer,
|
||||
&["/top/a/firstdoc", "/top/b", "/top/c"],
|
||||
&mut int_val,
|
||||
);
|
||||
index_doc(&mut index_writer, &["/top/a", "/top/b"], &mut int_val);
|
||||
index_doc(&mut index_writer, &["/top/a"], &mut int_val);
|
||||
// index_doc(
|
||||
// &mut index_writer,
|
||||
// &["/top/a/firstdoc", "/top/b"],
|
||||
// &mut int_val,
|
||||
// );
|
||||
// index_doc(
|
||||
// &mut index_writer,
|
||||
// &["/top/a/firstdoc", "/top/b", "/top/c"],
|
||||
// &mut int_val,
|
||||
// );
|
||||
// index_doc(&mut index_writer, &["/top/a", "/top/b"], &mut int_val);
|
||||
// index_doc(&mut index_writer, &["/top/a"], &mut int_val);
|
||||
|
||||
index_doc(&mut index_writer, &["/top/b", "/top/d"], &mut int_val);
|
||||
if force_segment_value_overlap {
|
||||
index_doc(&mut index_writer, &["/top/d"], &mut 0);
|
||||
index_doc(&mut index_writer, &["/top/e"], &mut 10);
|
||||
index_writer.commit().expect("committed");
|
||||
index_doc(&mut index_writer, &["/top/a"], &mut 5); // 5 is between 0 - 10 so the
|
||||
// segments don' have disjunct
|
||||
// ranges
|
||||
} else {
|
||||
index_doc(&mut index_writer, &["/top/d"], &mut int_val);
|
||||
index_doc(&mut index_writer, &["/top/e"], &mut int_val);
|
||||
index_writer.commit().expect("committed");
|
||||
index_doc(&mut index_writer, &["/top/a"], &mut int_val);
|
||||
}
|
||||
index_doc(&mut index_writer, &["/top/b"], &mut int_val);
|
||||
index_doc(&mut index_writer, &["/top/c"], &mut int_val);
|
||||
index_writer.commit().expect("committed");
|
||||
// index_doc(&mut index_writer, &["/top/b", "/top/d"], &mut int_val);
|
||||
// if force_segment_value_overlap {
|
||||
// index_doc(&mut index_writer, &["/top/d"], &mut 0);
|
||||
// index_doc(&mut index_writer, &["/top/e"], &mut 10);
|
||||
// index_writer.commit().expect("committed");
|
||||
// index_doc(&mut index_writer, &["/top/a"], &mut 5); // 5 is between 0 - 10 so the
|
||||
// // segments don' have disjunct
|
||||
// // ranges
|
||||
// } else {
|
||||
// index_doc(&mut index_writer, &["/top/d"], &mut int_val);
|
||||
// index_doc(&mut index_writer, &["/top/e"], &mut int_val);
|
||||
// index_writer.commit().expect("committed");
|
||||
// index_doc(&mut index_writer, &["/top/a"], &mut int_val);
|
||||
// }
|
||||
// index_doc(&mut index_writer, &["/top/b"], &mut int_val);
|
||||
// index_doc(&mut index_writer, &["/top/c"], &mut int_val);
|
||||
// index_writer.commit().expect("committed");
|
||||
|
||||
index_doc(&mut index_writer, &["/top/e", "/top/f"], &mut int_val);
|
||||
index_writer.commit().expect("committed");
|
||||
}
|
||||
// index_doc(&mut index_writer, &["/top/e", "/top/f"], &mut int_val);
|
||||
// index_writer.commit().expect("committed");
|
||||
// }
|
||||
|
||||
reader.reload().unwrap();
|
||||
let test_searcher = |expected_num_docs: usize, expected: &[(&str, u64)]| {
|
||||
let searcher = reader.searcher();
|
||||
let mut facet_collector = FacetCollector::for_field(facet_field);
|
||||
facet_collector.add_facet(Facet::from("/top"));
|
||||
let (count, facet_counts) = searcher
|
||||
.search(&AllQuery, &(Count, facet_collector))
|
||||
.unwrap();
|
||||
assert_eq!(count, expected_num_docs);
|
||||
let facets: Vec<(String, u64)> = facet_counts
|
||||
.get("/top")
|
||||
.map(|(facet, count)| (facet.to_string(), count))
|
||||
.collect();
|
||||
assert_eq!(
|
||||
facets,
|
||||
expected
|
||||
.iter()
|
||||
.map(|&(facet_str, count)| (String::from(facet_str), count))
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
};
|
||||
test_searcher(
|
||||
11,
|
||||
&[
|
||||
("/top/a", 5),
|
||||
("/top/b", 5),
|
||||
("/top/c", 2),
|
||||
("/top/d", 2),
|
||||
("/top/e", 2),
|
||||
("/top/f", 1),
|
||||
],
|
||||
);
|
||||
// Merging the segments
|
||||
{
|
||||
let segment_ids = index
|
||||
.searchable_segment_ids()
|
||||
.expect("Searchable segments failed.");
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
index_writer
|
||||
.merge(&segment_ids)
|
||||
.wait()
|
||||
.expect("Merging failed");
|
||||
index_writer.wait_merging_threads().unwrap();
|
||||
reader.reload().unwrap();
|
||||
test_searcher(
|
||||
11,
|
||||
&[
|
||||
("/top/a", 5),
|
||||
("/top/b", 5),
|
||||
("/top/c", 2),
|
||||
("/top/d", 2),
|
||||
("/top/e", 2),
|
||||
("/top/f", 1),
|
||||
],
|
||||
);
|
||||
}
|
||||
// reader.reload().unwrap();
|
||||
// let test_searcher = |expected_num_docs: usize, expected: &[(&str, u64)]| {
|
||||
// let searcher = reader.searcher();
|
||||
// let mut facet_collector = FacetCollector::for_field(facet_field);
|
||||
// facet_collector.add_facet(Facet::from("/top"));
|
||||
// let (count, facet_counts) = searcher
|
||||
// .search(&AllQuery, &(Count, facet_collector))
|
||||
// .unwrap();
|
||||
// assert_eq!(count, expected_num_docs);
|
||||
// let facets: Vec<(String, u64)> = facet_counts
|
||||
// .get("/top")
|
||||
// .map(|(facet, count)| (facet.to_string(), count))
|
||||
// .collect();
|
||||
// assert_eq!(
|
||||
// facets,
|
||||
// expected
|
||||
// .iter()
|
||||
// .map(|&(facet_str, count)| (String::from(facet_str), count))
|
||||
// .collect::<Vec<_>>()
|
||||
// );
|
||||
// };
|
||||
// test_searcher(
|
||||
// 11,
|
||||
// &[
|
||||
// ("/top/a", 5),
|
||||
// ("/top/b", 5),
|
||||
// ("/top/c", 2),
|
||||
// ("/top/d", 2),
|
||||
// ("/top/e", 2),
|
||||
// ("/top/f", 1),
|
||||
// ],
|
||||
// );
|
||||
// // Merging the segments
|
||||
// {
|
||||
// let segment_ids = index
|
||||
// .searchable_segment_ids()
|
||||
// .expect("Searchable segments failed.");
|
||||
// let mut index_writer = index.writer_for_tests().unwrap();
|
||||
// index_writer
|
||||
// .merge(&segment_ids)
|
||||
// .wait()
|
||||
// .expect("Merging failed");
|
||||
// index_writer.wait_merging_threads().unwrap();
|
||||
// reader.reload().unwrap();
|
||||
// test_searcher(
|
||||
// 11,
|
||||
// &[
|
||||
// ("/top/a", 5),
|
||||
// ("/top/b", 5),
|
||||
// ("/top/c", 2),
|
||||
// ("/top/d", 2),
|
||||
// ("/top/e", 2),
|
||||
// ("/top/f", 1),
|
||||
// ],
|
||||
// );
|
||||
// }
|
||||
|
||||
// Deleting one term
|
||||
{
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
let facet = Facet::from_path(vec!["top", "a", "firstdoc"]);
|
||||
let facet_term = Term::from_facet(facet_field, &facet);
|
||||
index_writer.delete_term(facet_term);
|
||||
index_writer.commit().unwrap();
|
||||
reader.reload().unwrap();
|
||||
test_searcher(
|
||||
9,
|
||||
&[
|
||||
("/top/a", 3),
|
||||
("/top/b", 3),
|
||||
("/top/c", 1),
|
||||
("/top/d", 2),
|
||||
("/top/e", 2),
|
||||
("/top/f", 1),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
// // Deleting one term
|
||||
// {
|
||||
// let mut index_writer = index.writer_for_tests().unwrap();
|
||||
// let facet = Facet::from_path(vec!["top", "a", "firstdoc"]);
|
||||
// let facet_term = Term::from_facet(facet_field, &facet);
|
||||
// index_writer.delete_term(facet_term);
|
||||
// index_writer.commit().unwrap();
|
||||
// reader.reload().unwrap();
|
||||
// test_searcher(
|
||||
// 9,
|
||||
// &[
|
||||
// ("/top/a", 3),
|
||||
// ("/top/b", 3),
|
||||
// ("/top/c", 1),
|
||||
// ("/top/d", 2),
|
||||
// ("/top/e", 2),
|
||||
// ("/top/f", 1),
|
||||
// ],
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_bug_merge() -> crate::Result<()> {
|
||||
@@ -1839,45 +1601,45 @@ mod tests {
|
||||
|
||||
{
|
||||
let segment = searcher.segment_reader(0u32);
|
||||
let ff_reader = segment.fast_fields().u64s("intvals").unwrap();
|
||||
// let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
|
||||
|
||||
ff_reader.get_vals(0, &mut vals);
|
||||
assert_eq!(&vals, &[1, 2]);
|
||||
// ff_reader.get_vals(0, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 2]);
|
||||
|
||||
ff_reader.get_vals(1, &mut vals);
|
||||
assert_eq!(&vals, &[1, 2, 3]);
|
||||
// ff_reader.get_vals(1, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 2, 3]);
|
||||
|
||||
ff_reader.get_vals(2, &mut vals);
|
||||
assert_eq!(&vals, &[4, 5]);
|
||||
// ff_reader.get_vals(2, &mut vals);
|
||||
// assert_eq!(&vals, &[4, 5]);
|
||||
|
||||
ff_reader.get_vals(3, &mut vals);
|
||||
assert_eq!(&vals, &[1, 2]);
|
||||
// ff_reader.get_vals(3, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 2]);
|
||||
|
||||
ff_reader.get_vals(4, &mut vals);
|
||||
assert_eq!(&vals, &[1, 5]);
|
||||
// ff_reader.get_vals(4, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 5]);
|
||||
|
||||
ff_reader.get_vals(5, &mut vals);
|
||||
assert_eq!(&vals, &[3]);
|
||||
// ff_reader.get_vals(5, &mut vals);
|
||||
// assert_eq!(&vals, &[3]);
|
||||
|
||||
ff_reader.get_vals(6, &mut vals);
|
||||
assert_eq!(&vals, &[17]);
|
||||
// ff_reader.get_vals(6, &mut vals);
|
||||
// assert_eq!(&vals, &[17]);
|
||||
}
|
||||
|
||||
{
|
||||
let segment = searcher.segment_reader(1u32);
|
||||
let ff_reader = segment.fast_fields().u64s("intvals").unwrap();
|
||||
ff_reader.get_vals(0, &mut vals);
|
||||
assert_eq!(&vals, &[28, 27]);
|
||||
// let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
|
||||
// ff_reader.get_vals(0, &mut vals);
|
||||
// assert_eq!(&vals, &[28, 27]);
|
||||
|
||||
ff_reader.get_vals(1, &mut vals);
|
||||
assert_eq!(&vals, &[1_000]);
|
||||
// ff_reader.get_vals(1, &mut vals);
|
||||
// assert_eq!(&vals, &[1_000]);
|
||||
}
|
||||
|
||||
{
|
||||
let segment = searcher.segment_reader(2u32);
|
||||
let ff_reader = segment.fast_fields().u64s("intvals").unwrap();
|
||||
ff_reader.get_vals(0, &mut vals);
|
||||
assert_eq!(&vals, &[20]);
|
||||
// let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
|
||||
// ff_reader.get_vals(0, &mut vals);
|
||||
// assert_eq!(&vals, &[20]);
|
||||
}
|
||||
|
||||
// Merging the segments
|
||||
@@ -1892,37 +1654,37 @@ mod tests {
|
||||
{
|
||||
let searcher = reader.searcher();
|
||||
let segment = searcher.segment_reader(0u32);
|
||||
let ff_reader = segment.fast_fields().u64s("intvals").unwrap();
|
||||
// let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
|
||||
|
||||
ff_reader.get_vals(0, &mut vals);
|
||||
assert_eq!(&vals, &[1, 2]);
|
||||
// ff_reader.get_vals(0, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 2]);
|
||||
|
||||
ff_reader.get_vals(1, &mut vals);
|
||||
assert_eq!(&vals, &[1, 2, 3]);
|
||||
// ff_reader.get_vals(1, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 2, 3]);
|
||||
|
||||
ff_reader.get_vals(2, &mut vals);
|
||||
assert_eq!(&vals, &[4, 5]);
|
||||
// ff_reader.get_vals(2, &mut vals);
|
||||
// assert_eq!(&vals, &[4, 5]);
|
||||
|
||||
ff_reader.get_vals(3, &mut vals);
|
||||
assert_eq!(&vals, &[1, 2]);
|
||||
// ff_reader.get_vals(3, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 2]);
|
||||
|
||||
ff_reader.get_vals(4, &mut vals);
|
||||
assert_eq!(&vals, &[1, 5]);
|
||||
// ff_reader.get_vals(4, &mut vals);
|
||||
// assert_eq!(&vals, &[1, 5]);
|
||||
|
||||
ff_reader.get_vals(5, &mut vals);
|
||||
assert_eq!(&vals, &[3]);
|
||||
// ff_reader.get_vals(5, &mut vals);
|
||||
// assert_eq!(&vals, &[3]);
|
||||
|
||||
ff_reader.get_vals(6, &mut vals);
|
||||
assert_eq!(&vals, &[17]);
|
||||
// ff_reader.get_vals(6, &mut vals);
|
||||
// assert_eq!(&vals, &[17]);
|
||||
|
||||
ff_reader.get_vals(7, &mut vals);
|
||||
assert_eq!(&vals, &[28, 27]);
|
||||
// ff_reader.get_vals(7, &mut vals);
|
||||
// assert_eq!(&vals, &[28, 27]);
|
||||
|
||||
ff_reader.get_vals(8, &mut vals);
|
||||
assert_eq!(&vals, &[1_000]);
|
||||
// ff_reader.get_vals(8, &mut vals);
|
||||
// assert_eq!(&vals, &[1_000]);
|
||||
|
||||
ff_reader.get_vals(9, &mut vals);
|
||||
assert_eq!(&vals, &[20]);
|
||||
// ff_reader.get_vals(9, &mut vals);
|
||||
// assert_eq!(&vals, &[20]);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
mod tests {
|
||||
use crate::collector::TopDocs;
|
||||
use crate::core::Index;
|
||||
use crate::fastfield::{AliveBitSet, MultiValuedFastFieldReader};
|
||||
use crate::fastfield::AliveBitSet;
|
||||
use crate::query::QueryParser;
|
||||
use crate::schema::{
|
||||
self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, NumericOptions,
|
||||
self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
|
||||
TextFieldIndexing, TextOptions,
|
||||
};
|
||||
use crate::{DocAddress, DocSet, IndexSettings, IndexSortByField, Order, Postings, Term};
|
||||
@@ -349,128 +349,131 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_sorted_index_asc() {
|
||||
let index = create_test_index(
|
||||
Some(IndexSettings {
|
||||
sort_by_field: Some(IndexSortByField {
|
||||
field: "intval".to_string(),
|
||||
order: Order::Asc,
|
||||
}),
|
||||
..Default::default()
|
||||
}),
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
// #[test]
|
||||
// fn test_merge_sorted_index_asc() {
|
||||
// let index = create_test_index(
|
||||
// Some(IndexSettings {
|
||||
// sort_by_field: Some(IndexSortByField {
|
||||
// field: "intval".to_string(),
|
||||
// order: Order::Asc,
|
||||
// }),
|
||||
// ..Default::default()
|
||||
// }),
|
||||
// false,
|
||||
// )
|
||||
// .unwrap();
|
||||
|
||||
let int_field = index.schema().get_field("intval").unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
let segment_reader = searcher.segment_readers().last().unwrap();
|
||||
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let fast_field = fast_fields.u64("intval").unwrap();
|
||||
assert_eq!(fast_field.get_val(0), 1u64);
|
||||
assert_eq!(fast_field.get_val(1), 2u64);
|
||||
assert_eq!(fast_field.get_val(2), 3u64);
|
||||
assert_eq!(fast_field.get_val(3), 10u64);
|
||||
assert_eq!(fast_field.get_val(4), 20u64);
|
||||
assert_eq!(fast_field.get_val(5), 1_000u64);
|
||||
// let int_field = index.schema().get_field("intval").unwrap();
|
||||
// let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
|
||||
// let bytes_field = index.schema().get_field("bytes").unwrap();
|
||||
// let reader = index.reader().unwrap();
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.segment_readers().len(), 1);
|
||||
// let segment_reader = searcher.segment_readers().last().unwrap();
|
||||
|
||||
let get_vals = |fast_field: &MultiValuedFastFieldReader<u64>, doc_id: u32| -> Vec<u64> {
|
||||
let mut vals = vec![];
|
||||
fast_field.get_vals(doc_id, &mut vals);
|
||||
vals
|
||||
};
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let fast_field = fast_fields.u64s("multi_numbers").unwrap();
|
||||
assert_eq!(&get_vals(&fast_field, 0), &[] as &[u64]);
|
||||
assert_eq!(&get_vals(&fast_field, 1), &[2, 3]);
|
||||
assert_eq!(&get_vals(&fast_field, 2), &[3, 4]);
|
||||
assert_eq!(&get_vals(&fast_field, 3), &[10, 11]);
|
||||
assert_eq!(&get_vals(&fast_field, 4), &[20]);
|
||||
assert_eq!(&get_vals(&fast_field, 5), &[1001, 1002]);
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let fast_field = fast_fields.u64(int_field).unwrap();
|
||||
// assert_eq!(fast_field.get_val(0), 1u64);
|
||||
// assert_eq!(fast_field.get_val(1), 2u64);
|
||||
// assert_eq!(fast_field.get_val(2), 3u64);
|
||||
// assert_eq!(fast_field.get_val(3), 10u64);
|
||||
// assert_eq!(fast_field.get_val(4), 20u64);
|
||||
// assert_eq!(fast_field.get_val(5), 1_000u64);
|
||||
|
||||
let fast_field = fast_fields.bytes("bytes").unwrap();
|
||||
assert_eq!(fast_field.get_bytes(0), &[] as &[u8]);
|
||||
assert_eq!(fast_field.get_bytes(2), &[1, 2, 3]);
|
||||
assert_eq!(fast_field.get_bytes(5), &[5, 5]);
|
||||
// let get_vals = |fast_field: &MultiValuedFastFieldReader<u64>, doc_id: u32| -> Vec<u64> {
|
||||
// let mut vals = vec![];
|
||||
// fast_field.get_vals(doc_id, &mut vals);
|
||||
// vals
|
||||
// };
|
||||
// let fast_fields = segment_reader.fast_fields();
|
||||
// let fast_field = fast_fields.u64s(multi_numbers).unwrap();
|
||||
// assert_eq!(&get_vals(&fast_field, 0), &[] as &[u64]);
|
||||
// assert_eq!(&get_vals(&fast_field, 1), &[2, 3]);
|
||||
// assert_eq!(&get_vals(&fast_field, 2), &[3, 4]);
|
||||
// assert_eq!(&get_vals(&fast_field, 3), &[10, 11]);
|
||||
// assert_eq!(&get_vals(&fast_field, 4), &[20]);
|
||||
// assert_eq!(&get_vals(&fast_field, 5), &[1001, 1002]);
|
||||
|
||||
// test new field norm mapping
|
||||
{
|
||||
let my_text_field = index.schema().get_field("text_field").unwrap();
|
||||
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap();
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(1), 4);
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(3), 1);
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num
|
||||
}
|
||||
// let fast_field = fast_fields.bytes(bytes_field).unwrap();
|
||||
// assert_eq!(fast_field.get_bytes(0), &[] as &[u8]);
|
||||
// assert_eq!(fast_field.get_bytes(2), &[1, 2, 3]);
|
||||
// assert_eq!(fast_field.get_bytes(5), &[5, 5]);
|
||||
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
{
|
||||
let my_text_field = index.schema().get_field("text_field").unwrap();
|
||||
// // test new field norm mapping
|
||||
// {
|
||||
// let my_text_field = index.schema().get_field("text_field").unwrap();
|
||||
// let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap();
|
||||
// assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
|
||||
// assert_eq!(fieldnorm_reader.fieldnorm(1), 4);
|
||||
// assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text
|
||||
// assert_eq!(fieldnorm_reader.fieldnorm(3), 1);
|
||||
// assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num
|
||||
// }
|
||||
|
||||
let do_search = |term: &str| {
|
||||
let query = QueryParser::for_index(&index, vec![my_text_field])
|
||||
.parse_query(term)
|
||||
.unwrap();
|
||||
let top_docs: Vec<(f32, DocAddress)> =
|
||||
searcher.search(&query, &TopDocs::with_limit(3)).unwrap();
|
||||
// let searcher = index.reader().unwrap().searcher();
|
||||
// {
|
||||
// let my_text_field = index.schema().get_field("text_field").unwrap();
|
||||
|
||||
top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>()
|
||||
};
|
||||
// let do_search = |term: &str| {
|
||||
// let query = QueryParser::for_index(&index, vec![my_text_field])
|
||||
// .parse_query(term)
|
||||
// .unwrap();
|
||||
// let top_docs: Vec<(f32, DocAddress)> =
|
||||
// searcher.search(&query, &TopDocs::with_limit(3)).unwrap();
|
||||
|
||||
assert_eq!(do_search("some"), vec![2]);
|
||||
assert_eq!(do_search("blubber"), vec![3]);
|
||||
assert_eq!(do_search("biggest"), vec![5]);
|
||||
}
|
||||
// top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>()
|
||||
// };
|
||||
|
||||
// postings file
|
||||
{
|
||||
let my_text_field = index.schema().get_field("text_field").unwrap();
|
||||
let term_a = Term::from_field_text(my_text_field, "text");
|
||||
let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
|
||||
let mut postings = inverted_index
|
||||
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
// assert_eq!(do_search("some"), vec![2]);
|
||||
// assert_eq!(do_search("blubber"), vec![3]);
|
||||
// assert_eq!(do_search("biggest"), vec![5]);
|
||||
// }
|
||||
|
||||
assert_eq!(postings.doc_freq(), 2);
|
||||
let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
|
||||
assert_eq!(
|
||||
postings.doc_freq_given_deletes(
|
||||
segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
|
||||
),
|
||||
2
|
||||
);
|
||||
// // postings file
|
||||
// {
|
||||
// let my_text_field = index.schema().get_field("text_field").unwrap();
|
||||
// let term_a = Term::from_field_text(my_text_field, "text");
|
||||
// let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
|
||||
// let mut postings = inverted_index
|
||||
// .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
|
||||
// .unwrap()
|
||||
// .unwrap();
|
||||
|
||||
let mut output = vec![];
|
||||
postings.positions(&mut output);
|
||||
assert_eq!(output, vec![1, 3]);
|
||||
postings.advance();
|
||||
// assert_eq!(postings.doc_freq(), 2);
|
||||
// let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
|
||||
// assert_eq!(
|
||||
// postings.doc_freq_given_deletes(
|
||||
// segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
|
||||
// ),
|
||||
// 2
|
||||
// );
|
||||
|
||||
postings.positions(&mut output);
|
||||
assert_eq!(output, vec![1]);
|
||||
}
|
||||
// let mut output = vec![];
|
||||
// postings.positions(&mut output);
|
||||
// assert_eq!(output, vec![1, 3]);
|
||||
// postings.advance();
|
||||
|
||||
// access doc store
|
||||
{
|
||||
let doc = searcher.doc(DocAddress::new(0, 0)).unwrap();
|
||||
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1));
|
||||
let doc = searcher.doc(DocAddress::new(0, 1)).unwrap();
|
||||
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(2));
|
||||
let doc = searcher.doc(DocAddress::new(0, 2)).unwrap();
|
||||
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(3));
|
||||
let doc = searcher.doc(DocAddress::new(0, 3)).unwrap();
|
||||
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(10));
|
||||
let doc = searcher.doc(DocAddress::new(0, 4)).unwrap();
|
||||
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(20));
|
||||
let doc = searcher.doc(DocAddress::new(0, 5)).unwrap();
|
||||
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1_000));
|
||||
}
|
||||
}
|
||||
// postings.positions(&mut output);
|
||||
// assert_eq!(output, vec![1]);
|
||||
// }
|
||||
|
||||
// // access doc store
|
||||
// {
|
||||
// let doc = searcher.doc(DocAddress::new(0, 0)).unwrap();
|
||||
// assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1));
|
||||
// let doc = searcher.doc(DocAddress::new(0, 1)).unwrap();
|
||||
// assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(2));
|
||||
// let doc = searcher.doc(DocAddress::new(0, 2)).unwrap();
|
||||
// assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(3));
|
||||
// let doc = searcher.doc(DocAddress::new(0, 3)).unwrap();
|
||||
// assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(10));
|
||||
// let doc = searcher.doc(DocAddress::new(0, 4)).unwrap();
|
||||
// assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(20));
|
||||
// let doc = searcher.doc(DocAddress::new(0, 5)).unwrap();
|
||||
// assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1_000));
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
|
||||
@@ -20,7 +20,7 @@ pub mod segment_serializer;
|
||||
pub mod segment_updater;
|
||||
mod segment_writer;
|
||||
mod sorted_doc_id_column;
|
||||
mod sorted_doc_id_multivalue_column;
|
||||
// mod sorted_doc_id_multivalue_column;
|
||||
mod stamper;
|
||||
|
||||
use crossbeam_channel as channel;
|
||||
@@ -58,7 +58,7 @@ type AddBatchReceiver = channel::Receiver<AddBatch>;
|
||||
#[cfg(test)]
|
||||
mod tests_mmap {
|
||||
use crate::collector::Count;
|
||||
use crate::query::QueryParser;
|
||||
// use crate::query::QueryParser;
|
||||
use crate::schema::{JsonObjectOptions, Schema, TEXT};
|
||||
use crate::{Index, Term};
|
||||
|
||||
@@ -79,45 +79,45 @@ mod tests_mmap {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_field_expand_dots_disabled_dot_escaped_required() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let json_field = schema_builder.add_json_field("json", TEXT);
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
|
||||
index_writer.add_document(doc!(json_field=>json)).unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.num_docs(), 1);
|
||||
let parse_query = QueryParser::for_index(&index, Vec::new());
|
||||
let query = parse_query
|
||||
.parse_query(r#"json.k8s\.container\.name:prometheus"#)
|
||||
.unwrap();
|
||||
let num_docs = searcher.search(&query, &Count).unwrap();
|
||||
assert_eq!(num_docs, 1);
|
||||
}
|
||||
// #[test]
|
||||
// fn test_json_field_expand_dots_disabled_dot_escaped_required() {
|
||||
// let mut schema_builder = Schema::builder();
|
||||
// let json_field = schema_builder.add_json_field("json", TEXT);
|
||||
// let index = Index::create_in_ram(schema_builder.build());
|
||||
// let mut index_writer = index.writer_for_tests().unwrap();
|
||||
// let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
|
||||
// index_writer.add_document(doc!(json_field=>json)).unwrap();
|
||||
// index_writer.commit().unwrap();
|
||||
// let reader = index.reader().unwrap();
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.num_docs(), 1);
|
||||
// let parse_query = QueryParser::for_index(&index, Vec::new());
|
||||
// let query = parse_query
|
||||
// .parse_query(r#"json.k8s\.container\.name:prometheus"#)
|
||||
// .unwrap();
|
||||
// let num_docs = searcher.search(&query, &Count).unwrap();
|
||||
// assert_eq!(num_docs, 1);
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let json_options: JsonObjectOptions =
|
||||
JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
|
||||
let json_field = schema_builder.add_json_field("json", json_options);
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
|
||||
index_writer.add_document(doc!(json_field=>json)).unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
assert_eq!(searcher.num_docs(), 1);
|
||||
let parse_query = QueryParser::for_index(&index, Vec::new());
|
||||
let query = parse_query
|
||||
.parse_query(r#"json.k8s.container.name:prometheus"#)
|
||||
.unwrap();
|
||||
let num_docs = searcher.search(&query, &Count).unwrap();
|
||||
assert_eq!(num_docs, 1);
|
||||
}
|
||||
// #[test]
|
||||
// fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
|
||||
// let mut schema_builder = Schema::builder();
|
||||
// let json_options: JsonObjectOptions =
|
||||
// JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
|
||||
// let json_field = schema_builder.add_json_field("json", json_options);
|
||||
// let index = Index::create_in_ram(schema_builder.build());
|
||||
// let mut index_writer = index.writer_for_tests().unwrap();
|
||||
// let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
|
||||
// index_writer.add_document(doc!(json_field=>json)).unwrap();
|
||||
// index_writer.commit().unwrap();
|
||||
// let reader = index.reader().unwrap();
|
||||
// let searcher = reader.searcher();
|
||||
// assert_eq!(searcher.num_docs(), 1);
|
||||
// let parse_query = QueryParser::for_index(&index, Vec::new());
|
||||
// let query = parse_query
|
||||
// .parse_query(r#"json.k8s.container.name:prometheus"#)
|
||||
// .unwrap();
|
||||
// let num_docs = searcher.search(&query, &Count).unwrap();
|
||||
// assert_eq!(num_docs, 1);
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
use common::TerminatingWrite;
|
||||
|
||||
use crate::core::{Segment, SegmentComponent};
|
||||
use crate::directory::WritePtr;
|
||||
use crate::fastfield::CompositeFastFieldSerializer;
|
||||
use crate::fieldnorm::FieldNormsSerializer;
|
||||
use crate::postings::InvertedIndexSerializer;
|
||||
@@ -9,7 +12,7 @@ use crate::store::StoreWriter;
|
||||
pub struct SegmentSerializer {
|
||||
segment: Segment,
|
||||
pub(crate) store_writer: StoreWriter,
|
||||
fast_field_serializer: CompositeFastFieldSerializer,
|
||||
fast_field_write: WritePtr,
|
||||
fieldnorms_serializer: Option<FieldNormsSerializer>,
|
||||
postings_serializer: InvertedIndexSerializer,
|
||||
}
|
||||
@@ -47,7 +50,6 @@ impl SegmentSerializer {
|
||||
};
|
||||
|
||||
let fast_field_write = segment.open_write(SegmentComponent::FastFields)?;
|
||||
let fast_field_serializer = CompositeFastFieldSerializer::from_write(fast_field_write)?;
|
||||
|
||||
let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?;
|
||||
let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;
|
||||
@@ -56,7 +58,7 @@ impl SegmentSerializer {
|
||||
Ok(SegmentSerializer {
|
||||
segment,
|
||||
store_writer,
|
||||
fast_field_serializer,
|
||||
fast_field_write,
|
||||
fieldnorms_serializer: Some(fieldnorms_serializer),
|
||||
postings_serializer,
|
||||
})
|
||||
@@ -81,8 +83,8 @@ impl SegmentSerializer {
|
||||
}
|
||||
|
||||
/// Accessor to the `FastFieldSerializer`.
|
||||
pub fn get_fast_field_serializer(&mut self) -> &mut CompositeFastFieldSerializer {
|
||||
&mut self.fast_field_serializer
|
||||
pub fn get_fast_field_write(&mut self) -> &mut WritePtr {
|
||||
&mut self.fast_field_write
|
||||
}
|
||||
|
||||
/// Extract the field norm serializer.
|
||||
@@ -102,7 +104,7 @@ impl SegmentSerializer {
|
||||
if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() {
|
||||
fieldnorms_serializer.close()?;
|
||||
}
|
||||
self.fast_field_serializer.close()?;
|
||||
self.fast_field_write.terminate()?;
|
||||
self.postings_serializer.close()?;
|
||||
self.store_writer.close()?;
|
||||
Ok(())
|
||||
|
||||
@@ -348,8 +348,7 @@ impl SegmentWriter {
|
||||
pub fn add_document(&mut self, add_operation: AddOperation) -> crate::Result<()> {
|
||||
let doc = add_operation.document;
|
||||
self.doc_opstamps.push(add_operation.opstamp);
|
||||
let doc_id = self.max_doc;
|
||||
self.fast_field_writers.add_document(doc_id, &doc)?;
|
||||
self.fast_field_writers.add_document(&doc)?;
|
||||
self.index_document(&doc)?;
|
||||
let doc_writer = self.segment_serializer.get_store_writer();
|
||||
doc_writer.store(&doc, &self.schema)?;
|
||||
@@ -410,8 +409,7 @@ fn remap_and_write(
|
||||
)?;
|
||||
debug!("fastfield-serialize");
|
||||
fast_field_writers.serialize(
|
||||
serializer.get_fast_field_serializer(),
|
||||
&term_ord_map,
|
||||
serializer.get_fast_field_write(),
|
||||
doc_id_map,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -263,7 +263,7 @@ mod indexer;
|
||||
pub mod error;
|
||||
pub mod tokenizer;
|
||||
|
||||
pub mod aggregation;
|
||||
// pub mod aggregation;
|
||||
pub mod collector;
|
||||
pub mod directory;
|
||||
pub mod fastfield;
|
||||
@@ -1166,4 +1166,5 @@ pub mod tests {
|
||||
);
|
||||
assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::io;
|
||||
|
||||
use stacker::Addr;
|
||||
|
||||
use crate::fastfield::MultiValuedFastFieldWriter;
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::postings_writer::SpecializedPostingsWriter;
|
||||
use crate::postings::recorder::{BufferLender, DocIdRecorder, Recorder};
|
||||
@@ -44,7 +43,6 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
|
||||
term_buffer: &mut Term,
|
||||
ctx: &mut IndexingContext,
|
||||
indexing_position: &mut IndexingPosition,
|
||||
_fast_field_writer: Option<&mut MultiValuedFastFieldWriter>,
|
||||
) {
|
||||
self.str_posting_writer.index_text(
|
||||
doc_id,
|
||||
@@ -52,7 +50,6 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
|
||||
term_buffer,
|
||||
ctx,
|
||||
indexing_position,
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ use std::ops::Range;
|
||||
use rustc_hash::FxHashMap;
|
||||
use stacker::Addr;
|
||||
|
||||
use crate::fastfield::MultiValuedFastFieldWriter;
|
||||
use crate::fieldnorm::FieldNormReaders;
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::recorder::{BufferLender, Recorder};
|
||||
@@ -135,7 +134,7 @@ pub(crate) trait PostingsWriter: Send + Sync {
|
||||
pos: u32,
|
||||
term: &Term,
|
||||
ctx: &mut IndexingContext,
|
||||
) -> UnorderedTermId;
|
||||
) -> UnorderedTermId; // TODO remove UnorderedTermId
|
||||
|
||||
/// Serializes the postings on disk.
|
||||
/// The actual serialization format is handled by the `PostingsSerializer`.
|
||||
@@ -155,7 +154,6 @@ pub(crate) trait PostingsWriter: Send + Sync {
|
||||
term_buffer: &mut Term,
|
||||
ctx: &mut IndexingContext,
|
||||
indexing_position: &mut IndexingPosition,
|
||||
mut term_id_fast_field_writer_opt: Option<&mut MultiValuedFastFieldWriter>,
|
||||
) {
|
||||
let end_of_path_idx = term_buffer.len_bytes();
|
||||
let mut num_tokens = 0;
|
||||
@@ -175,11 +173,7 @@ pub(crate) trait PostingsWriter: Send + Sync {
|
||||
term_buffer.append_bytes(token.text.as_bytes());
|
||||
let start_position = indexing_position.end_position + token.position as u32;
|
||||
end_position = end_position.max(start_position + token.position_length as u32);
|
||||
let unordered_term_id = self.subscribe(doc_id, start_position, term_buffer, ctx);
|
||||
if let Some(term_id_fast_field_writer) = term_id_fast_field_writer_opt.as_mut() {
|
||||
term_id_fast_field_writer.add_val(unordered_term_id);
|
||||
}
|
||||
|
||||
self.subscribe(doc_id, start_position, term_buffer, ctx);
|
||||
num_tokens += 1;
|
||||
});
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ mod more_like_this;
|
||||
mod phrase_query;
|
||||
mod query;
|
||||
mod query_parser;
|
||||
mod range_query;
|
||||
// mod range_query;
|
||||
mod regex_query;
|
||||
mod reqopt_scorer;
|
||||
mod scorer;
|
||||
@@ -50,7 +50,7 @@ pub use self::more_like_this::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};
|
||||
pub use self::phrase_query::PhraseQuery;
|
||||
pub use self::query::{EnableScoring, Query, QueryClone};
|
||||
pub use self::query_parser::{QueryParser, QueryParserError};
|
||||
pub use self::range_query::RangeQuery;
|
||||
// pub use self::range_query::RangeQuery;
|
||||
pub use self::regex_query::RegexQuery;
|
||||
pub use self::reqopt_scorer::RequiredOptionalScorer;
|
||||
pub use self::score_combiner::{
|
||||
|
||||
@@ -13,10 +13,11 @@ use crate::core::Index;
|
||||
use crate::indexer::{
|
||||
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
|
||||
};
|
||||
use crate::query::range_query::is_type_valid_for_fastfield_range_query;
|
||||
// use crate::query::range_query::is_type_valid_for_fastfield_range_query;
|
||||
use crate::query::{
|
||||
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query,
|
||||
RangeQuery, TermQuery, TermSetQuery,
|
||||
// RangeQuery,
|
||||
TermQuery, TermSetQuery,
|
||||
};
|
||||
use crate::schema::{
|
||||
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions,
|
||||
@@ -334,6 +335,8 @@ impl QueryParser {
|
||||
json_path: &str,
|
||||
phrase: &str,
|
||||
) -> Result<Term, QueryParserError> {
|
||||
todo!();
|
||||
/*
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let field_type = field_entry.field_type();
|
||||
let field_supports_ff_range_queries = field_type.is_fast()
|
||||
@@ -417,6 +420,7 @@ impl QueryParser {
|
||||
Ok(Term::from_field_ip_addr(field, ip_v6))
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
fn compute_logical_ast_for_leaf(
|
||||
@@ -740,9 +744,11 @@ fn convert_literal_to_query(
|
||||
value_type,
|
||||
lower,
|
||||
upper,
|
||||
} => Box::new(RangeQuery::new_term_bounds(
|
||||
field, value_type, &lower, &upper,
|
||||
)),
|
||||
} => { todo!();
|
||||
// Box::new(RangeQuery::new_term_bounds(
|
||||
// field, value_type, &lower, &upper,
|
||||
// ))
|
||||
} ,
|
||||
LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)),
|
||||
LogicalLiteral::All => Box::new(AllQuery),
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::sync::Arc;
|
||||
|
||||
use fastfield_codecs::Column;
|
||||
|
||||
use crate::fastfield::{MakeZero, MultiValuedFastFieldReader};
|
||||
use crate::fastfield::MakeZero;
|
||||
use crate::{DocId, DocSet, TERMINATED};
|
||||
|
||||
/// Helper to have a cursor over a vec of docids
|
||||
|
||||
@@ -8,10 +8,13 @@ use std::ops::{Bound, RangeInclusive};
|
||||
use common::BinarySerializable;
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
|
||||
use super::fast_field_range_query::{FastFieldCardinality, RangeDocSet};
|
||||
use super::range_query::map_bound;
|
||||
use crate::query::{ConstScorer, Explanation, Scorer, Weight};
|
||||
<<<<<<< HEAD
|
||||
use crate::schema::Cardinality;
|
||||
=======
|
||||
use crate::schema::Field;
|
||||
>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples)
|
||||
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError};
|
||||
|
||||
/// `IPFastFieldRangeWeight` uses the ip address fast field to execute range queries.
|
||||
@@ -40,6 +43,7 @@ impl IPFastFieldRangeWeight {
|
||||
|
||||
impl Weight for IPFastFieldRangeWeight {
|
||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||
<<<<<<< HEAD
|
||||
let field_type = reader
|
||||
.schema()
|
||||
.get_field_entry(reader.schema().get_field(&self.field)?)
|
||||
@@ -74,6 +78,40 @@ impl Weight for IPFastFieldRangeWeight {
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
}
|
||||
}
|
||||
=======
|
||||
todo!();
|
||||
// let field_type = reader.schema().get_field_entry(self.field).field_type();
|
||||
// match field_type.fastfield_cardinality().unwrap() {
|
||||
// Cardinality::SingleValue => {
|
||||
// let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?;
|
||||
// let value_range = bound_to_value_range(
|
||||
// &self.left_bound,
|
||||
// &self.right_bound,
|
||||
// ip_addr_fast_field.min_value(),
|
||||
// ip_addr_fast_field.max_value(),
|
||||
// );
|
||||
// let docset = RangeDocSet::new(
|
||||
// value_range,
|
||||
// FastFieldCardinality::SingleValue(ip_addr_fast_field),
|
||||
// );
|
||||
// Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
// }
|
||||
// Cardinality::MultiValues => {
|
||||
// let ip_addr_fast_field = reader.fast_fields().ip_addrs(self.field)?;
|
||||
// let value_range = bound_to_value_range(
|
||||
// &self.left_bound,
|
||||
// &self.right_bound,
|
||||
// ip_addr_fast_field.min_value(),
|
||||
// ip_addr_fast_field.max_value(),
|
||||
// );
|
||||
// let docset = RangeDocSet::new(
|
||||
// value_range,
|
||||
// FastFieldCardinality::MultiValue(ip_addr_fast_field),
|
||||
// );
|
||||
// Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
// }
|
||||
// }
|
||||
>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples)
|
||||
}
|
||||
|
||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||
|
||||
@@ -6,10 +6,14 @@ use std::ops::{Bound, RangeInclusive};
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU64;
|
||||
|
||||
use super::fast_field_range_query::{FastFieldCardinality, RangeDocSet};
|
||||
use super::fast_field_range_query::RangeDocSet;
|
||||
use super::range_query::map_bound;
|
||||
use crate::query::{ConstScorer, Explanation, Scorer, Weight};
|
||||
<<<<<<< HEAD
|
||||
use crate::schema::Cardinality;
|
||||
=======
|
||||
use crate::schema::Field;
|
||||
>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples)
|
||||
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError};
|
||||
|
||||
/// `FastFieldRangeWeight` uses the fast field to execute range queries.
|
||||
@@ -33,6 +37,7 @@ impl FastFieldRangeWeight {
|
||||
|
||||
impl Weight for FastFieldRangeWeight {
|
||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||
<<<<<<< HEAD
|
||||
let field_type = reader
|
||||
.schema()
|
||||
.get_field_entry(reader.schema().get_field(&self.field)?)
|
||||
@@ -63,6 +68,36 @@ impl Weight for FastFieldRangeWeight {
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
}
|
||||
}
|
||||
=======
|
||||
todo!();
|
||||
// let field_type = reader.schema().get_field_entry(self.field).field_type();
|
||||
// match field_type.fastfield_cardinality().unwrap() {
|
||||
// Cardinality::SingleValue => {
|
||||
// let fast_field = reader.fast_fields().u64_lenient(self.field)?;
|
||||
// let value_range = bound_to_value_range(
|
||||
// &self.left_bound,
|
||||
// &self.right_bound,
|
||||
// fast_field.min_value(),
|
||||
// fast_field.max_value(),
|
||||
// );
|
||||
// let docset =
|
||||
// RangeDocSet::new(value_range, FastFieldCardinality::SingleValue(fast_field));
|
||||
// Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
// }
|
||||
// Cardinality::MultiValues => {
|
||||
// let fast_field = reader.fast_fields().u64s_lenient(self.field)?;
|
||||
// let value_range = bound_to_value_range(
|
||||
// &self.left_bound,
|
||||
// &self.right_bound,
|
||||
// fast_field.min_value(),
|
||||
// fast_field.max_value(),
|
||||
// );
|
||||
// let docset =
|
||||
// RangeDocSet::new(value_range, FastFieldCardinality::MultiValue(fast_field));
|
||||
// Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
// }
|
||||
// }
|
||||
>>>>>>> fd1deefd12 (Disconnected facet / fast field merges / examples)
|
||||
}
|
||||
|
||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||
|
||||
@@ -8,7 +8,7 @@ use serde_json::Value as JsonValue;
|
||||
use thiserror::Error;
|
||||
|
||||
use super::ip_options::IpAddrOptions;
|
||||
use super::{Cardinality, IntoIpv6Addr};
|
||||
use super:: IntoIpv6Addr;
|
||||
use crate::schema::bytes_options::BytesOptions;
|
||||
use crate::schema::facet_options::FacetOptions;
|
||||
use crate::schema::{
|
||||
@@ -241,27 +241,6 @@ impl FieldType {
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true if the field is fast.
|
||||
pub fn fastfield_cardinality(&self) -> Option<Cardinality> {
|
||||
todo!();
|
||||
// match *self {
|
||||
// FieldType::Bytes(ref bytes_options) => {
|
||||
// bytes_options.is_fast().then_some(Cardinality::SingleValue)
|
||||
// }
|
||||
// FieldType::Str(ref text_options) => {
|
||||
// text_options.is_fast().then_some(Cardinality::MultiValues)
|
||||
// }
|
||||
// FieldType::U64(ref int_options)
|
||||
// | FieldType::I64(ref int_options)
|
||||
// | FieldType::F64(ref int_options)
|
||||
// | FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality(),
|
||||
// FieldType::Date(ref date_options) => date_options.get_fastfield_cardinality(),
|
||||
// FieldType::Facet(_) => Some(Cardinality::MultiValues),
|
||||
// FieldType::JsonObject(_) => None,
|
||||
// FieldType::IpAddr(ref ip_addr_options) =>
|
||||
// ip_addr_options.get_fastfield_cardinality(), }
|
||||
}
|
||||
|
||||
/// returns true if the field is normed (see [fieldnorms](crate::fieldnorm)).
|
||||
pub fn has_fieldnorms(&self) -> bool {
|
||||
match *self {
|
||||
|
||||
@@ -143,7 +143,7 @@ pub use self::json_object_options::JsonObjectOptions;
|
||||
pub use self::named_field_document::NamedFieldDocument;
|
||||
pub use self::numeric_options::NumericOptions;
|
||||
#[allow(deprecated)]
|
||||
pub use self::numeric_options::{Cardinality, IntOptions};
|
||||
pub use self::numeric_options::IntOptions;
|
||||
pub use self::schema::{DocParsingError, Schema, SchemaBuilder};
|
||||
pub use self::term::Term;
|
||||
pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};
|
||||
|
||||
@@ -4,18 +4,6 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
|
||||
|
||||
/// Express whether a field is single-value or multi-valued.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)]
|
||||
pub enum Cardinality {
|
||||
/// The document must have exactly one value associated with the document.
|
||||
#[serde(rename = "single")]
|
||||
SingleValue,
|
||||
/// The document can have any number of values associated with the document.
|
||||
/// This is more memory and CPU expensive than the `SingleValue` solution.
|
||||
#[serde(rename = "multi")]
|
||||
MultiValues,
|
||||
}
|
||||
|
||||
#[deprecated(since = "0.17.0", note = "Use NumericOptions instead.")]
|
||||
/// Deprecated use [`NumericOptions`] instead.
|
||||
pub type IntOptions = NumericOptions;
|
||||
|
||||
Reference in New Issue
Block a user