diff --git a/columnar/src/TODO.md b/columnar/src/TODO.md index c29f86910..24cb0e76f 100644 --- a/columnar/src/TODO.md +++ b/columnar/src/TODO.md @@ -26,6 +26,7 @@ Add alignment? Consider another codec to bridge the gap between few and 5k elements # Cleanup and rationalization +remove the 6 bit limitation of columntype. use 4 + 4 bits instead. in benchmark, unify percent vs ratio, f32 vs f64. investigate if should have better errors? io::Error is overused at the moment. rename rank/select in unit tests diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index 1c8830c08..23586d505 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -20,7 +20,7 @@ pub struct Column { use crate::column_index::Set; -impl Column { +impl Column { pub fn first(&self, row_id: RowId) -> Option { match &self.idx { ColumnIndex::Full => Some(self.values.get_val(row_id)), @@ -33,6 +33,13 @@ impl Column { } } } + + pub fn first_or_default_col(self, default_value: T) -> Arc> { + Arc::new(FirstValueWithDefault { + column: self, + default_value, + }) + } } impl Deref for Column { @@ -54,3 +61,27 @@ impl BinarySerializable for Cardinality { Ok(cardinality) } } + +// TODO simplify or optimize +struct FirstValueWithDefault { + column: Column, + default_value: T, +} + +impl ColumnValues for FirstValueWithDefault { + fn get_val(&self, idx: u32) -> T { + self.column.first(idx).unwrap_or(self.default_value) + } + + fn min_value(&self) -> T { + self.column.values.min_value() + } + + fn max_value(&self) -> T { + self.column.values.max_value() + } + + fn num_vals(&self) -> u32 { + self.column.idx.num_rows() + } +} diff --git a/columnar/src/column_values/monotonic_mapping.rs b/columnar/src/column_values/monotonic_mapping.rs index 10bb27319..415043647 100644 --- a/columnar/src/column_values/monotonic_mapping.rs +++ b/columnar/src/column_values/monotonic_mapping.rs @@ -194,6 +194,20 @@ impl MonotonicallyMappableToU64 for i64 { } } +impl MonotonicallyMappableToU64 for 
crate::DateTime { + #[inline(always)] + fn to_u64(self) -> u64 { + common::i64_to_u64(self.timestamp_micros) + } + + #[inline(always)] + fn from_u64(val: u64) -> Self { + crate::DateTime { + timestamp_micros: common::u64_to_i64(val), + } + } +} + impl MonotonicallyMappableToU64 for bool { #[inline(always)] fn to_u64(self) -> u64 { diff --git a/columnar/src/columnar/column_type.rs b/columnar/src/columnar/column_type.rs index 6d851ed38..524e5b3bc 100644 --- a/columnar/src/columnar/column_type.rs +++ b/columnar/src/columnar/column_type.rs @@ -11,6 +11,7 @@ pub enum ColumnType { Bytes, Numerical(NumericalType), Bool, + DateTime, } impl ColumnType { @@ -31,6 +32,10 @@ impl ColumnType { column_type_category = ColumnTypeCategory::Bool; numerical_type_code = 0u8; } + ColumnType::DateTime => { + column_type_category = ColumnTypeCategory::DateTime; + numerical_type_code = 0u8; + } } place_bits::<0, 3>(column_type_category.to_code()) | place_bits::<3, 6>(numerical_type_code) } @@ -59,10 +64,50 @@ impl ColumnType { let numerical_type = NumericalType::try_from_code(numerical_type_code)?; Ok(ColumnType::Numerical(numerical_type)) } + ColumnTypeCategory::DateTime => { + if numerical_type_code != 0u8 { + return Err(InvalidData); + } + Ok(ColumnType::DateTime) + } } } } +pub trait HasAssociatedColumnType: 'static + Send + Sync + Copy + PartialOrd { + fn column_type() -> ColumnType; +} + +impl HasAssociatedColumnType for u64 { + fn column_type() -> ColumnType { + ColumnType::Numerical(NumericalType::U64) + } +} + +impl HasAssociatedColumnType for i64 { + fn column_type() -> ColumnType { + ColumnType::Numerical(NumericalType::I64) + } +} + +impl HasAssociatedColumnType for f64 { + fn column_type() -> ColumnType { + ColumnType::Numerical(NumericalType::F64) + } +} + +impl HasAssociatedColumnType for bool { + fn column_type() -> ColumnType { + ColumnType::Bool + } +} + +impl HasAssociatedColumnType for crate::DateTime { + fn column_type() -> ColumnType { + ColumnType::DateTime + } +} + 
/// Column types are grouped into different categories that /// corresponds to the different types of `JsonValue` types. /// @@ -76,6 +121,7 @@ pub(crate) enum ColumnTypeCategory { Bool = 0u8, Str = 1u8, Numerical = 2u8, + DateTime = 3u8, } impl ColumnTypeCategory { @@ -88,6 +134,7 @@ impl ColumnTypeCategory { 0u8 => Ok(Self::Bool), 1u8 => Ok(Self::Str), 2u8 => Ok(Self::Numerical), + 3u8 => Ok(Self::DateTime), _ => Err(InvalidData), } } diff --git a/columnar/src/columnar/mod.rs b/columnar/src/columnar/mod.rs index 37114c8d0..dead1431b 100644 --- a/columnar/src/columnar/mod.rs +++ b/columnar/src/columnar/mod.rs @@ -23,6 +23,6 @@ mod format_version; mod reader; mod writer; -pub use column_type::ColumnType; +pub use column_type::{ColumnType, HasAssociatedColumnType}; pub use reader::ColumnarReader; pub use writer::ColumnarWriter; diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index e2915af63..593cd0f23 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -85,13 +85,12 @@ fn mutate_or_create_column( } impl ColumnarWriter { - pub fn mem_usage(&self) -> usize { // TODO add dictionary builders. 
- self.arena.mem_usage() + - self.numerical_field_hash_map.mem_usage() + - self.bool_field_hash_map.mem_usage() + - self.bytes_field_hash_map.mem_usage() + self.arena.mem_usage() + + self.numerical_field_hash_map.mem_usage() + + self.bool_field_hash_map.mem_usage() + + self.bytes_field_hash_map.mem_usage() } pub fn force_numerical_type(&mut self, column_name: &str, numerical_type: NumericalType) { @@ -223,6 +222,22 @@ impl ColumnarWriter { &mut column_serializer, )?; } + ColumnTypeCategory::DateTime => { + let numerical_column_writer: NumericalColumnWriter = + self.numerical_field_hash_map.read(addr); + let (_numerical_type, cardinality) = + numerical_column_writer.column_type_and_cardinality(num_docs); + let mut column_serializer = + serializer.serialize_column(column_name, ColumnType::DateTime); + serialize_numerical_column( + cardinality, + num_docs, + NumericalType::I64, + numerical_column_writer.operation_iterator(arena, &mut symbol_byte_buffer), + buffers, + &mut column_serializer, + )?; + } }; } serializer.finalize()?; diff --git a/columnar/src/dynamic_column.rs b/columnar/src/dynamic_column.rs index fdfb7ad64..9a8f12e1c 100644 --- a/columnar/src/dynamic_column.rs +++ b/columnar/src/dynamic_column.rs @@ -6,7 +6,6 @@ use common::{HasLen, OwnedBytes}; use crate::column::{BytesColumn, Column}; use crate::columnar::ColumnType; -use crate::DateTime; #[derive(Clone)] pub enum DynamicColumn { @@ -15,33 +14,35 @@ pub enum DynamicColumn { U64(Column), F64(Column), IpAddr(Column), - DateTime(Column), Str(BytesColumn), + DateTime(Column), } -impl From> for DynamicColumn { - fn from(column_i64: Column) -> Self { - DynamicColumn::I64(column_i64) - } +macro_rules! 
static_dynamic_conversions { + ($typ:ty, $enum_name:ident) => { + impl Into>> for DynamicColumn { + fn into(self) -> Option> { + if let Self::$enum_name(col) = self { + Some(col) + } else { + None + } + } + } + + impl From> for DynamicColumn { + fn from(typed_column: Column<$typ>) -> Self { + DynamicColumn::$enum_name(typed_column) + } + } + }; } -impl From> for DynamicColumn { - fn from(column_u64: Column) -> Self { - DynamicColumn::U64(column_u64) - } -} - -impl From> for DynamicColumn { - fn from(column_f64: Column) -> Self { - DynamicColumn::F64(column_f64) - } -} - -impl From> for DynamicColumn { - fn from(bool_column: Column) -> Self { - DynamicColumn::Bool(bool_column) - } -} +static_dynamic_conversions!(bool, Bool); +static_dynamic_conversions!(u64, U64); +static_dynamic_conversions!(i64, I64); +static_dynamic_conversions!(f64, F64); +static_dynamic_conversions!(crate::DateTime, DateTime); impl From for DynamicColumn { fn from(dictionary_encoded_col: BytesColumn) -> Self { @@ -56,11 +57,13 @@ pub struct DynamicColumnHandle { } impl DynamicColumnHandle { + // TODO rename load pub fn open(&self) -> io::Result { let column_bytes: OwnedBytes = self.file_slice.read_bytes()?; self.open_internal(column_bytes) } + // TODO rename load_async pub async fn open_async(&self) -> io::Result { let column_bytes: OwnedBytes = self.file_slice.read_bytes_async().await?; self.open_internal(column_bytes) @@ -81,6 +84,9 @@ impl DynamicColumnHandle { } }, ColumnType::Bool => crate::column::open_column_u64::(column_bytes)?.into(), + ColumnType::DateTime => { + crate::column::open_column_u64::(column_bytes)?.into() + } }; Ok(dynamic_column) } diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 0c37a025a..d6ae238cf 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -18,16 +18,18 @@ mod dynamic_column; pub(crate) mod utils; mod value; -pub use columnar::{ColumnarReader, ColumnarWriter}; +pub use column::Column; +pub use column_values::ColumnValues; +pub use 
columnar::{ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType}; pub use value::{NumericalType, NumericalValue}; -// pub use self::dynamic_column::DynamicColumnHandle; +pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle}; pub type RowId = u32; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialOrd, PartialEq, Default)] pub struct DateTime { - timestamp_micros: i64, + pub timestamp_micros: i64, } #[derive(Copy, Clone, Debug)] diff --git a/columnar/src/value.rs b/columnar/src/value.rs index 258e80b18..82992a1dd 100644 --- a/columnar/src/value.rs +++ b/columnar/src/value.rs @@ -1,4 +1,4 @@ -use crate::InvalidData; +use crate::{Column, ColumnType, InvalidData}; #[derive(Copy, Clone, Debug, PartialEq)] pub enum NumericalValue { diff --git a/fastfield_codecs/Cargo.toml b/fastfield_codecs/Cargo.toml index c1552ecc7..6cada40a2 100644 --- a/fastfield_codecs/Cargo.toml +++ b/fastfield_codecs/Cargo.toml @@ -14,6 +14,7 @@ repository = "https://github.com/quickwit-oss/tantivy" [dependencies] common = { version = "0.5", path = "../common/", package = "tantivy-common" } tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" } +columnar = { version= "0.1", path="../columnar", package="tantivy-columnar" } prettytable-rs = {version="0.10.0", optional= true} rand = {version="0.8.3", optional= true} fastdivide = "0.4" diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index 14334bfa8..807e59855 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -2,81 +2,11 @@ use std::fmt::{self, Debug}; use std::marker::PhantomData; use std::ops::{Range, RangeInclusive}; +pub use columnar::ColumnValues as Column; use tantivy_bitpacker::minmax; use crate::monotonic_mapping::StrictlyMonotonicFn; -/// `Column` provides columnar access on a field. -pub trait Column: Send + Sync { - /// Return the value associated with the given idx. - /// - /// This accessor should return as fast as possible. 
- /// - /// # Panics - /// - /// May panic if `idx` is greater than the column length. - fn get_val(&self, idx: u32) -> T; - - /// Fills an output buffer with the fast field values - /// associated with the `DocId` going from - /// `start` to `start + output.len()`. - /// - /// # Panics - /// - /// Must panic if `start + output.len()` is greater than - /// the segment's `maxdoc`. - #[inline] - fn get_range(&self, start: u64, output: &mut [T]) { - for (out, idx) in output.iter_mut().zip(start..) { - *out = self.get_val(idx as u32); - } - } - - /// Get the positions of values which are in the provided value range. - /// - /// Note that position == docid for single value fast fields - #[inline] - fn get_docids_for_value_range( - &self, - value_range: RangeInclusive, - doc_id_range: Range, - positions: &mut Vec, - ) { - let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals()); - - for idx in doc_id_range.start..doc_id_range.end { - let val = self.get_val(idx); - if value_range.contains(&val) { - positions.push(idx); - } - } - } - - /// Returns the minimum value for this fast field. - /// - /// This min_value may not be exact. - /// For instance, the min value does not take in account of possible - /// deleted document. All values are however guaranteed to be higher than - /// `.min_value()`. - fn min_value(&self) -> T; - - /// Returns the maximum value for this fast field. - /// - /// This max_value may not be exact. - /// For instance, the max value does not take in account of possible - /// deleted document. All values are however guaranteed to be higher than - /// `.max_value()`. - fn max_value(&self) -> T; - - /// The number of values in the column. - fn num_vals(&self) -> u32; - - /// Returns a iterator over the data - fn iter<'a>(&'a self) -> Box + 'a> { - Box::new((0..self.num_vals()).map(|idx| self.get_val(idx))) - } -} - /// VecColumn provides `Column` over a slice. 
pub struct VecColumn<'a, T = u64> { values: &'a [T], @@ -84,32 +14,6 @@ pub struct VecColumn<'a, T = u64> { max_value: T, } -impl<'a, C: Column, T: Copy + PartialOrd + fmt::Debug> Column for &'a C { - fn get_val(&self, idx: u32) -> T { - (*self).get_val(idx) - } - - fn min_value(&self) -> T { - (*self).min_value() - } - - fn max_value(&self) -> T { - (*self).max_value() - } - - fn num_vals(&self) -> u32 { - (*self).num_vals() - } - - fn iter<'b>(&'b self) -> Box + 'b> { - (*self).iter() - } - - fn get_range(&self, start: u64, output: &mut [T]) { - (*self).get_range(start, output) - } -} - impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> Column for VecColumn<'a, T> { fn get_val(&self, position: u32) -> T { self.values[position as usize] diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index a70bc1e48..b5e0840d9 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -12,10 +12,10 @@ use std::marker::PhantomData; use std::sync::Arc; +use columnar::{DynamicColumn, HasAssociatedColumnType}; use fastfield_codecs::Column; use crate::collector::{Collector, SegmentCollector}; -use crate::fastfield::FastValue; use crate::schema::Field; use crate::{Score, SegmentReader, TantivyError}; @@ -61,7 +61,7 @@ use crate::{Score, SegmentReader, TantivyError}; /// # Ok(()) /// # } /// ``` -pub struct FilterCollector +pub struct FilterCollector where TPredicate: 'static + Clone { field: Field, @@ -70,7 +70,7 @@ where TPredicate: 'static + Clone t_predicate_value: PhantomData, } -impl +impl FilterCollector where TCollector: Collector + Send + Sync, @@ -91,12 +91,13 @@ where } } -impl Collector +impl Collector for FilterCollector where TCollector: Collector + Send + Sync, TPredicate: 'static + Fn(TPredicateValue) -> bool + Send + Sync + Clone, - TPredicateValue: FastValue, + TPredicateValue: HasAssociatedColumnType, + DynamicColumn: Into>>, { // That's the type of our result. 
// Our standard deviation will be a float. @@ -117,20 +118,10 @@ where field_entry.name() ))); } - let requested_type = TPredicateValue::to_type(); - let field_schema_type = field_entry.field_type().value_type(); - if requested_type != field_schema_type { - return Err(TantivyError::SchemaError(format!( - "Field {:?} is of type {:?}!={:?}", - field_entry.name(), - requested_type, - field_schema_type - ))); - } let fast_field_reader = segment_reader .fast_fields() - .typed_fast_field_reader(schema.get_field_name(self.field))?; + .typed_column_first_or_default(schema.get_field_name(self.field))?; let segment_collector = self .collector @@ -159,7 +150,7 @@ where pub struct FilterSegmentCollector where TPredicate: 'static, - TPredicateValue: FastValue, + DynamicColumn: Into>>, { fast_field_reader: Arc>, segment_collector: TSegmentCollector, @@ -171,8 +162,9 @@ impl SegmentCollector for FilterSegmentCollector where TSegmentCollector: SegmentCollector, + TPredicateValue: HasAssociatedColumnType, TPredicate: 'static + Fn(TPredicateValue) -> bool + Send + Sync, - TPredicateValue: FastValue, + DynamicColumn: Into>>, { type Fruit = TSegmentCollector::Fruit; diff --git a/src/collector/mod.rs b/src/collector/mod.rs index fe9c29ba6..deb38c752 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -104,7 +104,6 @@ pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer}; mod tweak_score_top_collector; pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker}; - // mod facet_collector; // pub use self::facet_collector::{FacetCollector, FacetCounts}; use crate::query::Weight; diff --git a/src/collector/tests.rs b/src/collector/tests.rs index db8ac7ae2..e6704f83a 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -57,9 +57,10 @@ pub fn test_filter_collector() -> crate::Result<()> { assert_eq!(filtered_top_docs.len(), 0); - fn date_filter(value: DateTime) -> bool { - (value.into_utc() - 
OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap()) - .whole_weeks() + fn date_filter(value: columnar::DateTime) -> bool { + (crate::DateTime::from(value).into_utc() + - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap()) + .whole_weeks() > 0 } @@ -164,7 +165,9 @@ pub struct FastFieldSegmentCollector { impl FastFieldTestCollector { pub fn for_field(field: impl ToString) -> FastFieldTestCollector { - FastFieldTestCollector { field: field.to_string() } + FastFieldTestCollector { + field: field.to_string(), + } } } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 53009a747..02cc0540f 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -154,10 +154,11 @@ impl CustomScorer for ScorerByField { // mapping is monotonic, so it is sufficient to compute our top-K docs. // // The conversion will then happen only on the top-K docs. - let ff_reader = segment_reader - .fast_fields() - .typed_fast_field_reader(segment_reader.schema().get_field_name(self.field))?; - Ok(ScorerByFastFieldReader { ff_reader }) + todo!(); + // let ff_reader = segment_reader + // .fast_fields() + // .typed_column(&self.field)?; + // Ok(ScorerByFastFieldReader { ff_reader }) } } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 2bc5a88d6..49234ba0b 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -153,8 +153,7 @@ impl SegmentReader { let schema = segment.schema(); let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?; - let fast_fields_readers = - Arc::new(FastFieldReaders::open(fast_fields_data)?); + let fast_fields_readers = Arc::new(FastFieldReaders::open(fast_fields_data)?); let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?; let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index c904e0eec..ece6c4634 100644 --- 
a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -27,7 +27,6 @@ pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveB // pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter}; pub use self::error::{FastFieldNotAvailableError, Result}; // pub use self::facet_reader::FacetReader; - pub use self::readers::FastFieldReaders; pub use self::serializer::{Column, CompositeFastFieldSerializer}; use self::writer::unexpected_value; @@ -171,9 +170,7 @@ mod tests { use super::*; use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr}; use crate::merge_policy::NoMergePolicy; - use crate::schema::{ - Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT, - }; + use crate::schema::{Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT}; use crate::time::OffsetDateTime; use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader}; @@ -184,7 +181,6 @@ mod tests { }); pub static FIELD: Lazy = Lazy::new(|| SCHEMA.get_field("field").unwrap()); - #[test] pub fn test_convert_i64_u64() { let datetime = DateTime::from_utc(OffsetDateTime::UNIX_EPOCH); @@ -207,27 +203,25 @@ mod tests { fast_field_writers .add_document(&doc!(*FIELD=>2u64)) .unwrap(); - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); assert_eq!(file.len(), 164); let fast_field_readers = FastFieldReaders::open(file).unwrap(); - // let column = fast_field_readers.u64("field").unwrap(); - // assert_eq!(column.get_val(0), 13u64); - // assert_eq!(column.get_val(1), 14u64); - // assert_eq!(column.get_val(2), 2u64); + let column = fast_field_readers.u64("field").unwrap(); + assert_eq!(column.get_val(0), 13u64); + assert_eq!(column.get_val(1), 14u64); + assert_eq!(column.get_val(2), 2u64); Ok(()) } #[test] - fn test_intfastfield_large() -> crate::Result<()> { + fn test_intfastfield_large() 
{ let path = Path::new("test"); let directory: RamDirectory = RamDirectory::create(); { - let mut write: WritePtr = directory.open_write(Path::new("test"))?; + let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); fast_field_writers .add_document(&doc!(*FIELD=>4u64)) @@ -256,36 +250,28 @@ mod tests { fast_field_writers .add_document(&doc!(*FIELD=>215u64)) .unwrap(); - fast_field_writers.serialize(&mut write, None)?; - write.terminate()?; + fast_field_writers.serialize(&mut write, None).unwrap(); + write.terminate().unwrap(); } - let file = directory.open_read(path)?; - assert_eq!(file.len(), 62); - { - let fast_fields_composite = CompositeFile::open(&file)?; - let data = fast_fields_composite - .open_read(*FIELD) - .unwrap() - .read_bytes()?; - let fast_field_reader = open::(data)?; - assert_eq!(fast_field_reader.get_val(0), 4u64); - assert_eq!(fast_field_reader.get_val(1), 14_082_001u64); - assert_eq!(fast_field_reader.get_val(2), 3_052u64); - assert_eq!(fast_field_reader.get_val(3), 9002u64); - assert_eq!(fast_field_reader.get_val(4), 15_001u64); - assert_eq!(fast_field_reader.get_val(5), 777u64); - assert_eq!(fast_field_reader.get_val(6), 1_002u64); - assert_eq!(fast_field_reader.get_val(7), 1_501u64); - assert_eq!(fast_field_reader.get_val(8), 215u64); - } - Ok(()) + let file = directory.open_read(path).unwrap(); + assert_eq!(file.len(), 192); + let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let col = fast_field_readers.u64("field").unwrap(); + assert_eq!(col.get_val(0), 4u64); + assert_eq!(col.get_val(1), 14_082_001u64); + assert_eq!(col.get_val(2), 3_052u64); + assert_eq!(col.get_val(3), 9002u64); + assert_eq!(col.get_val(4), 15_001u64); + assert_eq!(col.get_val(5), 777u64); + assert_eq!(col.get_val(6), 1_002u64); + assert_eq!(col.get_val(7), 1_501u64); + assert_eq!(col.get_val(8), 215u64); } #[test] - fn test_intfastfield_null_amplitude() -> 
crate::Result<()> { + fn test_intfastfield_null_amplitude() { let path = Path::new("test"); let directory: RamDirectory = RamDirectory::create(); - { let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); @@ -294,29 +280,20 @@ mod tests { .add_document(&doc!(*FIELD=>100_000u64)) .unwrap(); } - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 35); - { - let fast_fields_composite = CompositeFile::open(&file).unwrap(); - let data = fast_fields_composite - .open_read(*FIELD) - .unwrap() - .read_bytes()?; - let fast_field_reader = open::(data)?; - for doc in 0..10_000 { - assert_eq!(fast_field_reader.get_val(doc), 100_000u64); - } + assert_eq!(file.len(), 165); + let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let fast_field_reader = fast_field_readers.u64("field").unwrap(); + for doc in 0..10_000 { + assert_eq!(fast_field_reader.get_val(doc), 100_000u64); } - Ok(()) } #[test] - fn test_intfastfield_large_numbers() -> crate::Result<()> { + fn test_intfastfield_large_numbers() { let path = Path::new("test"); let directory: RamDirectory = RamDirectory::create(); @@ -327,34 +304,23 @@ mod tests { fast_field_writers .add_document(&doc!(*FIELD=>0u64)) .unwrap(); - for doc_id in 1u64..10_001u64 { + for doc_id in 1u64..10_000u64 { fast_field_writers .add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + doc_id as u64)) .unwrap(); } - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 80049); + assert_eq!(file.len(), 80173); { - let fast_fields_composite = CompositeFile::open(&file)?; - let data = fast_fields_composite - 
.open_read(*FIELD) - .unwrap() - .read_bytes()?; - let fast_field_reader = open::(data)?; - assert_eq!(fast_field_reader.get_val(0), 0u64); - for doc in 1..10_001 { - assert_eq!( - fast_field_reader.get_val(doc), - 5_000_000_000_000_000_000u64 + doc as u64 - 1u64 - ); + let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let col = fast_field_readers.u64("field").unwrap(); + for doc in 1..10_000 { + assert_eq!(col.get_val(doc), 5_000_000_000_000_000_000u64 + doc as u64); } } - Ok(()) } #[test] @@ -373,29 +339,22 @@ mod tests { doc.add_i64(i64_field, i); fast_field_writers.add_document(&doc).unwrap(); } - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 49_usize); + assert_eq!(file.len(), 179_usize); { - let fast_fields_composite = CompositeFile::open(&file)?; - let data = fast_fields_composite - .open_read(i64_field) - .unwrap() - .read_bytes()?; - let fast_field_reader = open::(data)?; - - assert_eq!(fast_field_reader.min_value(), -100i64); - assert_eq!(fast_field_reader.max_value(), 9_999i64); + let fast_field_readers = FastFieldReaders::open(file).unwrap(); + let col = fast_field_readers.i64("field").unwrap(); + assert_eq!(col.min_value(), -100i64); + assert_eq!(col.max_value(), 9_999i64); for (doc, i) in (-100i64..10_000i64).enumerate() { - assert_eq!(fast_field_reader.get_val(doc as u32), i); + assert_eq!(col.get_val(doc as u32), i); } let mut buffer = vec![0i64; 100]; - fast_field_reader.get_range(53, &mut buffer[..]); + col.get_range(53, &mut buffer[..]); for i in 0..100 { assert_eq!(buffer[i], -100i64 + 53i64 + i as i64); } @@ -533,105 +492,103 @@ mod tests { // all // } - /* - #[test] - fn test_text_fastfield() -> crate::Result<()> { - let mut schema_builder = Schema::builder(); - let text_field = schema_builder.add_text_field("text", TEXT | FAST); - let schema = 
schema_builder.build(); - let index = Index::create_in_ram(schema); - - { - // first segment - let mut index_writer = index.writer_for_tests()?; - index_writer.set_merge_policy(Box::new(NoMergePolicy)); - index_writer.add_document(doc!( - text_field => "BBBBB AAAAA", // term_ord 1,2 - ))?; - index_writer.add_document(doc!())?; - index_writer.add_document(doc!( - text_field => "AAAAA", // term_ord 0 - ))?; - index_writer.add_document(doc!( - text_field => "AAAAA BBBBB", // term_ord 0 - ))?; - index_writer.add_document(doc!( - text_field => "zumberthree", // term_ord 2, after merge term_ord 3 - ))?; - - index_writer.add_document(doc!())?; - index_writer.commit()?; - - let reader = index.reader()?; - let searcher = reader.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); - let segment_reader = searcher.segment_reader(0); - let fast_fields = segment_reader.fast_fields(); - let text_fast_field = fast_fields.u64s("text").unwrap(); - - assert_eq!( - get_vals_for_docs(&text_fast_field, 0..5), - vec![1, 0, 0, 0, 1, 2] - ); - - let mut out = vec![]; - text_fast_field.get_vals(3, &mut out); - assert_eq!(out, vec![0, 1]); - - let inverted_index = segment_reader.inverted_index(text_field)?; - assert_eq!(inverted_index.terms().num_terms(), 3); - let mut bytes = vec![]; - assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?); - // default tokenizer applies lower case - assert_eq!(bytes, "aaaaa".as_bytes()); - } - - { - // second segment - let mut index_writer = index.writer_for_tests()?; - - index_writer.add_document(doc!( - text_field => "AAAAA", // term_ord 0 - ))?; - - index_writer.add_document(doc!( - text_field => "CCCCC AAAAA", // term_ord 1, after merge 2 - ))?; - - index_writer.add_document(doc!())?; - index_writer.commit()?; - - let reader = index.reader()?; - let searcher = reader.searcher(); - assert_eq!(searcher.segment_readers().len(), 2); - let segment_reader = searcher.segment_reader(1); - let fast_fields = segment_reader.fast_fields(); - let 
text_fast_field = fast_fields.u64s("text").unwrap(); - - assert_eq!(get_vals_for_docs(&text_fast_field, 0..3), vec![0, 1, 0]); - } - // Merging the segments - { - let segment_ids = index.searchable_segment_ids()?; - let mut index_writer = index.writer_for_tests()?; - index_writer.merge(&segment_ids).wait()?; - index_writer.wait_merging_threads()?; - } - - let reader = index.reader()?; - let searcher = reader.searcher(); - let segment_reader = searcher.segment_reader(0); - let fast_fields = segment_reader.fast_fields(); - let text_fast_field = fast_fields.u64s("text").unwrap(); - - assert_eq!( - get_vals_for_docs(&text_fast_field, 0..8), - vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2, 0] - ); - - Ok(()) - } - */ + // #[test] + // fn test_text_fastfield() -> crate::Result<()> { + // let mut schema_builder = Schema::builder(); + // let text_field = schema_builder.add_text_field("text", TEXT | FAST); + // let schema = schema_builder.build(); + // let index = Index::create_in_ram(schema); + // + // { + // first segment + // let mut index_writer = index.writer_for_tests()?; + // index_writer.set_merge_policy(Box::new(NoMergePolicy)); + // index_writer.add_document(doc!( + // text_field => "BBBBB AAAAA", // term_ord 1,2 + // ))?; + // index_writer.add_document(doc!())?; + // index_writer.add_document(doc!( + // text_field => "AAAAA", // term_ord 0 + // ))?; + // index_writer.add_document(doc!( + // text_field => "AAAAA BBBBB", // term_ord 0 + // ))?; + // index_writer.add_document(doc!( + // text_field => "zumberthree", // term_ord 2, after merge term_ord 3 + // ))?; + // + // index_writer.add_document(doc!())?; + // index_writer.commit()?; + // + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // assert_eq!(searcher.segment_readers().len(), 1); + // let segment_reader = searcher.segment_reader(0); + // let fast_fields = segment_reader.fast_fields(); + // let text_fast_field = fast_fields.u64s("text").unwrap(); + // + // assert_eq!( + // 
get_vals_for_docs(&text_fast_field, 0..5), + // vec![1, 0, 0, 0, 1, 2] + // ); + // + // let mut out = vec![]; + // text_fast_field.get_vals(3, &mut out); + // assert_eq!(out, vec![0, 1]); + // + // let inverted_index = segment_reader.inverted_index(text_field)?; + // assert_eq!(inverted_index.terms().num_terms(), 3); + // let mut bytes = vec![]; + // assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?); + // default tokenizer applies lower case + // assert_eq!(bytes, "aaaaa".as_bytes()); + // } + // + // { + // second segment + // let mut index_writer = index.writer_for_tests()?; + // + // index_writer.add_document(doc!( + // text_field => "AAAAA", // term_ord 0 + // ))?; + // + // index_writer.add_document(doc!( + // text_field => "CCCCC AAAAA", // term_ord 1, after merge 2 + // ))?; + // + // index_writer.add_document(doc!())?; + // index_writer.commit()?; + // + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // assert_eq!(searcher.segment_readers().len(), 2); + // let segment_reader = searcher.segment_reader(1); + // let fast_fields = segment_reader.fast_fields(); + // let text_fast_field = fast_fields.u64s("text").unwrap(); + // + // assert_eq!(get_vals_for_docs(&text_fast_field, 0..3), vec![0, 1, 0]); + // } + // Merging the segments + // { + // let segment_ids = index.searchable_segment_ids()?; + // let mut index_writer = index.writer_for_tests()?; + // index_writer.merge(&segment_ids).wait()?; + // index_writer.wait_merging_threads()?; + // } + // + // let reader = index.reader()?; + // let searcher = reader.searcher(); + // let segment_reader = searcher.segment_reader(0); + // let fast_fields = segment_reader.fast_fields(); + // let text_fast_field = fast_fields.u64s("text").unwrap(); + // + // assert_eq!( + // get_vals_for_docs(&text_fast_field, 0..8), + // vec![1, 0, 0, 0, 1, 3 /* next segment */, 0, 2, 0] + // ); + // + // Ok(()) + // } // #[test] // fn test_string_fastfield() -> crate::Result<()> { @@ -661,7 +618,6 @@ 
mod tests { // index_writer.add_document(doc!())?; // index_writer.commit()?; - // let reader = index.reader()?; // let searcher = reader.searcher(); // assert_eq!(searcher.segment_readers().len(), 1); @@ -693,7 +649,6 @@ mod tests { // index_writer.add_document(doc!())?; // index_writer.commit()?; - // let reader = index.reader()?; // let searcher = reader.searcher(); // assert_eq!(searcher.segment_readers().len(), 2); @@ -816,9 +771,7 @@ mod tests { fast_field_writers .add_document(&doc!(field=>false)) .unwrap(); - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); @@ -853,9 +806,7 @@ mod tests { .add_document(&doc!(field=>false)) .unwrap(); } - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } let file = directory.open_read(path).unwrap(); @@ -898,10 +849,7 @@ mod tests { Ok(()) } - fn get_index( - docs: &[crate::Document], - schema: &Schema, - ) -> crate::Result { + fn get_index(docs: &[crate::Document], schema: &Schema) -> crate::Result { let directory: RamDirectory = RamDirectory::create(); { let mut write: WritePtr = directory.open_write(Path::new("test")).unwrap(); @@ -909,9 +857,7 @@ mod tests { for (doc_id, doc) in docs.into_iter().enumerate() { fast_field_writers.add_document(doc).unwrap(); } - fast_field_writers - .serialize(&mut write, None) - .unwrap(); + fast_field_writers.serialize(&mut write, None).unwrap(); write.terminate().unwrap(); } Ok(directory) @@ -942,9 +888,7 @@ mod tests { }) .take(1_000) .collect(); - let date_options = DateOptions::default() - .set_fast() - .set_precision(precision); + let date_options = DateOptions::default().set_fast().set_precision(precision); let mut schema_builder = SchemaBuilder::default(); let field = schema_builder.add_date_field("field", date_options); let schema = 
schema_builder.build(); diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index d738d03b1..701de4a56 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -2,7 +2,9 @@ use std::io; use std::net::Ipv6Addr; use std::sync::Arc; -use columnar::ColumnarReader; +use columnar::{ + ColumnType, ColumnValues, ColumnarReader, DynamicColumn, HasAssociatedColumnType, NumericalType, +}; use fastfield_codecs::{open, open_u128, Column}; use crate::directory::{CompositeFile, FileSlice}; @@ -19,73 +21,52 @@ use crate::{DateTime, TantivyError}; pub struct FastFieldReaders { columnar: Arc, } -#[derive(Eq, PartialEq, Debug)] -pub(crate) enum FastType { - I64, - U64, - U128, - F64, - Bool, - Date, -} - -pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option { - todo!(); - // match field_type { - // FieldType::U64(options) => options - // .get_fastfield_cardinality() - // .map(|cardinality| (FastType::U64, cardinality)), - // FieldType::I64(options) => options - // .get_fastfield_cardinality() - // .map(|cardinality| (FastType::I64, cardinality)), - // FieldType::F64(options) => options - // .get_fastfield_cardinality() - // .map(|cardinality| (FastType::F64, cardinality)), - // FieldType::Bool(options) => options - // .get_fastfield_cardinality() - // .map(|cardinality| (FastType::Bool, cardinality)), - // FieldType::Date(options) => options - // .get_fastfield_cardinality() - // .map(|cardinality| (FastType::Date, cardinality)), - // FieldType::Facet(_) => Some((FastType::U64, Cardinality::MultiValues)), - // FieldType::Str(options) if options.is_fast() => { - // Some((FastType::U64, Cardinality::MultiValues)) - // } - // FieldType::IpAddr(options) => options - // .get_fastfield_cardinality() - // .map(|cardinality| (FastType::U128, cardinality)), - // _ => None, - // } -} impl FastFieldReaders { pub(crate) fn open(fast_field_file: FileSlice) -> io::Result { let columnar = Arc::new(ColumnarReader::open(fast_field_file)?); - 
Ok(FastFieldReaders { - columnar, - }) + Ok(FastFieldReaders { columnar }) } pub(crate) fn space_usage(&self) -> PerFieldSpaceUsage { todo!() } - pub fn column(&self, column_name: &str) { - todo!() + // TODO make opt + pub fn typed_column(&self, field: &str) -> crate::Result> + where + T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + Default + 'static, + DynamicColumn: Into>>, + { + let column_type = T::column_type(); + let Some(dynamic_column_handle) = self.columnar.read_columns(field)? + .into_iter() + .filter(|column| column.column_type() == column_type) + .next() else { + // TODO Option would make more sense. + return Err(crate::TantivyError::SchemaError(format!("No fast field of with this name"))); + }; + let dynamic_column = dynamic_column_handle.open()?; + let col: columnar::Column = dynamic_column + .into() + .ok_or_else(|| crate::TantivyError::SchemaError(format!("Invalid type")))?; + Ok(col) } - pub(crate) fn typed_fast_field_reader( - &self, - field_name: &str, - ) -> crate::Result>> { - todo!(); + pub fn typed_column_first_or_default(&self, field: &str) -> crate::Result>> + where + T: PartialOrd + Copy + HasAssociatedColumnType + Send + Sync + Default + 'static, + DynamicColumn: Into>>, + { + let col = self.typed_column(field)?; + Ok(col.first_or_default_col(T::default())) } /// Returns the `u64` fast field reader reader associated with `field`. /// /// If `field` is not a u64 fast field, this method returns an Error. - pub fn u64(&self, field: &str) -> crate::Result>> { - todo!(); + pub fn u64(&self, field: &str) -> crate::Result>> { + self.typed_column_first_or_default(field) } /// Returns the `ip` fast field reader reader associated to `field`. @@ -111,14 +92,15 @@ impl FastFieldReaders { /// If not, the fastfield reader will returns the u64-value associated with the original /// FastValue. 
pub fn u64_lenient(&self, field_name: &str) -> crate::Result>> { - self.typed_fast_field_reader(field_name) + todo!(); + // self.typed_fast_field_reader(field_name) } /// Returns the `i64` fast field reader reader associated with `field`. /// /// If `field` is not a i64 fast field, this method returns an Error. pub fn i64(&self, field_name: &str) -> crate::Result>> { - todo!() + self.typed_column_first_or_default(field_name) } /// Returns the `date` fast field reader reader associated with `field`. @@ -126,41 +108,42 @@ impl FastFieldReaders { /// If `field` is not a date fast field, this method returns an Error. pub fn date(&self, field_name: &str) -> crate::Result>> { todo!() + // self.numerical_column(field_name) } /// Returns the `f64` fast field reader reader associated with `field`. /// /// If `field` is not a f64 fast field, this method returns an Error. pub fn f64(&self, field_name: &str) -> crate::Result>> { - todo!(); + self.typed_column_first_or_default(field_name) } /// Returns the `bool` fast field reader reader associated with `field`. /// /// If `field` is not a bool fast field, this method returns an Error. pub fn bool(&self, field_name: &str) -> crate::Result>> { - todo!() + self.typed_column_first_or_default(field_name) } // Returns the `bytes` fast field reader associated with `field`. // // If `field` is not a bytes fast field, returns an Error. 
// pub fn bytes(&self, field: Field) -> crate::Result { - // let field_entry = self.schema.get_field_entry(field); - // if let FieldType::Bytes(bytes_option) = field_entry.field_type() { - // if !bytes_option.is_fast() { - // return Err(crate::TantivyError::SchemaError(format!( - // "Field {:?} is not a fast field.", - // field_entry.name() - // ))); - // } - // let fast_field_idx_file = self.fast_field_data(field, 0)?; - // let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?; - // let idx_reader = open(fast_field_idx_bytes)?; - // let data = self.fast_field_data(field, 1)?; - // BytesFastFieldReader::open(idx_reader, data) - // } else { - // Err(FastFieldNotAvailableError::new(field_entry).into()) - // } + // let field_entry = self.schema.get_field_entry(field); + // if let FieldType::Bytes(bytes_option) = field_entry.field_type() { + // if !bytes_option.is_fast() { + // return Err(crate::TantivyError::SchemaError(format!( + // "Field {:?} is not a fast field.", + // field_entry.name() + // ))); + // } + // let fast_field_idx_file = self.fast_field_data(field, 0)?; + // let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?; + // let idx_reader = open(fast_field_idx_bytes)?; + // let data = self.fast_field_data(field, 1)?; + // BytesFastFieldReader::open(idx_reader, data) + // } else { + // Err(FastFieldNotAvailableError::new(field_entry).into()) + // } // } } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index d6980de53..188bb8ab2 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,14 +1,14 @@ use std::collections::HashMap; use std::io; -use super::FastFieldType; -use crate::fastfield::{CompositeFastFieldSerializer}; use columnar::{ColumnarWriter, NumericalType, NumericalValue}; use common; use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64}; use rustc_hash::FxHashMap; use tantivy_bitpacker::BlockedBitpacker; +use super::FastFieldType; +use 
crate::fastfield::CompositeFastFieldSerializer; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::UnorderedTermId; use crate::schema::{Document, Field, FieldEntry, FieldType, Schema, Value}; @@ -57,48 +57,46 @@ fn fast_numerical_type(field_type: &FieldType) -> Option { } else { None } - }, + } FieldType::I64(numerical_option) => { if numerical_option.is_fast() { Some(FastFieldTyp::Numerical(NumericalType::I64)) } else { None } - }, + } FieldType::F64(numerical_option) => { if numerical_option.is_fast() { Some(FastFieldTyp::Numerical(NumericalType::F64)) } else { None } - }, + } FieldType::Str(str_option) => { if str_option.is_fast() { Some(FastFieldTyp::Other) } else { None } - }, + } FieldType::Bool(int_options) => { if int_options.is_fast() { Some(FastFieldTyp::Other) } else { None } - }, + } FieldType::Date(date_options) => { if date_options.is_fast() { Some(FastFieldTyp::Other) } else { None } - }, + } FieldType::Facet(_) => todo!(), FieldType::Bytes(_) => todo!(), FieldType::JsonObject(_) => todo!(), FieldType::IpAddr(_) => todo!(), - - } } @@ -109,12 +107,12 @@ impl FastFieldsWriter { let mut fast_fields = vec![None; schema.num_fields()]; // TODO see other types for (field, field_entry) in schema.fields() { - if let Some(fast_field_typ) =fast_numerical_type(field_entry.field_type()) { + if let Some(fast_field_typ) = fast_numerical_type(field_entry.field_type()) { match fast_field_typ { FastFieldTyp::Numerical(numerical_type) => { columnar_writer.force_numerical_type(field_entry.name(), numerical_type); - }, - FastFieldTyp::Other => {}, + } + FastFieldTyp::Other => {} } fast_fields[field.field_id() as usize] = Some(field_entry.name().to_string()); } @@ -132,20 +130,34 @@ impl FastFieldsWriter { } /// Indexes all of the fastfields of a new document. 
- pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { let doc_id = self.num_docs; for field_value in doc.field_values() { - if let Some(field_name) = self.fast_fields[field_value.field().field_id() as usize].as_ref() { + if let Some(field_name) = + self.fast_fields[field_value.field().field_id() as usize].as_ref() + { match &field_value.value { Value::U64(u64_val) => { - self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*u64_val)); - }, + self.columnar_writer.record_numerical( + doc_id, + field_name.as_str(), + NumericalValue::from(*u64_val), + ); + } Value::I64(i64_val) => { - self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*i64_val)); - }, + self.columnar_writer.record_numerical( + doc_id, + field_name.as_str(), + NumericalValue::from(*i64_val), + ); + } Value::F64(f64_val) => { - self.columnar_writer.record_numerical(doc_id, field_name.as_str(), NumericalValue::from(*f64_val)); - }, + self.columnar_writer.record_numerical( + doc_id, + field_name.as_str(), + NumericalValue::from(*f64_val), + ); + } Value::Str(_) => todo!(), Value::PreTokStr(_) => todo!(), Value::Bool(_) => todo!(), diff --git a/src/indexer/doc_id_mapping.rs b/src/indexer/doc_id_mapping.rs index 1489f39d4..59c637aa2 100644 --- a/src/indexer/doc_id_mapping.rs +++ b/src/indexer/doc_id_mapping.rs @@ -115,8 +115,8 @@ pub(crate) fn get_doc_id_mapping_from_field( ) -> crate::Result { todo!() // let schema = segment_writer.segment_serializer.segment().schema(); - // let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect fastfield, but not strictly required - // let fast_field = segment_writer + // let field_id = expect_field_id_for_sort_field(&schema, &sort_by_field)?; // for now expect + // fastfield, but not strictly required let fast_field = segment_writer // .fast_field_writers // 
.get_field_writer(field_id) // .ok_or_else(|| { @@ -160,15 +160,11 @@ mod tests_indexsorting { let my_text_field = schema_builder.add_text_field("text_field", text_field_options); let my_string_field = schema_builder.add_text_field("string_field", STRING | STORED); - let my_number = schema_builder.add_u64_field( - "my_number", - NumericOptions::default().set_fast(), - ); + let my_number = + schema_builder.add_u64_field("my_number", NumericOptions::default().set_fast()); - let multi_numbers = schema_builder.add_u64_field( - "multi_numbers", - NumericOptions::default().set_fast(), - ); + let multi_numbers = + schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast()); let schema = schema_builder.build(); let mut index_builder = Index::builder().schema(schema); @@ -459,7 +455,6 @@ mod tests_indexsorting { // "my_number".to_string() // ); - // let searcher = index.reader()?.searcher(); // assert_eq!(searcher.segment_readers().len(), 1); // let segment_reader = searcher.segment_reader(0); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index a1220d1eb..0d0746910 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -813,8 +813,8 @@ mod tests { use crate::indexer::NoMergePolicy; use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery}; use crate::schema::{ - self, IndexRecordOption, IpAddrOptions, NumericOptions, - TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT, + self, IndexRecordOption, IpAddrOptions, NumericOptions, TextFieldIndexing, TextOptions, + FAST, INDEXED, STORED, STRING, TEXT, }; use crate::store::DOCSTORE_CACHE_CAPACITY; use crate::{ @@ -1636,7 +1636,8 @@ mod tests { // ); // let large_text_field = schema_builder.add_text_field("large_text_field", TEXT | STORED); - // let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT | STORED); + // let multi_text_fields = schema_builder.add_text_field("multi_text_fields", TEXT | + // 
STORED); // let multi_numbers = schema_builder.add_u64_field( // "multi_numbers", @@ -2038,8 +2039,8 @@ mod tests { // // Test date // let term = - // Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as i64)); - // assert_eq!(do_search2(term).len() as u64, 0); + // Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as + // i64)); assert_eq!(do_search2(term).len() as u64, 0); // } // // search ip address // // @@ -2194,39 +2195,38 @@ mod tests { // proptest! { // #![proptest_config(ProptestConfig::with_cases(20))] // #[test] - // fn test_delete_with_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - // assert!(test_operation_strategy(&ops[..], true, false).is_ok()); - // } + // fn test_delete_with_sort_proptest_adding(ops in + // proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert! + // (test_operation_strategy(&ops[..], true, false).is_ok()); } // #[test] - // fn test_delete_without_sort_proptest_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - // assert!(test_operation_strategy(&ops[..], false, false).is_ok()); - // } + // fn test_delete_without_sort_proptest_adding(ops in + // proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert! + // (test_operation_strategy(&ops[..], false, false).is_ok()); } // #[test] - // fn test_delete_with_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - // assert!(test_operation_strategy(&ops[..], true, true).is_ok()); - // } + // fn test_delete_with_sort_proptest_with_merge_adding(ops in + // proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert! 
+ // (test_operation_strategy(&ops[..], true, true).is_ok()); } // #[test] - // fn test_delete_without_sort_proptest_with_merge_adding(ops in proptest::collection::vec(adding_operation_strategy(), 1..100)) { - // assert!(test_operation_strategy(&ops[..], false, true).is_ok()); - // } + // fn test_delete_without_sort_proptest_with_merge_adding(ops in + // proptest::collection::vec(adding_operation_strategy(), 1..100)) { assert! + // (test_operation_strategy(&ops[..], false, true).is_ok()); } // #[test] - // fn test_delete_with_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { - // assert!(test_operation_strategy(&ops[..], true, false).is_ok()); - // } + // fn test_delete_with_sort_proptest(ops in + // proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert! + // (test_operation_strategy(&ops[..], true, false).is_ok()); } // #[test] - // fn test_delete_without_sort_proptest(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { - // assert!(test_operation_strategy(&ops[..], false, false).is_ok()); - // } + // fn test_delete_without_sort_proptest(ops in + // proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert! + // (test_operation_strategy(&ops[..], false, false).is_ok()); } // #[test] - // fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..10)) { - // assert!(test_operation_strategy(&ops[..], true, true).is_ok()); - // } + // fn test_delete_with_sort_proptest_with_merge(ops in + // proptest::collection::vec(balanced_operation_strategy(), 1..10)) { assert! 
+ // (test_operation_strategy(&ops[..], true, true).is_ok()); } // #[test] - // fn test_delete_without_sort_proptest_with_merge(ops in proptest::collection::vec(balanced_operation_strategy(), 1..100)) { - // assert!(test_operation_strategy(&ops[..], false, true).is_ok()); - // } - + // fn test_delete_without_sort_proptest_with_merge(ops in + // proptest::collection::vec(balanced_operation_strategy(), 1..100)) { assert! + // (test_operation_strategy(&ops[..], false, true).is_ok()); } // } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index c34cb2917..947d52968 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -12,12 +12,9 @@ use crate::core::{Segment, SegmentReader}; use crate::directory::WritePtr; use crate::docset::{DocSet, TERMINATED}; use crate::error::DataCorruption; -use crate::fastfield::{ - AliveBitSet, Column, CompositeFastFieldSerializer, -}; +use crate::fastfield::{AliveBitSet, Column, CompositeFastFieldSerializer}; use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter}; use crate::indexer::doc_id_mapping::SegmentDocIdMapping; -use crate::indexer::sorted_doc_id_column::RemappedDocIdColumn; // use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn; use crate::indexer::SegmentSerializer; use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings}; @@ -255,60 +252,57 @@ impl IndexMerger { ) -> crate::Result<()> { debug_time!("wrie-fast-fields"); todo!(); - /* - - for (field, field_entry) in self.schema.fields() { - let field_type = field_entry.field_type(); - match field_type { - FieldType::Facet(_) | FieldType::Str(_) if field_type.is_fast() => { - let term_ordinal_mapping = term_ord_mappings.remove(&field).expect( - "Logic Error in Tantivy (Please report). 
Facet field should have required \ - a`term_ordinal_mapping`.", - ); - self.write_term_id_fast_field( - field, - &term_ordinal_mapping, - fast_field_serializer, - doc_id_mapping, - )?; - } - FieldType::U64(ref options) - | FieldType::I64(ref options) - | FieldType::F64(ref options) - | FieldType::Bool(ref options) => { - todo!() - } - FieldType::Date(ref options) => { - if options.is_fast() { - todo!(); - } - // Some(Cardinality::SingleValue) => { - // self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?; - // } - // Some(Cardinality::MultiValues) => { - // self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?; - // } - // None => {} - }, - FieldType::Bytes(byte_options) => { - if byte_options.is_fast() { - self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?; - } - } - FieldType::IpAddr(options) => { - if options.is_fast() { - todo!(); - } - }, - - FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => { - // We don't handle json fast field for the moment - // They can be implemented using what is done - // for facets in the future - } - } - } - */ + // for (field, field_entry) in self.schema.fields() { + // let field_type = field_entry.field_type(); + // match field_type { + // FieldType::Facet(_) | FieldType::Str(_) if field_type.is_fast() => { + // let term_ordinal_mapping = term_ord_mappings.remove(&field).expect( + // "Logic Error in Tantivy (Please report). 
Facet field should have required \ + // a`term_ordinal_mapping`.", + // ); + // self.write_term_id_fast_field( + // field, + // &term_ordinal_mapping, + // fast_field_serializer, + // doc_id_mapping, + // )?; + // } + // FieldType::U64(ref options) + // | FieldType::I64(ref options) + // | FieldType::F64(ref options) + // | FieldType::Bool(ref options) => { + // todo!() + // } + // FieldType::Date(ref options) => { + // if options.is_fast() { + // todo!(); + // } + // Some(Cardinality::SingleValue) => { + // self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?; + // } + // Some(Cardinality::MultiValues) => { + // self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?; + // } + // None => {} + // }, + // FieldType::Bytes(byte_options) => { + // if byte_options.is_fast() { + // self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?; + // } + // } + // FieldType::IpAddr(options) => { + // if options.is_fast() { + // todo!(); + // } + // }, + // + // FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => { + // We don't handle json fast field for the moment + // They can be implemented using what is done + // for facets in the future + // } + // } + // } Ok(()) } @@ -356,12 +350,13 @@ impl IndexMerger { fast_field_serializer: &mut CompositeFastFieldSerializer, doc_id_mapping: &SegmentDocIdMapping, ) -> crate::Result<()> { - let fast_field_accessor = RemappedDocIdColumn::new( - &self.readers, - doc_id_mapping, - self.schema.get_field_name(field), - ); - fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?; + todo!(); + // let fast_field_accessor = RemappedDocIdColumn::new( + // &self.readers, + // doc_id_mapping, + // self.schema.get_field_name(field), + // ); + // fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?; Ok(()) } @@ -817,15 +812,13 @@ mod tests { use byteorder::{BigEndian, ReadBytesExt}; use schema::FAST; - use 
crate::collector::tests::{ - FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE, - }; + use crate::collector::tests::{FastFieldTestCollector, TEST_COLLECTOR_WITH_SCORE}; use crate::collector::Count; use crate::core::Index; use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery}; use crate::schema::{ - Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, - TextFieldIndexing, INDEXED, TEXT, + Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, TextFieldIndexing, + INDEXED, TEXT, }; use crate::time::OffsetDateTime; use crate::{ @@ -1015,8 +1008,7 @@ mod tests { // } // scores // }) - searcher - .search(&term_query, &collector) + searcher.search(&term_query, &collector) }; let empty_vec = Vec::::new(); @@ -1296,7 +1288,6 @@ mod tests { Ok(()) } - // TODO re-enable // #[test] // fn test_merge_facets_sort_none() { @@ -1316,8 +1307,8 @@ mod tests { // }), // true, // ); - // // In the merge case this will not go through the doc_id mapping code, because the data is - // // sorted and disjunct + // // In the merge case this will not go through the doc_id mapping code, because the data + // is // sorted and disjunct // test_merge_facets( // Some(IndexSettings { // sort_by_field: Some(IndexSortByField { @@ -1343,8 +1334,8 @@ mod tests { // }), // true, // ); - // // In the merge case this will not go through the doc_id mapping code, because the data is - // // sorted and disjunct + // // In the merge case this will not go through the doc_id mapping code, because the data + // is // sorted and disjunct // test_merge_facets( // Some(IndexSettings { // sort_by_field: Some(IndexSortByField { @@ -1359,8 +1350,8 @@ mod tests { // force_segment_value_overlap forces the int value for sorting to have overlapping min and max // ranges between segments so that merge algorithm can't apply certain optimizations - // fn test_merge_facets(index_settings: Option, force_segment_value_overlap: bool) { - // let mut schema_builder = 
schema::Schema::builder(); + // fn test_merge_facets(index_settings: Option, force_segment_value_overlap: + // bool) { let mut schema_builder = schema::Schema::builder(); // let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); // let int_options = NumericOptions::default() // .set_fast() @@ -1529,9 +1520,7 @@ mod tests { #[test] fn test_merge_multivalued_int_fields_all_deleted() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); - let int_options = NumericOptions::default() - .set_fast() - .set_indexed(); + let int_options = NumericOptions::default().set_fast().set_indexed(); let int_field = schema_builder.add_u64_field("intvals", int_options); let index = Index::create_in_ram(schema_builder.build()); let reader = index.reader()?; @@ -1566,9 +1555,7 @@ mod tests { #[test] fn test_merge_multivalued_int_fields_simple() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); - let int_options = NumericOptions::default() - .set_fast() - .set_indexed(); + let int_options = NumericOptions::default().set_fast().set_indexed(); let int_field = schema_builder.add_u64_field("intvals", int_options); let index = Index::create_in_ram(schema_builder.build()); diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 8616c2773..67a349969 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -12,9 +12,7 @@ mod tests { fn create_test_index_posting_list_issue(index_settings: Option) -> Index { let mut schema_builder = schema::Schema::builder(); - let int_options = NumericOptions::default() - .set_fast() - .set_indexed(); + let int_options = NumericOptions::default().set_fast().set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); @@ -71,10 +69,8 @@ mod tests { let bytes_field = 
schema_builder.add_bytes_field("bytes", bytes_options); let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); - let multi_numbers = schema_builder.add_u64_field( - "multi_numbers", - NumericOptions::default().set_fast(), - ); + let multi_numbers = + schema_builder.add_u64_field("multi_numbers", NumericOptions::default().set_fast()); let text_field_options = TextOptions::default() .set_indexing_options( TextFieldIndexing::default() @@ -363,7 +359,6 @@ mod tests { // ) // .unwrap(); - // let int_field = index.schema().get_field("intval").unwrap(); // let multi_numbers = index.schema().get_field("multi_numbers").unwrap(); // let bytes_field = index.schema().get_field("bytes").unwrap(); @@ -490,9 +485,7 @@ mod bench_sorted_index_merge { use crate::{IndexSettings, IndexSortByField, IndexWriter, Order}; fn create_index(sort_by_field: Option) -> Index { let mut schema_builder = Schema::builder(); - let int_options = NumericOptions::default() - .set_fast() - .set_indexed(); + let int_options = NumericOptions::default().set_fast().set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); let schema = schema_builder.build(); diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index f2c361da5..9e6f52c51 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -19,7 +19,7 @@ mod segment_register; pub mod segment_serializer; pub mod segment_updater; mod segment_writer; -mod sorted_doc_id_column; +// mod sorted_doc_id_column; // mod sorted_doc_id_multivalue_column; mod stamper; diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index f1b087148..b5749dc39 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -408,10 +408,7 @@ fn remap_and_write( serializer.get_postings_serializer(), )?; debug!("fastfield-serialize"); - fast_field_writers.serialize( - serializer.get_fast_field_write(), - doc_id_map, - )?; + fast_field_writers.serialize(serializer.get_fast_field_write(), 
doc_id_map)?; // finalize temp docstore and create version, which reflects the doc_id_map if let Some(doc_id_map) = doc_id_map { diff --git a/src/lib.rs b/src/lib.rs index 6b457894e..87427f182 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -147,6 +147,14 @@ pub struct DateTime { pub(crate) timestamp_micros: i64, } +impl From for DateTime { + fn from(columnar_datetime: columnar::DateTime) -> Self { + DateTime { + timestamp_micros: columnar_datetime.timestamp_micros, + } + } +} + impl DateTime { /// Create new from UNIX timestamp in seconds pub const fn from_timestamp_secs(seconds: i64) -> Self { @@ -1166,5 +1174,4 @@ pub mod tests { ); assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro()); } - } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 9ecfd44d8..2a92349ee 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -15,9 +15,17 @@ use crate::indexer::{ }; // use crate::query::range_query::is_type_valid_for_fastfield_range_query; use crate::query::{ - AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, + AllQuery, + BooleanQuery, + BoostQuery, + EmptyQuery, + FuzzyTermQuery, + Occur, + PhraseQuery, + Query, // RangeQuery, - TermQuery, TermSetQuery, + TermQuery, + TermSetQuery, }; use crate::schema::{ Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions, @@ -336,91 +344,89 @@ impl QueryParser { phrase: &str, ) -> Result { todo!(); - /* - let field_entry = self.schema.get_field_entry(field); - let field_type = field_entry.field_type(); - let field_supports_ff_range_queries = field_type.is_fast() - && is_type_valid_for_fastfield_range_query(field_type.value_type()); - - if !field_type.is_indexed() && !field_supports_ff_range_queries { - return Err(QueryParserError::FieldNotIndexed( - field_entry.name().to_string(), - )); - } - if !json_path.is_empty() && field_type.value_type() != 
Type::Json { - return Err(QueryParserError::UnsupportedQuery(format!( - "Json path is not supported for field {:?}", - field_entry.name() - ))); - } - match *field_type { - FieldType::U64(_) => { - let val: u64 = u64::from_str(phrase)?; - Ok(Term::from_field_u64(field, val)) - } - FieldType::I64(_) => { - let val: i64 = i64::from_str(phrase)?; - Ok(Term::from_field_i64(field, val)) - } - FieldType::F64(_) => { - let val: f64 = f64::from_str(phrase)?; - Ok(Term::from_field_f64(field, val)) - } - FieldType::Bool(_) => { - let val: bool = bool::from_str(phrase)?; - Ok(Term::from_field_bool(field, val)) - } - FieldType::Date(_) => { - let dt = OffsetDateTime::parse(phrase, &Rfc3339)?; - Ok(Term::from_field_date(field, DateTime::from_utc(dt))) - } - FieldType::Str(ref str_options) => { - let option = str_options.get_indexing_options().ok_or_else(|| { - // This should have been seen earlier really. - QueryParserError::FieldNotIndexed(field_entry.name().to_string()) - })?; - let text_analyzer = - self.tokenizer_manager - .get(option.tokenizer()) - .ok_or_else(|| QueryParserError::UnknownTokenizer { - field: field_entry.name().to_string(), - tokenizer: option.tokenizer().to_string(), - })?; - let mut terms: Vec = Vec::new(); - let mut token_stream = text_analyzer.token_stream(phrase); - token_stream.process(&mut |token| { - let term = Term::from_field_text(field, &token.text); - terms.push(term); - }); - if terms.len() != 1 { - return Err(QueryParserError::UnsupportedQuery(format!( - "Range query boundary cannot have multiple tokens: {phrase:?}." - ))); - } - Ok(terms.into_iter().next().unwrap()) - } - FieldType::JsonObject(_) => { - // Json range are not supported. 
- Err(QueryParserError::UnsupportedQuery( - "Range query are not supported on json field.".to_string(), - )) - } - FieldType::Facet(_) => match Facet::from_text(phrase) { - Ok(facet) => Ok(Term::from_facet(field, &facet)), - Err(e) => Err(QueryParserError::from(e)), - }, - FieldType::Bytes(_) => { - let bytes = BASE64 - .decode(phrase) - .map_err(QueryParserError::ExpectedBase64)?; - Ok(Term::from_field_bytes(field, &bytes)) - } - FieldType::IpAddr(_) => { - let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr(); - Ok(Term::from_field_ip_addr(field, ip_v6)) - } - } - */ + // let field_entry = self.schema.get_field_entry(field); + // let field_type = field_entry.field_type(); + // let field_supports_ff_range_queries = field_type.is_fast() + // && is_type_valid_for_fastfield_range_query(field_type.value_type()); + // + // if !field_type.is_indexed() && !field_supports_ff_range_queries { + // return Err(QueryParserError::FieldNotIndexed( + // field_entry.name().to_string(), + // )); + // } + // if !json_path.is_empty() && field_type.value_type() != Type::Json { + // return Err(QueryParserError::UnsupportedQuery(format!( + // "Json path is not supported for field {:?}", + // field_entry.name() + // ))); + // } + // match *field_type { + // FieldType::U64(_) => { + // let val: u64 = u64::from_str(phrase)?; + // Ok(Term::from_field_u64(field, val)) + // } + // FieldType::I64(_) => { + // let val: i64 = i64::from_str(phrase)?; + // Ok(Term::from_field_i64(field, val)) + // } + // FieldType::F64(_) => { + // let val: f64 = f64::from_str(phrase)?; + // Ok(Term::from_field_f64(field, val)) + // } + // FieldType::Bool(_) => { + // let val: bool = bool::from_str(phrase)?; + // Ok(Term::from_field_bool(field, val)) + // } + // FieldType::Date(_) => { + // let dt = OffsetDateTime::parse(phrase, &Rfc3339)?; + // Ok(Term::from_field_date(field, DateTime::from_utc(dt))) + // } + // FieldType::Str(ref str_options) => { + // let option = 
str_options.get_indexing_options().ok_or_else(|| { + // This should have been seen earlier really. + // QueryParserError::FieldNotIndexed(field_entry.name().to_string()) + // })?; + // let text_analyzer = + // self.tokenizer_manager + // .get(option.tokenizer()) + // .ok_or_else(|| QueryParserError::UnknownTokenizer { + // field: field_entry.name().to_string(), + // tokenizer: option.tokenizer().to_string(), + // })?; + // let mut terms: Vec<Term> = Vec::new(); + // let mut token_stream = text_analyzer.token_stream(phrase); + // token_stream.process(&mut |token| { + // let term = Term::from_field_text(field, &token.text); + // terms.push(term); + // }); + // if terms.len() != 1 { + // return Err(QueryParserError::UnsupportedQuery(format!( + // "Range query boundary cannot have multiple tokens: {phrase:?}." + // ))); + // } + // Ok(terms.into_iter().next().unwrap()) + // } + // FieldType::JsonObject(_) => { + // Json range are not supported. + // Err(QueryParserError::UnsupportedQuery( + // "Range query are not supported on json field.".to_string(), + // )) + // } + // FieldType::Facet(_) => match Facet::from_text(phrase) { + // Ok(facet) => Ok(Term::from_facet(field, &facet)), + // Err(e) => Err(QueryParserError::from(e)), + // }, + // FieldType::Bytes(_) => { + // let bytes = BASE64 + // .decode(phrase) + // .map_err(QueryParserError::ExpectedBase64)?; + // Ok(Term::from_field_bytes(field, &bytes)) + // } + // FieldType::IpAddr(_) => { + // let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr(); + // Ok(Term::from_field_ip_addr(field, ip_v6)) + // } + // } } fn compute_logical_ast_for_leaf( @@ -744,11 +750,12 @@ fn convert_literal_to_query( value_type, lower, upper, - } => { todo!(); -// Box::new(RangeQuery::new_term_bounds( -// field, value_type, &lower, &upper, -// )) - } , + } => { + todo!(); + // Box::new(RangeQuery::new_term_bounds( + // field, value_type, &lower, &upper, + // )) + } LogicalLiteral::Set { elements, ..
} => Box::new(TermSetQuery::new(elements)), LogicalLiteral::All => Box::new(AllQuery), } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index fd3b9c01b..c08d32a3e 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -8,7 +8,7 @@ use serde_json::Value as JsonValue; use thiserror::Error; use super::ip_options::IpAddrOptions; -use super:: IntoIpv6Addr; +use super::IntoIpv6Addr; use crate::schema::bytes_options::BytesOptions; use crate::schema::facet_options::FacetOptions; use crate::schema::{ diff --git a/src/schema/ip_options.rs b/src/schema/ip_options.rs index 4d5694c31..3b45f8051 100644 --- a/src/schema/ip_options.rs +++ b/src/schema/ip_options.rs @@ -87,7 +87,7 @@ impl IpAddrOptions { /// If more than one value is associated with a fast field, only the last one is /// kept. #[must_use] - pub fn set_fast(mut self,) -> Self { + pub fn set_fast(mut self) -> Self { self.fast = true; self } diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 8ee6f4b88..3aa82b8df 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -141,9 +141,9 @@ pub use self::index_record_option::IndexRecordOption; pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions}; pub use self::json_object_options::JsonObjectOptions; pub use self::named_field_document::NamedFieldDocument; -pub use self::numeric_options::NumericOptions; #[allow(deprecated)] pub use self::numeric_options::IntOptions; +pub use self::numeric_options::NumericOptions; pub use self::schema::{DocParsingError, Schema, SchemaBuilder}; pub use self::term::Term; pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT}; diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 8f2c29972..ead22989d 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -505,19 +505,13 @@ mod tests { #[test] pub fn test_schema_serialization() { let mut schema_builder = Schema::builder(); - let count_options = NumericOptions::default() - .set_stored() - .set_fast(); - let 
popularity_options = NumericOptions::default() - .set_stored() - .set_fast(); + let count_options = NumericOptions::default().set_stored().set_fast(); + let popularity_options = NumericOptions::default().set_stored().set_fast(); let score_options = NumericOptions::default() .set_indexed() .set_fieldnorm() .set_fast(); - let is_read_options = NumericOptions::default() - .set_stored() - .set_fast(); + let is_read_options = NumericOptions::default().set_stored().set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field( "author", @@ -642,12 +636,8 @@ mod tests { #[test] pub fn test_document_to_json() { let mut schema_builder = Schema::builder(); - let count_options = NumericOptions::default() - .set_stored() - .set_fast(); - let is_read_options = NumericOptions::default() - .set_stored() - .set_fast(); + let count_options = NumericOptions::default().set_stored().set_fast(); + let is_read_options = NumericOptions::default().set_stored().set_fast(); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("author", STRING); schema_builder.add_u64_field("count", count_options); @@ -747,15 +737,9 @@ mod tests { #[test] pub fn test_parse_document() { let mut schema_builder = Schema::builder(); - let count_options = NumericOptions::default() - .set_stored() - .set_fast(); - let popularity_options = NumericOptions::default() - .set_stored() - .set_fast(); - let score_options = NumericOptions::default() - .set_indexed() - .set_fast(); + let count_options = NumericOptions::default().set_stored().set_fast(); + let popularity_options = NumericOptions::default().set_stored().set_fast(); + let score_options = NumericOptions::default().set_indexed().set_fast(); let title_field = schema_builder.add_text_field("title", TEXT); let author_field = schema_builder.add_text_field("author", STRING); let count_field = schema_builder.add_u64_field("count", count_options);