mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-26 13:10:41 +00:00
Blop
This commit is contained in:
@@ -21,14 +21,14 @@
|
||||
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU64;
|
||||
use columnar::MonotonicallyMappableToU64;
|
||||
pub use fastfield_codecs::Column;
|
||||
|
||||
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
|
||||
// pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
// pub use self::facet_reader::FacetReader;
|
||||
pub use self::readers::FastFieldReaders;
|
||||
pub use self::serializer::{Column, CompositeFastFieldSerializer};
|
||||
pub use self::writer::FastFieldsWriter;
|
||||
use crate::schema::{Type, Value};
|
||||
use crate::DateTime;
|
||||
@@ -38,7 +38,6 @@ mod alive_bitset;
|
||||
mod error;
|
||||
// mod facet_reader;
|
||||
mod readers;
|
||||
mod serializer;
|
||||
mod writer;
|
||||
|
||||
/// Trait for types that provide a zero value.
|
||||
@@ -71,7 +70,7 @@ impl MakeZero for Ipv6Addr {
|
||||
/// Trait for types that are allowed for fast fields:
|
||||
/// (u64, i64 and f64, bool, DateTime).
|
||||
pub trait FastValue:
|
||||
MonotonicallyMappableToU64 + Copy + Send + Sync + PartialOrd + 'static
|
||||
Copy + Send + Sync + columnar::MonotonicallyMappableToU64 + PartialOrd + 'static
|
||||
{
|
||||
/// Returns the `schema::Type` for this FastValue.
|
||||
fn to_type() -> Type;
|
||||
@@ -100,21 +99,21 @@ impl FastValue for bool {
|
||||
Type::Bool
|
||||
}
|
||||
}
|
||||
impl FastValue for DateTime {
|
||||
fn to_type() -> Type {
|
||||
Type::Date
|
||||
}
|
||||
}
|
||||
|
||||
impl MonotonicallyMappableToU64 for DateTime {
|
||||
impl columnar::MonotonicallyMappableToU64 for DateTime {
|
||||
fn to_u64(self) -> u64 {
|
||||
self.timestamp_micros.to_u64()
|
||||
}
|
||||
|
||||
fn from_u64(val: u64) -> Self {
|
||||
let timestamp_micros = i64::from_u64(val);
|
||||
DateTime { timestamp_micros }
|
||||
}
|
||||
}
|
||||
|
||||
impl FastValue for DateTime {
|
||||
fn to_type() -> Type {
|
||||
Type::Date
|
||||
DateTime {
|
||||
timestamp_micros: MonotonicallyMappableToU64::from_u64(val),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,7 +165,6 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::{HasLen, TerminatingWrite};
|
||||
use fastfield_codecs::{open, FastFieldCodecType};
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::rngs::StdRng;
|
||||
|
||||
@@ -4,15 +4,12 @@ use std::sync::Arc;
|
||||
|
||||
use columnar::{
|
||||
BytesColumn, ColumnType, ColumnValues, ColumnarReader, DynamicColumn, DynamicColumnHandle,
|
||||
HasAssociatedColumnType, NumericalType, StrColumn,
|
||||
HasAssociatedColumnType, StrColumn,
|
||||
};
|
||||
use fastfield_codecs::{open, open_u128, Column};
|
||||
use fastfield_codecs::Column;
|
||||
|
||||
use crate::directory::{CompositeFile, FileSlice};
|
||||
use crate::fastfield::{FastFieldNotAvailableError, FastValue};
|
||||
use crate::schema::{Field, FieldType, Schema};
|
||||
use crate::directory::FileSlice;
|
||||
use crate::space_usage::PerFieldSpaceUsage;
|
||||
use crate::{DateTime, TantivyError};
|
||||
|
||||
/// Provides access to all of the BitpackedFastFieldReader.
|
||||
///
|
||||
|
||||
@@ -1,122 +0,0 @@
|
||||
use std::fmt;
|
||||
use std::io::{self, Write};
|
||||
|
||||
pub use fastfield_codecs::Column;
|
||||
use fastfield_codecs::{FastFieldCodecType, MonotonicallyMappableToU64, ALL_CODEC_TYPES};
|
||||
|
||||
use crate::directory::{CompositeWrite, WritePtr};
|
||||
use crate::schema::Field;
|
||||
|
||||
/// `CompositeFastFieldSerializer` is in charge of serializing
|
||||
/// fastfields on disk.
|
||||
///
|
||||
/// Fast fields have different encodings like bit-packing.
|
||||
///
|
||||
/// `FastFieldWriter`s are in charge of pushing the data to
|
||||
/// the serializer.
|
||||
/// The serializer expects to receive the following calls.
|
||||
///
|
||||
/// * `create_auto_detect_u64_fast_field(...)`
|
||||
/// * `create_auto_detect_u64_fast_field(...)`
|
||||
/// * ...
|
||||
/// * `let bytes_fastfield = new_bytes_fast_field(...)`
|
||||
/// * `bytes_fastfield.write_all(...)`
|
||||
/// * `bytes_fastfield.write_all(...)`
|
||||
/// * `bytes_fastfield.flush()`
|
||||
/// * ...
|
||||
/// * `close()`
|
||||
pub struct CompositeFastFieldSerializer {
|
||||
composite_write: CompositeWrite<WritePtr>,
|
||||
codec_types: Vec<FastFieldCodecType>,
|
||||
}
|
||||
|
||||
impl CompositeFastFieldSerializer {
|
||||
/// New fast field serializer with all codec types
|
||||
pub fn from_write(write: WritePtr) -> io::Result<CompositeFastFieldSerializer> {
|
||||
Self::from_write_with_codec(write, &ALL_CODEC_TYPES)
|
||||
}
|
||||
|
||||
/// New fast field serializer with allowed codec types
|
||||
pub fn from_write_with_codec(
|
||||
write: WritePtr,
|
||||
codec_types: &[FastFieldCodecType],
|
||||
) -> io::Result<CompositeFastFieldSerializer> {
|
||||
let composite_write = CompositeWrite::wrap(write);
|
||||
Ok(CompositeFastFieldSerializer {
|
||||
composite_write,
|
||||
codec_types: codec_types.to_vec(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
|
||||
/// automatically.
|
||||
pub fn create_auto_detect_u64_fast_field<T: MonotonicallyMappableToU64 + fmt::Debug>(
|
||||
&mut self,
|
||||
field: Field,
|
||||
fastfield_accessor: impl Column<T>,
|
||||
) -> io::Result<()> {
|
||||
self.create_auto_detect_u64_fast_field_with_idx(field, fastfield_accessor, 0)
|
||||
}
|
||||
|
||||
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
|
||||
/// automatically.
|
||||
pub fn create_auto_detect_u64_fast_field_with_idx<
|
||||
T: MonotonicallyMappableToU64 + fmt::Debug,
|
||||
>(
|
||||
&mut self,
|
||||
field: Field,
|
||||
fastfield_accessor: impl Column<T>,
|
||||
idx: usize,
|
||||
) -> io::Result<()> {
|
||||
let field_write = self.composite_write.for_field_with_idx(field, idx);
|
||||
fastfield_codecs::serialize(fastfield_accessor, field_write, &self.codec_types)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serialize data into a new u64 fast field. The best compression codec of the provided codecs
|
||||
/// will be chosen.
|
||||
pub fn create_auto_detect_u64_fast_field_with_idx_and_codecs<
|
||||
T: MonotonicallyMappableToU64 + fmt::Debug,
|
||||
>(
|
||||
&mut self,
|
||||
field: Field,
|
||||
fastfield_accessor: impl Column<T>,
|
||||
idx: usize,
|
||||
codec_types: &[FastFieldCodecType],
|
||||
) -> io::Result<()> {
|
||||
let field_write = self.composite_write.for_field_with_idx(field, idx);
|
||||
fastfield_codecs::serialize(fastfield_accessor, field_write, codec_types)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serialize data into a new u128 fast field. The codec will be compact space compressor,
|
||||
/// which is optimized for scanning the fast field for a given range.
|
||||
pub fn create_u128_fast_field_with_idx<F: Fn() -> I, I: Iterator<Item = u128>>(
|
||||
&mut self,
|
||||
field: Field,
|
||||
iter_gen: F,
|
||||
num_vals: u32,
|
||||
idx: usize,
|
||||
) -> io::Result<()> {
|
||||
let field_write = self.composite_write.for_field_with_idx(field, idx);
|
||||
fastfield_codecs::serialize_u128(iter_gen, num_vals, field_write)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start serializing a new [u8] fast field. Use the returned writer to write data into the
|
||||
/// bytes field. To associate the bytes with documents, a separate index must be created on
|
||||
/// index 0. See bytes/writer.rs::serialize for an example.
|
||||
///
|
||||
/// The bytes will be stored as is, no compression will be applied.
|
||||
pub fn new_bytes_fast_field(&mut self, field: Field) -> impl Write + '_ {
|
||||
self.composite_write.for_field_with_idx(field, 1)
|
||||
}
|
||||
|
||||
/// Closes the serializer
|
||||
///
|
||||
/// After this call the data must be persistently saved on disk.
|
||||
pub fn close(self) -> io::Result<()> {
|
||||
self.composite_write.close()
|
||||
}
|
||||
}
|
||||
@@ -3,12 +3,10 @@ use std::io;
|
||||
|
||||
use columnar::{ColumnType, ColumnarWriter, NumericalType, NumericalValue};
|
||||
use common;
|
||||
use fastfield_codecs::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tantivy_bitpacker::BlockedBitpacker;
|
||||
|
||||
use super::FastFieldType;
|
||||
use crate::fastfield::CompositeFastFieldSerializer;
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::UnorderedTermId;
|
||||
use crate::schema::{Document, Field, FieldEntry, FieldType, Schema, Type, Value};
|
||||
|
||||
@@ -801,7 +801,6 @@ mod tests {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
use proptest::prelude::*;
|
||||
use proptest::prop_oneof;
|
||||
use proptest::strategy::Strategy;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use fastfield_codecs::MonotonicallyMappableToU64;
|
||||
use columnar::MonotonicallyMappableToU64;
|
||||
use murmurhash32::murmurhash2;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
|
||||
@@ -1,20 +1,15 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fastfield_codecs::VecColumn;
|
||||
use itertools::Itertools;
|
||||
use measure_time::debug_time;
|
||||
|
||||
use super::flat_map_with_buffer::FlatMapWithBufferIter;
|
||||
// use super::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueIndexColumn;
|
||||
use crate::core::{Segment, SegmentReader};
|
||||
use crate::directory::WritePtr;
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::{
|
||||
AliveBitSet, Column, CompositeFastFieldSerializer, FastFieldNotAvailableError,
|
||||
};
|
||||
use crate::fastfield::{AliveBitSet, Column, FastFieldNotAvailableError};
|
||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
|
||||
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
|
||||
// use crate::indexer::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueColumn;
|
||||
|
||||
@@ -2,7 +2,6 @@ use common::TerminatingWrite;
|
||||
|
||||
use crate::core::{Segment, SegmentComponent};
|
||||
use crate::directory::WritePtr;
|
||||
use crate::fastfield::CompositeFastFieldSerializer;
|
||||
use crate::fieldnorm::FieldNormsSerializer;
|
||||
use crate::postings::InvertedIndexSerializer;
|
||||
use crate::store::StoreWriter;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use fastfield_codecs::MonotonicallyMappableToU64;
|
||||
use columnar::MonotonicallyMappableToU64;
|
||||
use itertools::Itertools;
|
||||
|
||||
use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
|
||||
|
||||
@@ -5,9 +5,8 @@
|
||||
use std::net::Ipv6Addr;
|
||||
use std::ops::{Bound, RangeInclusive};
|
||||
|
||||
use columnar::Column;
|
||||
use columnar::{Column, MonotonicallyMappableToU128};
|
||||
use common::BinarySerializable;
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
|
||||
use super::map_bound;
|
||||
use crate::query::range_query::fast_field_range_query::RangeDocSet;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
use std::ops::{Bound, RangeInclusive};
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU64;
|
||||
use columnar::MonotonicallyMappableToU64;
|
||||
|
||||
use super::fast_field_range_query::RangeDocSet;
|
||||
use super::map_bound;
|
||||
|
||||
@@ -132,7 +132,7 @@ mod tests {
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::str::FromStr;
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
use columnar::MonotonicallyMappableToU128;
|
||||
|
||||
use crate::collector::{Count, TopDocs};
|
||||
use crate::query::{Query, QueryParser, TermQuery};
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::hash::{Hash, Hasher};
|
||||
use std::net::Ipv6Addr;
|
||||
use std::{fmt, str};
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
use columnar::MonotonicallyMappableToU128;
|
||||
|
||||
use super::Field;
|
||||
use crate::fastfield::FastValue;
|
||||
|
||||
@@ -319,8 +319,8 @@ mod binary_serialize {
|
||||
use std::io::{self, Read, Write};
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
use columnar::MonotonicallyMappableToU128;
|
||||
use common::{f64_to_u64, u64_to_f64, BinarySerializable};
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
|
||||
use super::Value;
|
||||
use crate::schema::Facet;
|
||||
|
||||
Reference in New Issue
Block a user