mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
Moved GCD fast field codec. Partial.
This commit is contained in:
@@ -60,7 +60,6 @@ pretty_assertions = "1.2.1"
|
||||
serde_cbor = { version = "0.11.2", optional = true }
|
||||
async-trait = "0.1.53"
|
||||
arc-swap = "1.5.0"
|
||||
gcd = "2.1.0"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = "0.3.9"
|
||||
|
||||
@@ -14,6 +14,7 @@ tantivy-bitpacker = { version="0.2", path = "../bitpacker/" }
|
||||
ownedbytes = { version = "0.3.0", path = "../ownedbytes" }
|
||||
prettytable-rs = {version="0.9.0", optional= true}
|
||||
rand = {version="0.8.3", optional= true}
|
||||
fastdivide = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
more-asserts = "0.3.0"
|
||||
|
||||
@@ -2,10 +2,10 @@ use std::io::{self, Write};
|
||||
|
||||
use common::BinarySerializable;
|
||||
use fastdivide::DividerU64;
|
||||
use fastfield_codecs::FastFieldCodecReader;
|
||||
use gcd::Gcd;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use crate::FastFieldCodecReader;
|
||||
|
||||
pub const GCD_DEFAULT: u64 = 1;
|
||||
pub const GCD_CODEC_ID: u8 = 4;
|
||||
|
||||
@@ -56,6 +56,13 @@ pub fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: u64)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Computes the greatest common divisor of `left` and `right` with the
/// Euclidean algorithm.
///
/// If one operand is zero the other operand is returned unchanged, which
/// means `compute_gcd(0, 0)` evaluates to `0`.
fn compute_gcd(mut left: u64, mut right: u64) -> u64 {
    loop {
        // Once the remainder chain reaches zero, `left` holds the divisor.
        if right == 0 {
            return left;
        }
        let remainder = left % right;
        left = right;
        right = remainder;
    }
}
|
||||
|
||||
// Find GCD for iterator of numbers
|
||||
pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
|
||||
let mut numbers = numbers.filter(|n| *n != 0);
|
||||
@@ -70,7 +77,7 @@ pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
|
||||
if remainder == 0 {
|
||||
continue;
|
||||
}
|
||||
gcd = gcd.gcd(val);
|
||||
gcd = compute_gcd(gcd, val);
|
||||
if gcd == 1 {
|
||||
return Some(1);
|
||||
}
|
||||
@@ -82,15 +89,17 @@ pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
/*
|
||||
TODO Move test
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use common::HasLen;
|
||||
|
||||
use crate::directory::{CompositeFile, RamDirectory, WritePtr};
|
||||
use crate::fastfield::serializer::FastFieldCodecEnableCheck;
|
||||
use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
|
||||
use crate::fastfield::{
|
||||
use super::{
|
||||
find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecName,
|
||||
FastFieldReader, FastFieldsWriter, ALL_CODECS,
|
||||
};
|
||||
@@ -211,6 +220,24 @@ mod tests {
|
||||
assert_eq!(test_fastfield.get(1), 200);
|
||||
assert_eq!(test_fastfield.get(2), 300);
|
||||
}
|
||||
*/
|
||||
|
||||
use crate::gcd::compute_gcd;
|
||||
use crate::gcd::find_gcd;
|
||||
|
||||
// Unit test for `compute_gcd`: covers zero operands, both argument orders,
// and equal operands.
#[test]
|
||||
fn test_compute_gcd() {
|
||||
// With a zero operand the result is the other operand (0 when both are 0).
assert_eq!(compute_gcd(0, 0), 0);
|
||||
assert_eq!(compute_gcd(4, 0), 4);
|
||||
assert_eq!(compute_gcd(0, 4), 4);
|
||||
// Swapping the arguments yields the same result.
assert_eq!(compute_gcd(1, 4), 1);
|
||||
assert_eq!(compute_gcd(4, 1), 1);
|
||||
assert_eq!(compute_gcd(4, 2), 2);
|
||||
assert_eq!(compute_gcd(10, 25), 5);
|
||||
assert_eq!(compute_gcd(25, 10), 5);
|
||||
// Equal operands: the GCD is the operand itself.
assert_eq!(compute_gcd(25, 25), 25);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn find_gcd_test() {
|
||||
@@ -221,5 +248,6 @@ mod tests {
|
||||
assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), Some(5));
|
||||
assert_eq!(find_gcd([15, 16, 10].into_iter()), Some(1));
|
||||
assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), Some(5));
|
||||
assert_eq!(find_gcd([0, 0].into_iter()), Some(0));
|
||||
}
|
||||
}
|
||||
@@ -8,6 +8,7 @@ use std::io::Write;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
pub mod bitpacked;
|
||||
pub mod gcd;
|
||||
pub mod linearinterpol;
|
||||
pub mod multilinearinterpol;
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveB
|
||||
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub(crate) use self::gcd::{find_gcd, GCDFastFieldCodec, GCD_CODEC_ID, GCD_DEFAULT};
|
||||
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
|
||||
pub use self::reader::{DynamicFastFieldReader, FastFieldReader};
|
||||
pub use self::readers::FastFieldReaders;
|
||||
@@ -38,7 +37,6 @@ mod alive_bitset;
|
||||
mod bytes;
|
||||
mod error;
|
||||
mod facet_reader;
|
||||
mod gcd;
|
||||
mod multivalued;
|
||||
mod reader;
|
||||
mod readers;
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::path::Path;
|
||||
use fastfield_codecs::bitpacked::{
|
||||
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
|
||||
};
|
||||
use fastfield_codecs::gcd::{GCDFastFieldCodec, GCD_CODEC_ID};
|
||||
use fastfield_codecs::linearinterpol::{
|
||||
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
|
||||
};
|
||||
@@ -13,7 +14,7 @@ use fastfield_codecs::multilinearinterpol::{
|
||||
};
|
||||
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecSerializer};
|
||||
|
||||
use super::{FastValue, GCDFastFieldCodec, GCD_CODEC_ID};
|
||||
use super::FastValue;
|
||||
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
|
||||
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
|
||||
use crate::schema::{Schema, FAST};
|
||||
|
||||
@@ -4,14 +4,13 @@ use common::{BinarySerializable, CountingWriter};
|
||||
pub use fastfield_codecs::bitpacked::{
|
||||
BitpackedFastFieldSerializer, BitpackedFastFieldSerializerLegacy,
|
||||
};
|
||||
use fastfield_codecs::gcd::{find_gcd, write_gcd_header, GCD_CODEC_ID, GCD_DEFAULT};
|
||||
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
pub use fastfield_codecs::{FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
|
||||
|
||||
use super::{find_gcd, FastFieldCodecName, ALL_CODECS, GCD_DEFAULT};
|
||||
use super::{FastFieldCodecName, ALL_CODECS};
|
||||
use crate::directory::{CompositeWrite, WritePtr};
|
||||
use crate::fastfield::gcd::write_gcd_header;
|
||||
use crate::fastfield::GCD_CODEC_ID;
|
||||
use crate::schema::Field;
|
||||
|
||||
/// `CompositeFastFieldSerializer` is in charge of serializing
|
||||
@@ -142,7 +141,7 @@ impl CompositeFastFieldSerializer {
|
||||
let field_write = self.composite_write.for_field_with_idx(field, idx);
|
||||
let gcd = find_gcd(iter_gen().map(|val| val - stats.min_value)).unwrap_or(GCD_DEFAULT);
|
||||
|
||||
if gcd == 1 {
|
||||
if gcd <= 1 {
|
||||
return Self::create_auto_detect_u64_fast_field_with_idx_gcd(
|
||||
self.codec_enable_checker.clone(),
|
||||
field,
|
||||
|
||||
Reference in New Issue
Block a user