mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 00:02:55 +00:00
serialize ip as u128, add test for positions_to_docid
This commit is contained in:
@@ -107,6 +107,19 @@ impl FixedSize for u64 {
|
||||
const SIZE_IN_BYTES: usize = 8;
|
||||
}
|
||||
|
||||
impl BinarySerializable for u128 {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
writer.write_u128::<Endianness>(*self)
|
||||
}
|
||||
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
|
||||
reader.read_u128::<Endianness>()
|
||||
}
|
||||
}
|
||||
|
||||
impl FixedSize for u128 {
|
||||
const SIZE_IN_BYTES: usize = 16;
|
||||
}
|
||||
|
||||
impl BinarySerializable for f32 {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
writer.write_f32::<Endianness>(*self)
|
||||
|
||||
@@ -220,8 +220,8 @@ impl<T: MonotonicallyMappableToU128> MultiValueLength for MultiValuedU128FastFie
|
||||
///
|
||||
/// Correctness: positions needs to be sorted.
|
||||
///
|
||||
/// TODO: Instead of a linear scan we can employ a binary search to match a docid to its value
|
||||
/// position.
|
||||
/// TODO: Instead of a linear scan we can employ an exponential search into binary search to match a
|
||||
/// docid to its value position.
|
||||
fn positions_to_docids<T: MultiValueLength>(positions: &[u64], multival_idx: &T) -> Vec<DocId> {
|
||||
let mut docs = vec![];
|
||||
let mut cur_doc = 0u32;
|
||||
@@ -250,8 +250,42 @@ fn positions_to_docids<T: MultiValueLength>(positions: &[u64], multival_idx: &T)
|
||||
mod tests {
|
||||
|
||||
use crate::core::Index;
|
||||
use crate::fastfield::multivalued::reader::positions_to_docids;
|
||||
use crate::fastfield::MultiValueLength;
|
||||
use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
|
||||
|
||||
#[test]
fn test_positions_to_docid() {
    /// Test double for `MultiValueLength` backed by a plain offsets table:
    /// the value range of doc `d` is `offsets[d]..offsets[d + 1]`.
    struct OffsetsIndex {
        offsets: Vec<u64>,
    }

    impl MultiValueLength for OffsetsIndex {
        fn get_range(&self, doc_id: crate::DocId) -> std::ops::Range<u64> {
            let doc = doc_id as usize;
            self.offsets[doc]..self.offsets[doc + 1]
        }

        // Left unimplemented in this test double.
        fn get_len(&self, _doc_id: crate::DocId) -> u64 {
            todo!()
        }

        // Left unimplemented in this test double.
        fn get_total_len(&self) -> u64 {
            todo!()
        }
    }

    // Doc ranges: 0 => 0..10, 1 => 10..12, 2 => 12..15, 3 => 15..22, 4 => 22..23.
    let multival_idx = OffsetsIndex {
        offsets: vec![0, 10, 12, 15, 22, 23],
    };
    // Sorted value positions: 10 and 11 fall in doc 1; 15, 20 and 21 in doc 3; 22 in doc 4.
    let positions: Vec<u64> = vec![10, 11, 15, 20, 21, 22];

    let matched_docs = positions_to_docids(&positions, &multival_idx);
    assert_eq!(matched_docs, vec![1, 3, 4]);
}
|
||||
|
||||
#[test]
|
||||
fn test_multifastfield_reader() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
@@ -33,7 +33,7 @@ pub enum Value {
|
||||
Bytes(Vec<u8>),
|
||||
/// Json object value.
|
||||
JsonObject(serde_json::Map<String, serde_json::Value>),
|
||||
/// Ip
|
||||
/// Ip Address value
|
||||
IpAddr(IpAddr),
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ impl Serialize for Value {
|
||||
Value::Facet(ref facet) => facet.serialize(serializer),
|
||||
Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes),
|
||||
Value::JsonObject(ref obj) => obj.serialize(serializer),
|
||||
Value::IpAddr(ref obj) => obj.serialize(serializer), // TODO check serialization
|
||||
Value::IpAddr(ref obj) => obj.serialize(serializer),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -307,11 +307,11 @@ impl From<serde_json::Value> for Value {
|
||||
}
|
||||
|
||||
mod binary_serialize {
|
||||
use std::io::{self, ErrorKind, Read, Write};
|
||||
use std::io::{self, Read, Write};
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
|
||||
use common::{f64_to_u64, u64_to_f64, BinarySerializable};
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
|
||||
use super::Value;
|
||||
use crate::schema::Facet;
|
||||
@@ -391,7 +391,7 @@ mod binary_serialize {
|
||||
}
|
||||
Value::IpAddr(ref ip) => {
|
||||
IP_CODE.serialize(writer)?;
|
||||
ip.to_string().serialize(writer) // TODO Check best format
|
||||
ip.to_u128().serialize(writer)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -445,7 +445,7 @@ mod binary_serialize {
|
||||
_ => Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!(
|
||||
"No extened field type is associated with code {:?}",
|
||||
"No extended field type is associated with code {:?}",
|
||||
ext_type_code
|
||||
),
|
||||
)),
|
||||
@@ -464,10 +464,8 @@ mod binary_serialize {
|
||||
Ok(Value::JsonObject(json_map))
|
||||
}
|
||||
IP_CODE => {
|
||||
let text = String::deserialize(reader)?;
|
||||
Ok(Value::IpAddr(IpAddr::from_str(&text).map_err(|err| {
|
||||
io::Error::new(ErrorKind::Other, err.to_string())
|
||||
})?))
|
||||
let value = u128::deserialize(reader)?;
|
||||
Ok(Value::IpAddr(IpAddr::from_u128(value)))
|
||||
}
|
||||
|
||||
_ => Err(io::Error::new(
|
||||
|
||||
Reference in New Issue
Block a user