mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-08 10:02:55 +00:00
@@ -1656,13 +1656,15 @@ mod tests {
|
||||
|
||||
let old_reader = index.reader()?;
|
||||
|
||||
let ip_exists = |id| id % 3 != 0; // 0 does not exist
|
||||
|
||||
for &op in ops {
|
||||
match op {
|
||||
IndexingOp::AddDoc { id } => {
|
||||
let facet = Facet::from(&("/cola/".to_string() + &id.to_string()));
|
||||
let ip_from_id = Ipv6Addr::from_u128(id as u128);
|
||||
|
||||
if id % 3 == 0 {
|
||||
if !ip_exists(id) {
|
||||
// every 3rd doc has no ip field
|
||||
index_writer.add_document(doc!(id_field=>id,
|
||||
bytes_field => id.to_le_bytes().as_slice(),
|
||||
@@ -1803,7 +1805,7 @@ mod tests {
|
||||
let expected_ips = expected_ids_and_num_occurrences
|
||||
.keys()
|
||||
.flat_map(|id| {
|
||||
if id % 3 == 0 {
|
||||
if !ip_exists(*id) {
|
||||
None
|
||||
} else {
|
||||
Some(Ipv6Addr::from_u128(*id as u128))
|
||||
@@ -1815,7 +1817,7 @@ mod tests {
|
||||
let expected_ips = expected_ids_and_num_occurrences
|
||||
.keys()
|
||||
.filter_map(|id| {
|
||||
if id % 3 == 0 {
|
||||
if !ip_exists(*id) {
|
||||
None
|
||||
} else {
|
||||
Some(Ipv6Addr::from_u128(*id as u128))
|
||||
@@ -1918,7 +1920,8 @@ mod tests {
|
||||
top_docs.iter().map(|el| el.1).collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
for (existing_id, count) in expected_ids_and_num_occurrences {
|
||||
for (existing_id, count) in &expected_ids_and_num_occurrences {
|
||||
let (existing_id, count) = (*existing_id, *count);
|
||||
let assert_field = |field| do_search(&existing_id.to_string(), field).len() as u64;
|
||||
assert_eq!(assert_field(text_field), count);
|
||||
assert_eq!(assert_field(i64_field), count);
|
||||
@@ -1954,6 +1957,26 @@ mod tests {
|
||||
Term::from_field_date(date_field, DateTime::from_timestamp_secs(deleted_id as i64));
|
||||
assert_eq!(do_search2(term).len() as u64, 0);
|
||||
}
|
||||
// search ip address
|
||||
//
|
||||
for (existing_id, count) in &expected_ids_and_num_occurrences {
|
||||
let (existing_id, count) = (*existing_id, *count);
|
||||
if !ip_exists(existing_id) {
|
||||
continue;
|
||||
}
|
||||
let do_search_ip_field = |term: &str| do_search(term, ip_field).len() as u64;
|
||||
let ip_addr = Ipv6Addr::from_u128(existing_id as u128);
|
||||
// Test incoming ip as ipv6
|
||||
assert_eq!(do_search_ip_field(&format!("\"{}\"", ip_addr)), count);
|
||||
|
||||
let term = Term::from_field_ip_addr(ip_field, ip_addr);
|
||||
assert_eq!(do_search2(term).len() as u64, count);
|
||||
|
||||
// Test incoming ip as ipv4
|
||||
if let Some(ip_addr) = ip_addr.to_ipv4_mapped() {
|
||||
assert_eq!(do_search_ip_field(&format!("\"{}\"", ip_addr)), count);
|
||||
}
|
||||
}
|
||||
// test facets
|
||||
for segment_reader in searcher.segment_readers().iter() {
|
||||
let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap();
|
||||
|
||||
@@ -320,7 +320,18 @@ impl SegmentWriter {
|
||||
ctx,
|
||||
)?;
|
||||
}
|
||||
FieldType::IpAddr(_) => {}
|
||||
FieldType::IpAddr(_) => {
|
||||
let mut num_vals = 0;
|
||||
for value in values {
|
||||
num_vals += 1;
|
||||
let ip_addr = value.as_ip_addr().ok_or_else(make_schema_error)?;
|
||||
term_buffer.set_ip_addr(ip_addr);
|
||||
postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
|
||||
}
|
||||
if field_entry.has_fieldnorms() {
|
||||
self.fieldnorms_writer.record(doc_id, field, num_vals);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -212,12 +212,12 @@ pub fn block_wand(
|
||||
}
|
||||
|
||||
/// Specialized version of [`block_wand`] for a single scorer.
|
||||
/// In this case, the algorithm is simple and readable and faster (~ x3)
|
||||
/// In this case, the algorithm is simple, readable and faster (~ x3)
|
||||
/// than the generic algorithm.
|
||||
/// The algorithm behaves as follows:
|
||||
/// - While we don't hit the end of the docset:
|
||||
/// - While the block max score is under the `threshold`, go to the next block.
|
||||
/// - On a block, advance until the end and execute `callback`` when the doc score is greater or
|
||||
/// - On a block, advance until the end and execute `callback` when the doc score is greater or
|
||||
/// equal to the `threshold`.
|
||||
pub fn block_wand_single_scorer(
|
||||
mut scorer: TermScorer,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::{AddrParseError, IpAddr};
|
||||
use std::num::{ParseFloatError, ParseIntError};
|
||||
use std::ops::Bound;
|
||||
use std::str::{FromStr, ParseBoolError};
|
||||
@@ -15,7 +16,7 @@ use crate::query::{
|
||||
TermQuery,
|
||||
};
|
||||
use crate::schema::{
|
||||
Facet, FacetParseError, Field, FieldType, IndexRecordOption, Schema, Term, Type,
|
||||
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, Schema, Term, Type,
|
||||
};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
use crate::time::OffsetDateTime;
|
||||
@@ -84,6 +85,9 @@ pub enum QueryParserError {
|
||||
/// The format for the facet field is invalid.
|
||||
#[error("The facet field is malformed: {0}")]
|
||||
FacetFormatError(#[from] FacetParseError),
|
||||
/// The format for the ip field is invalid.
|
||||
#[error("The ip field is malformed: {0}")]
|
||||
IpFormatError(#[from] AddrParseError),
|
||||
}
|
||||
|
||||
/// Recursively remove empty clause from the AST
|
||||
@@ -401,7 +405,7 @@ impl QueryParser {
|
||||
Ok(Term::from_field_bytes(field, &bytes))
|
||||
}
|
||||
FieldType::IpAddr(_) => Err(QueryParserError::UnsupportedQuery(
|
||||
"Range query are not supported on IpAddr field.".to_string(),
|
||||
"Range query are not supported on ip field.".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -509,7 +513,11 @@ impl QueryParser {
|
||||
let bytes_term = Term::from_field_bytes(field, &bytes);
|
||||
Ok(vec![LogicalLiteral::Term(bytes_term)])
|
||||
}
|
||||
FieldType::IpAddr(_) => Err(QueryParserError::FieldNotIndexed(field_name.to_string())),
|
||||
FieldType::IpAddr(_) => {
|
||||
let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
|
||||
let term = Term::from_field_ip_addr(field, ip_v6);
|
||||
Ok(vec![LogicalLiteral::Term(term)])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -124,3 +124,70 @@ impl Query for TermQuery {
|
||||
visitor(&self.term, false);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::str::FromStr;
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
|
||||
use crate::collector::{Count, TopDocs};
|
||||
use crate::query::{Query, QueryParser, TermQuery};
|
||||
use crate::schema::{IndexRecordOption, IntoIpv6Addr, Schema, INDEXED, STORED};
|
||||
use crate::{doc, Index, Term};
|
||||
|
||||
#[test]
|
||||
fn search_ip_test() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let ip_field = schema_builder.add_ip_addr_field("ip", INDEXED | STORED);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let ip_addr_1 = IpAddr::from_str("127.0.0.1").unwrap().into_ipv6_addr();
|
||||
let ip_addr_2 = Ipv6Addr::from_u128(10);
|
||||
|
||||
{
|
||||
let mut index_writer = index.writer(3_000_000).unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
ip_field => ip_addr_1
|
||||
))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
ip_field => ip_addr_2
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
|
||||
let assert_single_hit = |query| {
|
||||
let (_top_docs, count) = searcher
|
||||
.search(&query, &(TopDocs::with_limit(2), Count))
|
||||
.unwrap();
|
||||
assert_eq!(count, 1);
|
||||
};
|
||||
let query_from_text = |text: String| {
|
||||
QueryParser::for_index(&index, vec![ip_field])
|
||||
.parse_query(&text)
|
||||
.unwrap()
|
||||
};
|
||||
|
||||
let query_from_ip = |ip_addr| -> Box<dyn Query> {
|
||||
Box::new(TermQuery::new(
|
||||
Term::from_field_ip_addr(ip_field, ip_addr),
|
||||
IndexRecordOption::Basic,
|
||||
))
|
||||
};
|
||||
|
||||
assert_single_hit(query_from_ip(ip_addr_1));
|
||||
assert_single_hit(query_from_ip(ip_addr_2));
|
||||
assert_single_hit(query_from_text("127.0.0.1".to_string()));
|
||||
assert_single_hit(query_from_text("\"127.0.0.1\"".to_string()));
|
||||
assert_single_hit(query_from_text(format!("\"{}\"", ip_addr_1)));
|
||||
assert_single_hit(query_from_text(format!("\"{}\"", ip_addr_2)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -6,7 +6,7 @@ use serde_json::Value as JsonValue;
|
||||
use thiserror::Error;
|
||||
|
||||
use super::ip_options::IpAddrOptions;
|
||||
use super::Cardinality;
|
||||
use super::{Cardinality, IntoIpv6Addr};
|
||||
use crate::schema::bytes_options::BytesOptions;
|
||||
use crate::schema::facet_options::FacetOptions;
|
||||
use crate::schema::{
|
||||
@@ -188,7 +188,7 @@ impl FieldType {
|
||||
FieldType::Facet(ref _facet_options) => true,
|
||||
FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(),
|
||||
FieldType::JsonObject(ref json_object_options) => json_object_options.is_indexed(),
|
||||
FieldType::IpAddr(_) => false,
|
||||
FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_indexed(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -264,7 +264,7 @@ impl FieldType {
|
||||
FieldType::Facet(_) => false,
|
||||
FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(),
|
||||
FieldType::JsonObject(ref _json_object_options) => false,
|
||||
FieldType::IpAddr(_) => false,
|
||||
FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.fieldnorms(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -309,7 +309,13 @@ impl FieldType {
|
||||
FieldType::JsonObject(ref json_obj_options) => json_obj_options
|
||||
.get_text_indexing_options()
|
||||
.map(TextFieldIndexing::index_option),
|
||||
FieldType::IpAddr(_) => None,
|
||||
FieldType::IpAddr(ref ip_addr_options) => {
|
||||
if ip_addr_options.is_indexed() {
|
||||
Some(IndexRecordOption::Basic)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -356,11 +362,8 @@ impl FieldType {
|
||||
json: JsonValue::String(field_text),
|
||||
}
|
||||
})?;
|
||||
let ip_addr_v6: Ipv6Addr = match ip_addr {
|
||||
IpAddr::V4(v4) => v4.to_ipv6_mapped(),
|
||||
IpAddr::V6(v6) => v6,
|
||||
};
|
||||
Ok(Value::IpAddr(ip_addr_v6))
|
||||
|
||||
Ok(Value::IpAddr(ip_addr.into_ipv6_addr()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::ops::BitOr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -5,12 +6,29 @@ use serde::{Deserialize, Serialize};
|
||||
use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
|
||||
use super::Cardinality;
|
||||
|
||||
/// Trait to convert into an Ipv6Addr.
pub trait IntoIpv6Addr {
    /// Consumes the object and returns an Ipv6Addr.
    fn into_ipv6_addr(self) -> Ipv6Addr;
}

impl IntoIpv6Addr for IpAddr {
    /// Converts the address to an [`Ipv6Addr`].
    ///
    /// An IPv4 address is converted to its IPv4-mapped IPv6 form
    /// (`::ffff:a.b.c.d`) via `Ipv4Addr::to_ipv6_mapped`; an IPv6 address is
    /// returned unchanged.
    fn into_ipv6_addr(self) -> Ipv6Addr {
        match self {
            IpAddr::V4(addr) => addr.to_ipv6_mapped(),
            IpAddr::V6(addr) => addr,
        }
    }
}
|
||||
|
||||
/// Define how an ip field should be handled by tantivy.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
pub struct IpAddrOptions {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
fast: Option<Cardinality>,
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fieldnorms: bool,
|
||||
}
|
||||
|
||||
impl IpAddrOptions {
|
||||
@@ -19,11 +37,21 @@ impl IpAddrOptions {
|
||||
self.fast.is_some()
|
||||
}
|
||||
|
||||
/// Returns `true` if the json object should be stored.
|
||||
/// Returns `true` if the ip address should be stored in the doc store.
|
||||
pub fn is_stored(&self) -> bool {
|
||||
self.stored
|
||||
}
|
||||
|
||||
/// Returns true iff the value is indexed and therefore searchable.
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
self.indexed
|
||||
}
|
||||
|
||||
/// Returns true if and only if the value is normed.
|
||||
pub fn fieldnorms(&self) -> bool {
|
||||
self.fieldnorms
|
||||
}
|
||||
|
||||
/// Returns the cardinality of the fastfield.
|
||||
///
|
||||
/// If the field has not been declared as a fastfield, then
|
||||
@@ -32,6 +60,16 @@ impl IpAddrOptions {
|
||||
self.fast
|
||||
}
|
||||
|
||||
/// Set the field as normed.
|
||||
///
|
||||
/// Setting an ip address as normed will generate
|
||||
/// the fieldnorm data for it.
|
||||
#[must_use]
|
||||
pub fn set_fieldnorms(mut self) -> Self {
|
||||
self.fieldnorms = true;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the field as stored
|
||||
#[must_use]
|
||||
pub fn set_stored(mut self) -> Self {
|
||||
@@ -39,6 +77,19 @@ impl IpAddrOptions {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the field as indexed.
|
||||
///
|
||||
/// Setting an ip address as indexed will generate
|
||||
/// a posting list for each value taken by the ip address.
|
||||
/// IP addresses are normalized to IPv6.
|
||||
///
|
||||
/// This is required for the field to be searchable.
|
||||
#[must_use]
|
||||
pub fn set_indexed(mut self) -> Self {
|
||||
self.indexed = true;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the field as a fast field.
|
||||
///
|
||||
/// Fast fields are designed for random access.
|
||||
@@ -61,6 +112,8 @@ impl From<()> for IpAddrOptions {
|
||||
impl From<FastFlag> for IpAddrOptions {
|
||||
fn from(_: FastFlag) -> Self {
|
||||
IpAddrOptions {
|
||||
fieldnorms: false,
|
||||
indexed: false,
|
||||
stored: false,
|
||||
fast: Some(Cardinality::SingleValue),
|
||||
}
|
||||
@@ -70,6 +123,8 @@ impl From<FastFlag> for IpAddrOptions {
|
||||
impl From<StoredFlag> for IpAddrOptions {
|
||||
fn from(_: StoredFlag) -> Self {
|
||||
IpAddrOptions {
|
||||
fieldnorms: false,
|
||||
indexed: false,
|
||||
stored: true,
|
||||
fast: None,
|
||||
}
|
||||
@@ -79,6 +134,8 @@ impl From<StoredFlag> for IpAddrOptions {
|
||||
impl From<IndexedFlag> for IpAddrOptions {
|
||||
fn from(_: IndexedFlag) -> Self {
|
||||
IpAddrOptions {
|
||||
fieldnorms: true,
|
||||
indexed: true,
|
||||
stored: false,
|
||||
fast: None,
|
||||
}
|
||||
@@ -91,6 +148,8 @@ impl<T: Into<IpAddrOptions>> BitOr<T> for IpAddrOptions {
|
||||
fn bitor(self, other: T) -> IpAddrOptions {
|
||||
let other = other.into();
|
||||
IpAddrOptions {
|
||||
fieldnorms: self.fieldnorms | other.fieldnorms,
|
||||
indexed: self.indexed | other.indexed,
|
||||
stored: self.stored | other.stored,
|
||||
fast: self.fast.or(other.fast),
|
||||
}
|
||||
|
||||
@@ -138,7 +138,7 @@ pub use self::field_type::{FieldType, Type};
|
||||
pub use self::field_value::FieldValue;
|
||||
pub use self::flags::{FAST, INDEXED, STORED};
|
||||
pub use self::index_record_option::IndexRecordOption;
|
||||
pub use self::ip_options::IpAddrOptions;
|
||||
pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
|
||||
pub use self::json_object_options::JsonObjectOptions;
|
||||
pub use self::named_field_document::NamedFieldDocument;
|
||||
pub use self::numeric_options::NumericOptions;
|
||||
|
||||
@@ -59,7 +59,7 @@ impl From<NumericOptionsDeser> for NumericOptions {
|
||||
}
|
||||
|
||||
impl NumericOptions {
|
||||
/// Returns true iff the value is stored.
|
||||
/// Returns true iff the value is stored in the doc store.
|
||||
pub fn is_stored(&self) -> bool {
|
||||
self.stored
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use std::convert::TryInto;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::net::Ipv6Addr;
|
||||
use std::{fmt, str};
|
||||
|
||||
use fastfield_codecs::MonotonicallyMappableToU128;
|
||||
|
||||
use super::Field;
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::schema::{Facet, Type};
|
||||
@@ -68,6 +71,13 @@ impl Term {
|
||||
self.0.len() == TERM_METADATA_LENGTH
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and an `Ipv6Addr`-value
|
||||
pub fn from_field_ip_addr(field: Field, ip_addr: Ipv6Addr) -> Term {
|
||||
let mut term = Self::with_type_and_field(Type::IpAddr, field);
|
||||
term.set_ip_addr(ip_addr);
|
||||
term
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and a `u64`-value
|
||||
pub fn from_field_u64(field: Field, val: u64) -> Term {
|
||||
Term::from_fast_value(field, &val)
|
||||
@@ -155,6 +165,11 @@ impl Term {
|
||||
self.set_bytes(val.to_u64().to_be_bytes().as_ref());
|
||||
}
|
||||
|
||||
/// Sets an `Ipv6Addr` value in the term.
|
||||
pub fn set_ip_addr(&mut self, val: Ipv6Addr) {
|
||||
self.set_bytes(val.to_u128().to_be_bytes().as_ref());
|
||||
}
|
||||
|
||||
/// Sets the value of a `Bytes` field.
|
||||
pub fn set_bytes(&mut self, bytes: &[u8]) {
|
||||
self.truncate_value_bytes(0);
|
||||
|
||||
Reference in New Issue
Block a user