add indexing for ip field

Closes #1595
This commit is contained in:
Pascal Seitz
2022-10-14 13:54:33 +08:00
parent c9cf9c952a
commit 6800fdec9d
10 changed files with 209 additions and 23 deletions

View File

@@ -212,12 +212,12 @@ pub fn block_wand(
}
/// Specialized version of [`block_wand`] for a single scorer.
/// In this case, the algorithm is simple and readable and faster (~ x3)
/// In this case, the algorithm is simple, readable and faster (~ x3)
/// than the generic algorithm.
/// The algorithm behaves as follows:
/// - While we don't hit the end of the docset:
/// - While the block max score is under the `threshold`, go to the next block.
/// - On a block, advance until the end and execute `callback`` when the doc score is greater or
/// - On a block, advance until the end and execute `callback` when the doc score is greater or
/// equal to the `threshold`.
pub fn block_wand_single_scorer(
mut scorer: TermScorer,

View File

@@ -1,4 +1,5 @@
use std::collections::HashMap;
use std::net::{AddrParseError, IpAddr};
use std::num::{ParseFloatError, ParseIntError};
use std::ops::Bound;
use std::str::{FromStr, ParseBoolError};
@@ -15,7 +16,7 @@ use crate::query::{
TermQuery,
};
use crate::schema::{
Facet, FacetParseError, Field, FieldType, IndexRecordOption, Schema, Term, Type,
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, Schema, Term, Type,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
@@ -84,6 +85,9 @@ pub enum QueryParserError {
/// The format for the facet field is invalid.
#[error("The facet field is malformed: {0}")]
FacetFormatError(#[from] FacetParseError),
/// The format for the ip field is invalid.
#[error("The ip field is malformed: {0}")]
IpFormatError(#[from] AddrParseError),
}
/// Recursively remove empty clause from the AST
@@ -401,7 +405,7 @@ impl QueryParser {
Ok(Term::from_field_bytes(field, &bytes))
}
FieldType::IpAddr(_) => Err(QueryParserError::UnsupportedQuery(
"Range query are not supported on IpAddr field.".to_string(),
"Range query are not supported on ip field.".to_string(),
)),
}
}
@@ -509,7 +513,11 @@ impl QueryParser {
let bytes_term = Term::from_field_bytes(field, &bytes);
Ok(vec![LogicalLiteral::Term(bytes_term)])
}
FieldType::IpAddr(_) => Err(QueryParserError::FieldNotIndexed(field_name.to_string())),
FieldType::IpAddr(_) => {
let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
let term = Term::from_field_ip_addr(field, ip_v6);
Ok(vec![LogicalLiteral::Term(term)])
}
}
}

View File

@@ -124,3 +124,70 @@ impl Query for TermQuery {
visitor(&self.term, false);
}
}
#[cfg(test)]
mod tests {
use std::net::{IpAddr, Ipv6Addr};
use std::str::FromStr;
use fastfield_codecs::MonotonicallyMappableToU128;
use crate::collector::{Count, TopDocs};
use crate::query::{Query, QueryParser, TermQuery};
use crate::schema::{IndexRecordOption, IntoIpv6Addr, Schema, INDEXED, STORED};
use crate::{doc, Index, Term};
#[test]
fn search_ip_test() {
let mut schema_builder = Schema::builder();
let ip_field = schema_builder.add_ip_addr_field("ip", INDEXED | STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let ip_addr_1 = IpAddr::from_str("127.0.0.1").unwrap().into_ipv6_addr();
let ip_addr_2 = Ipv6Addr::from_u128(10);
{
let mut index_writer = index.writer(3_000_000).unwrap();
index_writer
.add_document(doc!(
ip_field => ip_addr_1
))
.unwrap();
index_writer
.add_document(doc!(
ip_field => ip_addr_2
))
.unwrap();
index_writer.commit().unwrap();
}
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let assert_single_hit = |query| {
let (_top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(2), Count))
.unwrap();
assert_eq!(count, 1);
};
let query_from_text = |text: String| {
QueryParser::for_index(&index, vec![ip_field])
.parse_query(&text)
.unwrap()
};
let query_from_ip = |ip_addr| -> Box<dyn Query> {
Box::new(TermQuery::new(
Term::from_field_ip_addr(ip_field, ip_addr),
IndexRecordOption::Basic,
))
};
assert_single_hit(query_from_ip(ip_addr_1));
assert_single_hit(query_from_ip(ip_addr_2));
assert_single_hit(query_from_text("127.0.0.1".to_string()));
assert_single_hit(query_from_text("\"127.0.0.1\"".to_string()));
assert_single_hit(query_from_text(format!("\"{}\"", ip_addr_1)));
assert_single_hit(query_from_text(format!("\"{}\"", ip_addr_2)));
}
}