From 0b86658389ea9e0d0edb4acbee658fabb7e60f23 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 6 Oct 2022 13:13:05 +0800 Subject: [PATCH] rename ip addr, use buffer --- src/fastfield/multivalued/writer.rs | 6 +-- src/fastfield/writer.rs | 4 +- src/indexer/merger.rs | 13 ++--- src/query/query_parser/query_parser.rs | 6 ++- src/schema/field_entry.rs | 2 +- src/schema/field_type.rs | 70 +++++++++++++------------- src/schema/schema.rs | 4 +- src/schema/value.rs | 22 ++++---- 8 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 1f7b6bb32..4cefeadac 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -318,10 +318,10 @@ impl MultiValueU128FastFieldWriter { if field_value.field == self.field { let value = field_value.value(); let ip_addr = value - .as_ip() + .as_ip_addr() .unwrap_or_else(|| panic!("expected and ip, but got {:?}", value)); - let value = ip_addr.to_u128(); - self.add_val(value); + let ip_addr_u128 = ip_addr.to_u128(); + self.add_val(ip_addr_u128); } } } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index f6a3162c4..53cd2a211 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -330,7 +330,7 @@ impl U128FastFieldWriter { match doc.get_first(self.field) { Some(v) => { let ip_addr = v - .as_ip() + .as_ip_addr() .unwrap_or_else(|| panic!("expected and ip, but got {:?}", v)); let value = ip_addr.to_u128(); @@ -359,7 +359,7 @@ impl U128FastFieldWriter { }; serialize_u128(iter_gen, self.val_count as u64, field_write)?; } else { - let iter_gen = || (0..self.val_count).map(|idx| self.vals[idx as usize]); + let iter_gen = || self.vals.iter().cloned(); serialize_u128(iter_gen, self.val_count as u64, field_write)?; } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 5ea5b5da1..3d489d662 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -6,6 +6,7 @@ use fastfield_codecs::{serialize_u128, VecColumn}; use itertools::Itertools; use measure_time::debug_time; +use super::flat_map_with_buffer::FlatMapWithBufferIter; use super::sorted_doc_id_multivalue_column::RemappedDocIdMultiValueIndexColumn; use crate::core::{Segment, SegmentReader}; use crate::docset::{DocSet, TERMINATED}; @@ -356,12 +357,12 @@ impl IndexMerger { .collect::>(); let iter_gen = || { - doc_id_mapping.iter_old_doc_addrs().flat_map(|doc_addr| { - let fast_field_reader = &fast_field_readers[doc_addr.segment_ord as usize]; - let mut out = vec![]; - fast_field_reader.get_vals(doc_addr.doc_id, &mut out); - out.into_iter() - }) + doc_id_mapping + .iter_old_doc_addrs() + .flat_map_with_buffer(|doc_addr, buffer| { + let fast_field_reader = &fast_field_readers[doc_addr.segment_ord as usize]; + fast_field_reader.get_vals(doc_addr.doc_id, buffer); + }) }; let field_write = fast_field_serializer.get_field_writer(field, 1); serialize_u128(iter_gen, doc_id_mapping.len() as u64, field_write)?; diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index d533e599c..19b027bd3 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -400,7 +400,11 @@ impl QueryParser { let bytes = base64::decode(phrase).map_err(QueryParserError::ExpectedBase64)?; Ok(Term::from_field_bytes(field, &bytes)) } - FieldType::IpAddr(_) => Ok(Term::from_field_text(field, phrase)), + FieldType::IpAddr(_) => { + return Err(QueryParserError::UnsupportedQuery( + "Range query are not supported on IpAddr field.".to_string(), + )); + } } } diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index db2530040..9c66663af 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -61,7 +61,7 @@ impl FieldEntry { Self::new(field_name, FieldType::Date(date_options)) } - /// Creates a new ip field entry. + /// Creates a new ip address field entry. pub fn new_ip_addr(field_name: String, ip_options: IpAddrOptions) -> FieldEntry { Self::new(field_name, FieldType::IpAddr(ip_options)) } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 4ae6071e9..21390e09f 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -228,7 +228,7 @@ impl FieldType { | FieldType::F64(ref int_options) | FieldType::Bool(ref int_options) => int_options.is_fast(), FieldType::Date(ref date_options) => date_options.is_fast(), - FieldType::IpAddr(ref options) => options.is_fast(), + FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_fast(), FieldType::Facet(_) => true, FieldType::JsonObject(_) => false, } @@ -325,45 +325,43 @@ impl FieldType { /// target field is a `Str`, this method will return an Error. pub fn value_from_json(&self, json: JsonValue) -> Result { match json { - JsonValue::String(field_text) => { - match self { - FieldType::Date(_) => { - let dt_with_fixed_tz = OffsetDateTime::parse(&field_text, &Rfc3339) - .map_err(|_err| ValueParsingError::TypeError { + JsonValue::String(field_text) => match self { + FieldType::Date(_) => { + let dt_with_fixed_tz = + OffsetDateTime::parse(&field_text, &Rfc3339).map_err(|_err| { + ValueParsingError::TypeError { expected: "rfc3339 format", json: JsonValue::String(field_text), - })?; - Ok(DateTime::from_utc(dt_with_fixed_tz).into()) - } - FieldType::Str(_) => Ok(Value::Str(field_text)), - FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => { - Err(ValueParsingError::TypeError { - expected: "an integer", - json: JsonValue::String(field_text), - }) - } - FieldType::Bool(_) => Err(ValueParsingError::TypeError { - expected: "a boolean", - json: JsonValue::String(field_text), - }), - FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))), - FieldType::Bytes(_) => base64::decode(&field_text) - .map(Value::Bytes) - .map_err(|_| ValueParsingError::InvalidBase64 { base64: field_text }), - FieldType::JsonObject(_) => Err(ValueParsingError::TypeError { - expected: "a json object", - json: JsonValue::String(field_text), - }), - FieldType::IpAddr(_) => { - Ok(Value::Ip(IpAddr::from_str(&field_text).map_err(|err| { - ValueParsingError::ParseError { - error: err.to_string(), - json: JsonValue::String(field_text), } - })?)) - } + })?; + Ok(DateTime::from_utc(dt_with_fixed_tz).into()) } - } + FieldType::Str(_) => Ok(Value::Str(field_text)), + FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => { + Err(ValueParsingError::TypeError { + expected: "an integer", + json: JsonValue::String(field_text), + }) + } + FieldType::Bool(_) => Err(ValueParsingError::TypeError { + expected: "a boolean", + json: JsonValue::String(field_text), + }), + FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))), + FieldType::Bytes(_) => base64::decode(&field_text) + .map(Value::Bytes) + .map_err(|_| ValueParsingError::InvalidBase64 { base64: field_text }), + FieldType::JsonObject(_) => Err(ValueParsingError::TypeError { + expected: "a json object", + json: JsonValue::String(field_text), + }), + FieldType::IpAddr(_) => Ok(Value::IpAddr(IpAddr::from_str(&field_text).map_err( + |err| ValueParsingError::ParseError { + error: err.to_string(), + json: JsonValue::String(field_text), + }, + )?)), + }, JsonValue::Number(field_val_num) => match self { FieldType::I64(_) | FieldType::Date(_) => { if let Some(field_val_i64) = field_val_num.as_i64() { diff --git a/src/schema/schema.rs b/src/schema/schema.rs index a8e2be29f..0884ef71b 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -146,9 +146,7 @@ impl SchemaBuilder { } /// Adds a ip field. - /// Returns the associated field handle - /// Internally, Tantivy simply stores ips as u64, - /// while the user supplies IpAddr values for convenience. + /// Returns the associated field handle. /// /// # Caution /// diff --git a/src/schema/value.rs b/src/schema/value.rs index 55cdc4dd7..7bc3ad2f2 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -34,14 +34,16 @@ pub enum Value { /// Json object value. JsonObject(serde_json::Map), /// Ip - Ip(IpAddr), + IpAddr(IpAddr), } impl Eq for Value {} impl Serialize for Value { fn serialize(&self, serializer: S) -> Result - where S: Serializer { + where + S: Serializer, + { match *self { Value::Str(ref v) => serializer.serialize_str(v), Value::PreTokStr(ref v) => v.serialize(serializer), @@ -53,14 +55,16 @@ impl Serialize for Value { Value::Facet(ref facet) => facet.serialize(serializer), Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes), Value::JsonObject(ref obj) => obj.serialize(serializer), - Value::Ip(ref obj) => obj.serialize(serializer), // TODO check serialization + Value::IpAddr(ref obj) => obj.serialize(serializer), // TODO check serialization } } } impl<'de> Deserialize<'de> for Value { fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> { + where + D: Deserializer<'de>, + { struct ValueVisitor; impl<'de> Visitor<'de> for ValueVisitor { @@ -208,8 +212,8 @@ impl Value { /// Returns the ip addr, provided the value is of the `Ip` type. /// (Returns None if the value is not of the `Ip` type) - pub fn as_ip(&self) -> Option { - if let Value::Ip(val) = self { + pub fn as_ip_addr(&self) -> Option { + if let Value::IpAddr(val) = self { Some(*val) } else { None @@ -225,7 +229,7 @@ impl From for Value { impl From for Value { fn from(v: IpAddr) -> Value { - Value::Ip(v) + Value::IpAddr(v) } } @@ -389,7 +393,7 @@ mod binary_serialize { serde_json::to_writer(writer, &map)?; Ok(()) } - Value::Ip(ref ip) => { + Value::IpAddr(ref ip) => { IP_CODE.serialize(writer)?; ip.to_string().serialize(writer) // TODO Check best format } @@ -465,7 +469,7 @@ mod binary_serialize { } IP_CODE => { let text = String::deserialize(reader)?; - Ok(Value::Ip(IpAddr::from_str(&text).map_err(|err| { + Ok(Value::IpAddr(IpAddr::from_str(&text).map_err(|err| { io::Error::new(ErrorKind::Other, err.to_string()) })?)) }