Slot block kd-tree into Tantivy.

Implemented a geometry document field with a minimal `Geometry` enum. A `Geometry` can now be added from GeoJSON parsed out of a JSON document. A polygon is triangulated; a point or line string is encoded as a degenerate triangle. Accumulated triangles are written to a block kd-tree on commit, and the original `f64` polygon is serialized for retrieval at search time. Added an intersection query method that runs against the memory-mapped block kd-tree and returns hits along with the original `f64` polygon. Implemented merging of the block kd-trees of one or more segments during a segment merge. Updated the block kd-tree to write to a Tantivy `WritePtr` instead of more generic Rust I/O.

committed by Paul Masurel
parent d3049cb323
commit dbbc8c3f65
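The degenerate-triangle encoding mentioned above can be illustrated with a small sketch. `Tri` is a hypothetical stand-in; the real `Triangle` type in src/spatial/triangle.rs is not part of this diff:

// Hypothetical sketch of the degenerate-triangle encoding described above.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Tri {
    a: (i32, i32),
    b: (i32, i32),
    c: (i32, i32),
}

/// A point is stored as a triangle whose three vertices coincide.
fn point_to_tri(p: (i32, i32)) -> Tri {
    Tri { a: p, b: p, c: p }
}

/// A line-string segment repeats one endpoint as the third vertex.
fn segment_to_tri(a: (i32, i32), b: (i32, i32)) -> Tri {
    Tri { a, b, c: b }
}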
@@ -93,7 +93,6 @@ time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
postcard = { version = "1.0.4", features = [
    "use-std",
], default-features = false }
geojson = "0.24.2"

[target.'cfg(not(windows))'.dev-dependencies]
criterion = { version = "0.5", default-features = false }
examples/geo_json.rs (new file, 55 lines)
@@ -0,0 +1,55 @@
use tantivy::collector::TopDocs;
use tantivy::query::SpatialQuery;
use tantivy::schema::{Schema, Value, SPATIAL, STORED, TEXT};
use tantivy::{Index, IndexWriter, TantivyDocument};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_json_field("properties", STORED | TEXT);
    schema_builder.add_spatial_field("geometry", STORED | SPATIAL);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema.clone());
    let mut index_writer: IndexWriter = index.writer(50_000_000)?;
    let doc = TantivyDocument::parse_json(
        &schema,
        r#"{
        "type":"Feature",
        "geometry":{
            "type":"Polygon",
            "coordinates":[[[-99.483911,45.577697],[-99.483869,45.571457],[-99.481739,45.571461],[-99.474881,45.571584],[-99.473167,45.571615],[-99.463394,45.57168],[-99.463391,45.57883],[-99.463368,45.586076],[-99.48177,45.585926],[-99.48384,45.585953],[-99.483885,45.57873],[-99.483911,45.577697]]]
        },
        "properties":{
            "admin_level":"8",
            "border_type":"city",
            "boundary":"administrative",
            "gnis:feature_id":"1267426",
            "name":"Hosmer",
            "place":"city",
            "source":"TIGER/Line® 2008 Place Shapefiles (http://www.census.gov/geo/www/tiger/)",
            "wikidata":"Q2442118",
            "wikipedia":"en:Hosmer, South Dakota"
        }
    }"#,
    )?;
    index_writer.add_document(doc)?;
    index_writer.commit()?;

    let reader = index.reader()?;
    let searcher = reader.searcher();
    let field = schema.get_field("geometry").unwrap();
    let query = SpatialQuery::new(
        field,
        [(-99.49, 45.56), (-99.45, 45.59)],
        tantivy::query::SpatialQueryType::Intersects,
    );
    let hits = searcher.search(&query, &TopDocs::with_limit(10))?;
    for (_score, doc_address) in &hits {
        let retrieved_doc: TantivyDocument = searcher.doc(*doc_address)?;
        if let Some(field_value) = retrieved_doc.get_first(field) {
            if let Some(geometry_box) = field_value.as_value().into_geometry() {
                println!("Retrieved geometry: {:?}", geometry_box);
            }
        }
    }
    assert_eq!(hits.len(), 1);
    Ok(())
}
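A note on the example: `SpatialQuery::new` takes the box as two (lon, lat) corners in degrees, min corner first. As the `SpatialWeight` implementation later in this diff suggests, each pair is swapped into (lat, lon) order internally before being handed to `search_intersects`, so callers only ever deal with lon/lat tuples.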
@@ -227,6 +227,7 @@ pub(crate) fn index_json_value<'a, V: Value<'a>>(
            ReferenceValueLeaf::IpAddr(_) => {
                unimplemented!("IP address support in dynamic fields is not yet implemented")
            }
            ReferenceValueLeaf::Geometry(_) => todo!(),
        },
        ReferenceValue::Array(elements) => {
            for val in elements {

@@ -189,6 +189,7 @@ impl FastFieldsWriter {
                        .record_str(doc_id, field_name, &token.text);
                }
            }
            ReferenceValueLeaf::Geometry(_) => todo!(),
        },
        ReferenceValue::Array(val) => {
            // TODO: Check this is the correct behaviour we want.

@@ -320,6 +321,7 @@ fn record_json_value_to_columnar_writer<'a, V: Value<'a>>(
                "Pre-tokenized string support in dynamic fields is not yet implemented"
            )
        }
        ReferenceValueLeaf::Geometry(_) => todo!(),
    },
    ReferenceValue::Array(elements) => {
        for el in elements {
@@ -142,6 +142,7 @@ impl SegmentMeta {
            SegmentComponent::FastFields => ".fast".to_string(),
            SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
            SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
            SegmentComponent::Spatial => ".spatial".to_string(),
        });
        PathBuf::from(path)
    }
@@ -28,12 +28,14 @@ pub enum SegmentComponent {
    /// Bitset describing which document of the segment is alive.
    /// (It was representing deleted docs but changed to represent alive docs from v0.17)
    Delete,
    /// HUSH
    Spatial,
}

impl SegmentComponent {
    /// Iterates through the components.
    pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
        static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
        static SEGMENT_COMPONENTS: [SegmentComponent; 9] = [
            SegmentComponent::Postings,
            SegmentComponent::Positions,
            SegmentComponent::FastFields,
@@ -42,6 +44,7 @@ impl SegmentComponent {
            SegmentComponent::Store,
            SegmentComponent::TempStore,
            SegmentComponent::Delete,
            SegmentComponent::Spatial,
        ];
        SEGMENT_COMPONENTS.iter()
    }
@@ -14,6 +14,7 @@ use crate::index::{InvertedIndexReader, Segment, SegmentComponent, SegmentId};
use crate::json_utils::json_path_sep_to_dot;
use crate::schema::{Field, IndexRecordOption, Schema, Type};
use crate::space_usage::SegmentSpaceUsage;
use crate::spatial::reader::SpatialReaders;
use crate::store::StoreReader;
use crate::termdict::TermDictionary;
use crate::{DocId, Opstamp};
@@ -43,6 +44,7 @@ pub struct SegmentReader {
    positions_composite: CompositeFile,
    fast_fields_readers: FastFieldReaders,
    fieldnorm_readers: FieldNormReaders,
    spatial_readers: SpatialReaders,

    store_file: FileSlice,
    alive_bitset_opt: Option<AliveBitSet>,
@@ -92,6 +94,11 @@ impl SegmentReader {
        &self.fast_fields_readers
    }

    /// HUSH
    pub fn spatial_fields(&self) -> &SpatialReaders {
        &self.spatial_readers
    }

    /// Accessor to the `FacetReader` associated with a given `Field`.
    pub fn facet_reader(&self, field_name: &str) -> crate::Result<FacetReader> {
        let schema = self.schema();
@@ -173,6 +180,8 @@ impl SegmentReader {
        let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?;
        let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
        let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
        let spatial_data = segment.open_read(SegmentComponent::Spatial)?;
        let spatial_readers = SpatialReaders::open(spatial_data)?;

        let original_bitset = if segment.meta().has_deletes() {
            let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?;
@@ -198,6 +207,7 @@ impl SegmentReader {
            postings_composite,
            fast_fields_readers,
            fieldnorm_readers,
            spatial_readers,
            segment_id: segment.id(),
            delete_opstamp: segment.meta().delete_opstamp(),
            store_file,
@@ -1,3 +1,5 @@
use std::collections::HashMap;
use std::io::Write;
use std::sync::Arc;

use columnar::{
@@ -6,6 +8,7 @@ use columnar::{
use common::ReadOnlyBitSet;
use itertools::Itertools;
use measure_time::debug_time;
use tempfile::NamedTempFile;

use crate::directory::WritePtr;
use crate::docset::{DocSet, TERMINATED};
@@ -17,6 +20,8 @@ use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
use crate::indexer::SegmentSerializer;
use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings};
use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
use crate::spatial::bkd::LeafPageIterator;
use crate::spatial::triangle::Triangle;
use crate::store::StoreWriter;
use crate::termdict::{TermMerger, TermOrdinal};
use crate::{DocAddress, DocId, InvertedIndexReader};
@@ -520,6 +525,71 @@ impl IndexMerger {
        Ok(())
    }

    fn write_spatial_fields(
        &self,
        serializer: &mut SegmentSerializer,
        doc_id_mapping: &SegmentDocIdMapping,
    ) -> crate::Result<()> {
        use crate::spatial::bkd::Segment;
        let mut segment_mappings: Vec<Vec<Option<DocId>>> = Vec::new();
        for reader in &self.readers {
            let max_doc = reader.max_doc();
            segment_mappings.push(vec![None; max_doc as usize]);
        }
        for (new_doc_id, old_doc_addr) in doc_id_mapping.iter_old_doc_addrs().enumerate() {
            segment_mappings[old_doc_addr.segment_ord as usize][old_doc_addr.doc_id as usize] =
                Some(new_doc_id as DocId);
        }
        let mut temp_files: HashMap<Field, NamedTempFile> = HashMap::new();

        for (field, field_entry) in self.schema.fields() {
            if matches!(field_entry.field_type(), FieldType::Spatial(_)) {
                temp_files.insert(field, NamedTempFile::new()?);
            }
        }
        for (segment_ord, reader) in self.readers.iter().enumerate() {
            for (field, temp_file) in &mut temp_files {
                let spatial_readers = reader.spatial_fields();
                let spatial_reader = match spatial_readers.get_field(*field)? {
                    Some(reader) => reader,
                    None => continue,
                };
                let segment = Segment::new(spatial_reader.get_bytes());
                for triangle_result in LeafPageIterator::new(&segment) {
                    let triangles = triangle_result?;
                    for triangle in triangles {
                        if let Some(new_doc_id) =
                            segment_mappings[segment_ord][triangle.doc_id as usize]
                        {
                            for &word in &triangle.words {
                                temp_file.write_all(&word.to_le_bytes())?;
                            }
                            temp_file.write_all(&new_doc_id.to_le_bytes())?;
                        }
                    }
                }
            }
        }
        if let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() {
            for (field, mut temp_file) in temp_files {
                // Flush and sync triangles.
                temp_file.flush()?;
                temp_file.as_file_mut().sync_all()?;
                // Memory map the triangle file.
                use memmap2::MmapOptions;
                let mut mmap = unsafe { MmapOptions::new().map_mut(temp_file.as_file())? };
                // Cast to a mutable &[Triangle] slice.
                let triangle_count = mmap.len() / std::mem::size_of::<Triangle>();
                let triangles = unsafe {
                    std::slice::from_raw_parts_mut(mmap.as_mut_ptr() as *mut Triangle, triangle_count)
                };
                // Get spatial writer and rebuild block kd-tree.
                spatial_serializer.serialize_field(field, triangles)?;
            }
        }
        Ok(())
    }

    /// Writes the merged segment by pushing information
    /// to the `SegmentSerializer`.
    ///
@@ -544,9 +614,10 @@ impl IndexMerger {

        debug!("write-storagefields");
        self.write_storable_fields(serializer.get_store_writer())?;
        debug!("write-spatialfields");
        self.write_spatial_fields(&mut serializer, &doc_id_mapping)?;
        debug!("write-fastfields");
        self.write_fast_fields(serializer.get_fast_field_write(), doc_id_mapping)?;

        debug!("close-serializer");
        serializer.close()?;
        Ok(self.max_doc)
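The doc-id rewriting in write_spatial_fields above can be summarized with a small sketch. `TriSketch` is a hypothetical stand-in; the real `Triangle` (with its `words` payload and `doc_id`) is defined outside this diff:

// Sketch of the remapping performed during merge: a triangle from segment
// `segment_ord` survives only if its document is still alive, and it is
// re-tagged with the merged doc id.
struct TriSketch {
    words: Vec<u32>,
    doc_id: u32,
}

fn remap(
    segment_mappings: &[Vec<Option<u32>>],
    segment_ord: usize,
    mut triangle: TriSketch,
) -> Option<TriSketch> {
    // None means the document was deleted, so the triangle is dropped.
    let new_doc_id = segment_mappings[segment_ord][triangle.doc_id as usize]?;
    triangle.doc_id = new_doc_id;
    Some(triangle)
}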
@@ -4,6 +4,7 @@ use crate::directory::WritePtr;
use crate::fieldnorm::FieldNormsSerializer;
use crate::index::{Segment, SegmentComponent};
use crate::postings::InvertedIndexSerializer;
use crate::spatial::serializer::SpatialSerializer;
use crate::store::StoreWriter;

/// Segment serializer is in charge of laying out on disk
@@ -12,6 +13,7 @@ pub struct SegmentSerializer {
    segment: Segment,
    pub(crate) store_writer: StoreWriter,
    fast_field_write: WritePtr,
    spatial_serializer: Option<SpatialSerializer>,
    fieldnorms_serializer: Option<FieldNormsSerializer>,
    postings_serializer: InvertedIndexSerializer,
}
@@ -35,11 +37,15 @@ impl SegmentSerializer {
        let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?;
        let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;

        let spatial_write = segment.open_write(SegmentComponent::Spatial)?;
        let spatial_serializer = SpatialSerializer::from_write(spatial_write)?;

        let postings_serializer = InvertedIndexSerializer::open(&mut segment)?;
        Ok(SegmentSerializer {
            segment,
            store_writer,
            fast_field_write,
            spatial_serializer: Some(spatial_serializer),
            fieldnorms_serializer: Some(fieldnorms_serializer),
            postings_serializer,
        })
@@ -64,6 +70,11 @@ impl SegmentSerializer {
        &mut self.fast_field_write
    }

    /// HUSH
    pub fn extract_spatial_serializer(&mut self) -> Option<SpatialSerializer> {
        self.spatial_serializer.take()
    }

    /// Extract the field norm serializer.
    ///
    /// Note the fieldnorms serializer can only be extracted once.
@@ -81,6 +92,9 @@ impl SegmentSerializer {
        if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() {
            fieldnorms_serializer.close()?;
        }
        if let Some(spatial_serializer) = self.extract_spatial_serializer() {
            spatial_serializer.close()?;
        }
        self.fast_field_write.terminate()?;
        self.postings_serializer.close()?;
        self.store_writer.close()?;
@@ -16,6 +16,7 @@ use crate::postings::{
};
use crate::schema::document::{Document, Value};
use crate::schema::{FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED};
use crate::spatial::writer::SpatialWriter;
use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
use crate::{DocId, Opstamp, TantivyError};

@@ -52,6 +53,7 @@ pub struct SegmentWriter {
    pub(crate) segment_serializer: SegmentSerializer,
    pub(crate) fast_field_writers: FastFieldsWriter,
    pub(crate) fieldnorms_writer: FieldNormsWriter,
    pub(crate) spatial_writer: SpatialWriter,
    pub(crate) json_path_writer: JsonPathWriter,
    pub(crate) json_positions_per_path: IndexingPositionsPerPath,
    pub(crate) doc_opstamps: Vec<Opstamp>,
@@ -104,6 +106,7 @@ impl SegmentWriter {
            ctx: IndexingContext::new(table_size),
            per_field_postings_writers,
            fieldnorms_writer: FieldNormsWriter::for_schema(&schema),
            spatial_writer: SpatialWriter::default(),
            json_path_writer: JsonPathWriter::default(),
            json_positions_per_path: IndexingPositionsPerPath::default(),
            segment_serializer,
@@ -130,6 +133,7 @@ impl SegmentWriter {
            self.ctx,
            self.fast_field_writers,
            &self.fieldnorms_writer,
            &mut self.spatial_writer,
            self.segment_serializer,
        )?;
        Ok(self.doc_opstamps)
@@ -142,6 +146,7 @@ impl SegmentWriter {
            + self.fieldnorms_writer.mem_usage()
            + self.fast_field_writers.mem_usage()
            + self.segment_serializer.mem_usage()
            + self.spatial_writer.mem_usage()
    }

    fn index_document<D: Document>(&mut self, doc: &D) -> crate::Result<()> {
@@ -338,6 +343,13 @@ impl SegmentWriter {
                    self.fieldnorms_writer.record(doc_id, field, num_vals);
                }
            }
            FieldType::Spatial(_) => {
                for value in values {
                    if let Some(geometry) = value.as_geometry() {
                        self.spatial_writer.add_geometry(doc_id, field, *geometry);
                    }
                }
            }
        }
    }
    Ok(())
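The `SpatialWriter` itself is not part of this diff; a plausible minimal shape, consistent with the calls made on it here and in remap_and_write below (`add_geometry`, `mem_usage`, `serialize`), might look like this sketch (all names assumed, not the real src/spatial/writer.rs):

// Assumed sketch only: triangulated geometries buffered until serialization.
#[derive(Default)]
struct SpatialWriterSketch {
    // (doc_id, triangle vertices) pairs accumulated across add_geometry calls.
    triangles: Vec<(u32, [(i32, i32); 3])>,
}

impl SpatialWriterSketch {
    fn mem_usage(&self) -> usize {
        self.triangles.capacity() * std::mem::size_of::<(u32, [(i32, i32); 3])>()
    }
}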
@@ -392,12 +404,16 @@ fn remap_and_write(
    ctx: IndexingContext,
    fast_field_writers: FastFieldsWriter,
    fieldnorms_writer: &FieldNormsWriter,
    spatial_writer: &mut SpatialWriter,
    mut serializer: SegmentSerializer,
) -> crate::Result<()> {
    debug!("remap-and-write");
    if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
        fieldnorms_writer.serialize(fieldnorms_serializer)?;
    }
    if let Some(spatial_serializer) = serializer.extract_spatial_serializer() {
        spatial_writer.serialize(spatial_serializer)?;
    }
    let fieldnorm_data = serializer
        .segment()
        .open_read(SegmentComponent::FieldNorms)?;

@@ -51,6 +51,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box<dyn Postings
        | FieldType::Date(_)
        | FieldType::Bytes(_)
        | FieldType::IpAddr(_)
        | FieldType::Spatial(_)
        | FieldType::Facet(_) => Box::<SpecializedPostingsWriter<DocIdRecorder>>::default(),
        FieldType::JsonObject(ref json_object_options) => {
            if let Some(text_indexing_option) = json_object_options.get_text_indexing_options() {
@@ -24,6 +24,7 @@ mod reqopt_scorer;
mod scorer;
mod set_query;
mod size_hint;
mod spatial_query;
mod term_query;
mod union;
mod weight;
@@ -62,6 +63,7 @@ pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::score_combiner::{DisjunctionMaxCombiner, ScoreCombiner, SumCombiner};
pub use self::scorer::Scorer;
pub use self::set_query::TermSetQuery;
pub use self::spatial_query::{SpatialQuery, SpatialQueryType};
pub use self::term_query::TermQuery;
pub use self::union::BufferedUnionScorer;
#[cfg(test)]
@@ -524,6 +524,9 @@ impl QueryParser {
            let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr();
            Ok(Term::from_field_ip_addr(field, ip_v6))
        }
        FieldType::Spatial(_) => Err(QueryParserError::UnsupportedQuery(
            "Spatial queries are not yet supported in text query parser".to_string(),
        )),
    }
}

@@ -624,6 +627,10 @@ impl QueryParser {
            let term = Term::from_field_ip_addr(field, ip_v6);
            Ok(vec![LogicalLiteral::Term(term)])
        }
        FieldType::Spatial(_) => Err(QueryParserError::UnsupportedQuery(format!(
            "Spatial queries are not yet supported for field '{}'",
            field_name
        ))),
    }
}
@@ -20,6 +20,6 @@ pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
        | Type::Date
        | Type::Json
        | Type::IpAddr => true,
        Type::Facet | Type::Bytes => false,
        Type::Facet | Type::Bytes | Type::Spatial => false,
    }
}

@@ -128,12 +128,15 @@ impl Weight for FastFieldRangeWeight {
                BoundsRange::new(bounds.lower_bound, bounds.upper_bound),
            )
        }
        Type::Bool | Type::Facet | Type::Bytes | Type::Json | Type::IpAddr => {
            Err(crate::TantivyError::InvalidArgument(format!(
                "unsupported value bytes type in json term value_bytes {:?}",
                term_value.typ()
            )))
        }
        Type::Bool
        | Type::Facet
        | Type::Bytes
        | Type::Json
        | Type::IpAddr
        | Type::Spatial => Err(crate::TantivyError::InvalidArgument(format!(
            "unsupported value bytes type in json term value_bytes {:?}",
            term_value.typ()
        ))),
    }
} else if field_type.is_ip_addr() {
    let parse_ip_from_bytes = |term: &Term| {
@@ -435,7 +438,7 @@ pub(crate) fn maps_to_u64_fastfield(typ: Type) -> bool {
    match typ {
        Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
        Type::IpAddr => false,
        Type::Str | Type::Facet | Type::Bytes | Type::Json => false,
        Type::Str | Type::Facet | Type::Bytes | Type::Json | Type::Spatial => false,
    }
}
src/query/spatial_query.rs (new file, 170 lines)
@@ -0,0 +1,170 @@
//! HUSH

use common::BitSet;

use crate::query::{BitSetDocSet, Query, Scorer, Weight};
use crate::schema::Field;
use crate::spatial::bkd::{search_intersects, Segment};
use crate::spatial::writer::as_point_i32;
use crate::{DocId, DocSet, Score, TantivyError, TERMINATED};

#[derive(Clone, Copy, Debug)]
/// HUSH
pub enum SpatialQueryType {
    /// HUSH
    Intersects,
    // Within,
    // Contains,
}

#[derive(Clone, Copy, Debug)]
/// HUSH
pub struct SpatialQuery {
    field: Field,
    bounds: [(i32, i32); 2],
    query_type: SpatialQueryType,
}

impl SpatialQuery {
    /// HUSH
    pub fn new(field: Field, bounds: [(f64, f64); 2], query_type: SpatialQueryType) -> Self {
        SpatialQuery {
            field,
            bounds: [as_point_i32(bounds[0]), as_point_i32(bounds[1])],
            query_type,
        }
    }
}

impl Query for SpatialQuery {
    fn weight(
        &self,
        _enable_scoring: super::EnableScoring<'_>,
    ) -> crate::Result<Box<dyn super::Weight>> {
        Ok(Box::new(SpatialWeight::new(
            self.field,
            self.bounds,
            self.query_type,
        )))
    }
}

pub struct SpatialWeight {
    field: Field,
    bounds: [(i32, i32); 2],
    query_type: SpatialQueryType,
}

impl SpatialWeight {
    fn new(field: Field, bounds: [(i32, i32); 2], query_type: SpatialQueryType) -> Self {
        SpatialWeight {
            field,
            bounds,
            query_type,
        }
    }
}

impl Weight for SpatialWeight {
    fn scorer(
        &self,
        reader: &crate::SegmentReader,
        boost: crate::Score,
    ) -> crate::Result<Box<dyn super::Scorer>> {
        let spatial_reader = reader
            .spatial_fields()
            .get_field(self.field)?
            .ok_or_else(|| TantivyError::SchemaError(format!("No spatial data for field")))?;
        let block_kd_tree = Segment::new(spatial_reader.get_bytes());
        match self.query_type {
            SpatialQueryType::Intersects => {
                let mut include = BitSet::with_max_value(reader.max_doc());
                search_intersects(
                    &block_kd_tree,
                    block_kd_tree.root_offset,
                    &[
                        self.bounds[0].1,
                        self.bounds[0].0,
                        self.bounds[1].1,
                        self.bounds[1].0,
                    ],
                    &mut include,
                )?;
                Ok(Box::new(SpatialScorer::new(boost, include, None)))
            }
        }
    }
    fn explain(
        &self,
        _reader: &crate::SegmentReader,
        _doc: DocId,
    ) -> crate::Result<super::Explanation> {
        todo!();
    }
}

struct SpatialScorer {
    include: BitSetDocSet,
    exclude: Option<BitSet>,
    doc_id: DocId,
    score: Score,
}

impl SpatialScorer {
    pub fn new(score: Score, include: BitSet, exclude: Option<BitSet>) -> Self {
        let mut scorer = SpatialScorer {
            include: BitSetDocSet::from(include),
            exclude,
            doc_id: 0,
            score,
        };
        scorer.prime();
        scorer
    }
    fn prime(&mut self) {
        self.doc_id = self.include.doc();
        while self.exclude() {
            self.doc_id = self.include.advance();
        }
    }

    fn exclude(&self) -> bool {
        if self.doc_id == TERMINATED {
            return false;
        }
        match &self.exclude {
            Some(exclude) => exclude.contains(self.doc_id),
            None => false,
        }
    }
}

impl Scorer for SpatialScorer {
    fn score(&mut self) -> Score {
        self.score
    }
}

impl DocSet for SpatialScorer {
    fn advance(&mut self) -> DocId {
        if self.doc_id == TERMINATED {
            return TERMINATED;
        }
        self.doc_id = self.include.advance();
        while self.exclude() {
            self.doc_id = self.include.advance();
        }
        self.doc_id
    }

    fn size_hint(&self) -> u32 {
        match &self.exclude {
            Some(exclude) => self.include.size_hint() - exclude.len() as u32,
            None => self.include.size_hint(),
        }
    }

    fn doc(&self) -> DocId {
        self.doc_id
    }
}
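A short usage sketch for the scorer above: like any tantivy `DocSet`, it is positioned on its first hit after construction (the `prime()` call) and is then driven with `advance()` until `TERMINATED`:

use tantivy::{DocId, DocSet, TERMINATED};

// Drains any DocSet (e.g. the SpatialScorer above) into a Vec of doc ids.
fn drain_docset<D: DocSet>(docset: &mut D) -> Vec<DocId> {
    let mut hits = Vec::new();
    let mut doc = docset.doc();
    while doc != TERMINATED {
        hits.push(doc);
        doc = docset.advance();
    }
    hits
}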
@@ -22,6 +22,7 @@ use super::se::BinaryObjectSerializer;
use super::{OwnedValue, Value};
use crate::schema::document::type_codes;
use crate::schema::{Facet, Field};
use crate::spatial::geometry::Geometry;
use crate::store::DocStoreVersion;
use crate::tokenizer::PreTokenizedString;

@@ -129,6 +130,9 @@ pub trait ValueDeserializer<'de> {
    /// Attempts to deserialize a pre-tokenized string value from the deserializer.
    fn deserialize_pre_tokenized_string(self) -> Result<PreTokenizedString, DeserializeError>;

    /// HUSH
    fn deserialize_geometry(self) -> Result<Geometry, DeserializeError>;

    /// Attempts to deserialize the value using a given visitor.
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeserializeError>
    where V: ValueVisitor;
@@ -166,6 +170,8 @@ pub enum ValueType {
    /// A JSON object value. Deprecated.
    #[deprecated(note = "We keep this for backwards compatibility, use Object instead")]
    JSONObject,
    /// HUSH
    Geometry,
}

/// A value visitor for deserializing a document value.
@@ -246,6 +252,12 @@ pub trait ValueVisitor {
        Err(DeserializeError::UnsupportedType(ValueType::PreTokStr))
    }

    #[inline]
    /// Called when the deserializer visits a geometry value.
    fn visit_geometry(&self, _val: Geometry) -> Result<Self::Value, DeserializeError> {
        Err(DeserializeError::UnsupportedType(ValueType::Geometry))
    }

    #[inline]
    /// Called when the deserializer visits an array.
    fn visit_array<'de, A>(&self, _access: A) -> Result<Self::Value, DeserializeError>
@@ -380,6 +392,7 @@ where R: Read

    match ext_type_code {
        type_codes::TOK_STR_EXT_CODE => ValueType::PreTokStr,
        type_codes::GEO_EXT_CODE => ValueType::Geometry,
        _ => {
            return Err(DeserializeError::from(io::Error::new(
                io::ErrorKind::InvalidData,
@@ -495,6 +508,11 @@ where R: Read
            .map_err(DeserializeError::from)
    }

    fn deserialize_geometry(self) -> Result<Geometry, DeserializeError> {
        self.validate_type(ValueType::Geometry)?;
        <Geometry as BinarySerializable>::deserialize(self.reader).map_err(DeserializeError::from)
    }

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeserializeError>
    where V: ValueVisitor {
        match self.value_type {
@@ -539,6 +557,10 @@ where R: Read
            let val = self.deserialize_pre_tokenized_string()?;
            visitor.visit_pre_tokenized_string(val)
        }
        ValueType::Geometry => {
            let val = self.deserialize_geometry()?;
            visitor.visit_geometry(val)
        }
        ValueType::Array => {
            let access =
                BinaryArrayDeserializer::from_reader(self.reader, self.doc_store_version)?;
@@ -13,6 +13,7 @@ use crate::schema::document::{
};
use crate::schema::field_type::ValueParsingError;
use crate::schema::{Facet, Field, NamedFieldDocument, OwnedValue, Schema};
use crate::spatial::geometry::Geometry;
use crate::tokenizer::PreTokenizedString;

#[repr(C, packed)]
@@ -254,6 +255,7 @@ impl CompactDoc {
            }
            ReferenceValueLeaf::IpAddr(num) => write_into(&mut self.node_data, num.to_u128()),
            ReferenceValueLeaf::PreTokStr(pre_tok) => write_into(&mut self.node_data, *pre_tok),
            ReferenceValueLeaf::Geometry(geometry) => write_into(&mut self.node_data, *geometry),
        };
        ValueAddr { type_id, val_addr }
    }
@@ -464,6 +466,13 @@ impl<'a> CompactDocValue<'a> {
                .map(Into::into)
                .map(ReferenceValueLeaf::PreTokStr)
                .map(Into::into),
            ValueType::Geometry => self
                .container
                .read_from::<Geometry>(addr)
                .map(Into::into)
                .map(ReferenceValueLeaf::Geometry)
                .map(Into::into),
            ValueType::Object => Ok(ReferenceValue::Object(CompactDocObjectIter::new(
                self.container,
                addr,
@@ -542,6 +551,8 @@ pub enum ValueType {
    Object = 11,
    /// Pre-tokenized str type,
    Array = 12,
    /// HUSH
    Geometry = 13,
}

impl BinarySerializable for ValueType {
@@ -587,6 +598,7 @@ impl<'a> From<&ReferenceValueLeaf<'a>> for ValueType {
            ReferenceValueLeaf::PreTokStr(_) => ValueType::PreTokStr,
            ReferenceValueLeaf::Facet(_) => ValueType::Facet,
            ReferenceValueLeaf::Bytes(_) => ValueType::Bytes,
            ReferenceValueLeaf::Geometry(_) => ValueType::Geometry,
        }
    }
}

@@ -273,4 +273,5 @@ pub(crate) mod type_codes {

    // Extended type codes
    pub const TOK_STR_EXT_CODE: u8 = 0;
    pub const GEO_EXT_CODE: u8 = 1;
}
@@ -15,6 +15,7 @@ use crate::schema::document::{
    ValueDeserializer, ValueVisitor,
};
use crate::schema::Facet;
use crate::spatial::geometry::Geometry;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;

@@ -49,6 +50,8 @@ pub enum OwnedValue {
    Object(Vec<(String, Self)>),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
    IpAddr(Ipv6Addr),
    /// A spatial `Geometry`.
    Geometry(Geometry),
}

impl AsRef<OwnedValue> for OwnedValue {
@@ -77,6 +80,9 @@ impl<'a> Value<'a> for &'a OwnedValue {
            OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(),
            OwnedValue::Array(array) => ReferenceValue::Array(array.iter()),
            OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())),
            OwnedValue::Geometry(geometry) => {
                ReferenceValueLeaf::Geometry(Box::new(geometry.clone())).into()
            }
        }
    }
}
@@ -136,6 +142,10 @@ impl ValueDeserialize for OwnedValue {
        Ok(OwnedValue::PreTokStr(val))
    }

    fn visit_geometry(&self, val: Geometry) -> Result<Self::Value, DeserializeError> {
        Ok(OwnedValue::Geometry(val))
    }

    fn visit_array<'de, A>(&self, mut access: A) -> Result<Self::Value, DeserializeError>
    where A: ArrayAccess<'de> {
        let mut elements = Vec::with_capacity(access.size_hint());
@@ -198,6 +208,7 @@ impl serde::Serialize for OwnedValue {
                }
            }
            OwnedValue::Array(ref array) => array.serialize(serializer),
            OwnedValue::Geometry(_) => todo!(),
        }
    }
}
@@ -285,6 +296,7 @@ impl<'a, V: Value<'a>> From<ReferenceValue<'a, V>> for OwnedValue {
                ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val),
                ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val),
                ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()),
                ReferenceValueLeaf::Geometry(_) => todo!(),
            },
            ReferenceValue::Array(val) => {
                OwnedValue::Array(val.map(|v| v.as_value().into()).collect())

@@ -133,6 +133,10 @@ where W: Write
            self.write_type_code(type_codes::EXT_CODE)?;
            self.serialize_with_type_code(type_codes::TOK_STR_EXT_CODE, &*val)
        }
        ReferenceValueLeaf::Geometry(val) => {
            self.write_type_code(type_codes::EXT_CODE)?;
            self.serialize_with_type_code(type_codes::GEO_EXT_CODE, &*val)
        }
    },
    ReferenceValue::Array(elements) => {
        self.write_type_code(type_codes::ARRAY_CODE)?;
@@ -3,6 +3,7 @@ use std::net::Ipv6Addr;

use common::DateTime;

use crate::spatial::geometry::Geometry;
use crate::tokenizer::PreTokenizedString;

/// A single field value.
@@ -108,6 +109,12 @@ pub trait Value<'a>: Send + Sync + Debug {
            None
        }
    }

    #[inline]
    /// HUSH
    fn as_geometry(&self) -> Option<Box<Geometry>> {
        self.as_leaf().and_then(|leaf| leaf.into_geometry())
    }
}

/// An enum representing a leaf value for tantivy to index.
@@ -136,6 +143,8 @@ pub enum ReferenceValueLeaf<'a> {
    Bool(bool),
    /// Pre-tokenized str type,
    PreTokStr(Box<PreTokenizedString>),
    /// HUSH
    Geometry(Box<Geometry>),
}

impl From<u64> for ReferenceValueLeaf<'_> {
@@ -220,6 +229,9 @@ impl<'a, T: Value<'a> + ?Sized> From<ReferenceValueLeaf<'a>> for ReferenceValue<
            ReferenceValueLeaf::PreTokStr(val) => {
                ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(val))
            }
            ReferenceValueLeaf::Geometry(val) => {
                ReferenceValue::Leaf(ReferenceValueLeaf::Geometry(val))
            }
        }
    }
}
@@ -331,6 +343,16 @@ impl<'a> ReferenceValueLeaf<'a> {
            None
        }
    }

    #[inline]
    /// HUSH
    pub fn into_geometry(self) -> Option<Box<Geometry>> {
        if let Self::Geometry(val) = self {
            Some(val)
        } else {
            None
        }
    }
}

/// An enum representing a value for tantivy to index.
@@ -448,4 +470,10 @@ where V: Value<'a>
    pub fn is_object(&self) -> bool {
        matches!(self, Self::Object(_))
    }

    #[inline]
    /// HUSH
    pub fn into_geometry(self) -> Option<Box<Geometry>> {
        self.into_leaf().and_then(|leaf| leaf.into_geometry())
    }
}
@@ -1,6 +1,7 @@
use serde::{Deserialize, Serialize};

use super::ip_options::IpAddrOptions;
use super::spatial_options::SpatialOptions;
use crate::schema::bytes_options::BytesOptions;
use crate::schema::{
    is_valid_field_name, DateOptions, FacetOptions, FieldType, JsonObjectOptions, NumericOptions,
@@ -80,6 +81,11 @@ impl FieldEntry {
        Self::new(field_name, FieldType::JsonObject(json_object_options))
    }

    /// Creates a field entry for a spatial field
    pub fn new_spatial(field_name: String, spatial_options: SpatialOptions) -> FieldEntry {
        Self::new(field_name, FieldType::Spatial(spatial_options))
    }

    /// Returns the name of the field
    pub fn name(&self) -> &str {
        &self.name
@@ -129,6 +135,7 @@ impl FieldEntry {
            FieldType::Bytes(ref options) => options.is_stored(),
            FieldType::JsonObject(ref options) => options.is_stored(),
            FieldType::IpAddr(ref options) => options.is_stored(),
            FieldType::Spatial(ref options) => options.is_stored(),
        }
    }
}
@@ -9,6 +9,7 @@ use serde_json::Value as JsonValue;
use thiserror::Error;

use super::ip_options::IpAddrOptions;
use super::spatial_options::SpatialOptions;
use super::IntoIpv6Addr;
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
@@ -16,6 +17,7 @@ use crate::schema::{
    DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, OwnedValue,
    TextFieldIndexing, TextOptions,
};
use crate::spatial::geometry::Geometry;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::tokenizer::PreTokenizedString;
@@ -71,6 +73,8 @@ pub enum Type {
    Json = b'j',
    /// IpAddr
    IpAddr = b'p',
    /// Spatial
    Spatial = b't',
}

impl From<ColumnType> for Type {
@@ -139,6 +143,7 @@ impl Type {
            Type::Bytes => "Bytes",
            Type::Json => "Json",
            Type::IpAddr => "IpAddr",
            Type::Spatial => "Spatial",
        }
    }

@@ -189,6 +194,8 @@ pub enum FieldType {
    JsonObject(JsonObjectOptions),
    /// IpAddr field
    IpAddr(IpAddrOptions),
    /// Spatial field
    Spatial(SpatialOptions),
}

impl FieldType {
@@ -205,6 +212,7 @@ impl FieldType {
            FieldType::Bytes(_) => Type::Bytes,
            FieldType::JsonObject(_) => Type::Json,
            FieldType::IpAddr(_) => Type::IpAddr,
            FieldType::Spatial(_) => Type::Spatial,
        }
    }

@@ -241,6 +249,7 @@ impl FieldType {
            FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(),
            FieldType::JsonObject(ref json_object_options) => json_object_options.is_indexed(),
            FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_indexed(),
            FieldType::Spatial(ref _spatial_options) => true,
        }
    }

@@ -278,6 +287,7 @@ impl FieldType {
            FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_fast(),
            FieldType::Facet(_) => true,
            FieldType::JsonObject(ref json_object_options) => json_object_options.is_fast(),
            FieldType::Spatial(_) => false,
        }
    }

@@ -297,6 +307,7 @@ impl FieldType {
            FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(),
            FieldType::JsonObject(ref _json_object_options) => false,
            FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.fieldnorms(),
            FieldType::Spatial(_) => false,
        }
    }

@@ -348,6 +359,7 @@ impl FieldType {
                None
            }
        }
        FieldType::Spatial(_) => todo!(),
    }
}
@@ -449,6 +461,10 @@ impl FieldType {

            Ok(OwnedValue::IpAddr(ip_addr.into_ipv6_addr()))
        }
        FieldType::Spatial(_) => Err(ValueParsingError::TypeError {
            expected: "spatial field parsing not implemented",
            json: JsonValue::String(field_text),
        }),
    }
}
JsonValue::Number(field_val_num) => match self {
@@ -508,6 +524,10 @@ impl FieldType {
        expected: "a string with an ip addr",
        json: JsonValue::Number(field_val_num),
    }),
    FieldType::Spatial(_) => Err(ValueParsingError::TypeError {
        expected: "spatial field parsing not implemented",
        json: JsonValue::Number(field_val_num),
    }),
},
JsonValue::Object(json_map) => match self {
    FieldType::Str(_) => {
@@ -523,6 +543,14 @@ impl FieldType {
        }
    }
    FieldType::JsonObject(_) => Ok(OwnedValue::from(json_map)),
    FieldType::Spatial(_) => Ok(OwnedValue::Geometry(
        Geometry::from_geojson(&json_map).map_err(|e| {
            ValueParsingError::ParseError {
                error: format!("{:?}", e),
                json: JsonValue::Object(json_map),
            }
        })?,
    )),
    _ => Err(ValueParsingError::TypeError {
        expected: self.value_type().name(),
        json: JsonValue::Object(json_map),
@@ -170,6 +170,7 @@ pub(crate) fn value_type_to_column_type(typ: Type) -> Option<ColumnType> {
        Type::Bytes => Some(ColumnType::Bytes),
        Type::IpAddr => Some(ColumnType::IpAddr),
        Type::Json => None,
        Type::Spatial => None,
    }
}
@@ -194,6 +194,16 @@ impl SchemaBuilder {
        self.add_field(field_entry)
    }

    /// Adds a spatial entry to the schema in build.
    pub fn add_spatial_field<T: Into<SpatialOptions>>(
        &mut self,
        field_name: &str,
        field_options: T,
    ) -> Field {
        let field_entry = FieldEntry::new_spatial(field_name.to_string(), field_options.into());
        self.add_field(field_entry)
    }

    /// Adds a field entry to the schema in build.
    pub fn add_field(&mut self, field_entry: FieldEntry) -> Field {
        let field = Field::from_field_id(self.fields.len() as u32);
@@ -39,13 +39,15 @@ impl From<StoredFlag> for SpatialOptions {
    }
}

#[cfg(test)]
mod tests {
    use crate::schema::*;

    #[test]
    fn test_field_options() {
        let field_options = STORED | SPATIAL;
        assert!(field_options.is_stored());
    }
}
// #[cfg(test)]
// mod tests {
//     use crate::schema::*;
//
//     #[test]
//     fn test_field_options() {
//         let field_options = STORED | SPATIAL;
//         assert!(field_options.is_stored());
//         let mut schema_builder = Schema::builder();
//         schema_builder.add_spatial_index("where", SPATIAL | STORED);
//     }
// }
@@ -503,6 +503,9 @@ where B: AsRef<[u8]>
            Type::IpAddr => {
                write_opt(f, self.as_ip_addr())?;
            }
            Type::Spatial => {
                write!(f, "<spatial term formatting not yet implemented>")?;
            }
        }
        Ok(())
    }

@@ -125,6 +125,7 @@ impl SegmentSpaceUsage {
            SegmentComponent::Store => ComponentSpaceUsage::Store(self.store().clone()),
            SegmentComponent::TempStore => ComponentSpaceUsage::Store(self.store().clone()),
            Delete => Basic(self.deletes()),
            Spatial => todo!(),
        }
    }
@@ -10,10 +10,11 @@
//! branch nodes), enabling zero-copy access through memory-mapped segments without upfront
//! decompression.
use std::io;
use std::io::{Seek, Write};
use std::io::Write;

use common::BitSet;
use common::{BitSet, CountingWriter};

use crate::directory::WritePtr;
use crate::spatial::delta::{compress, decompress, Compressible};
use crate::spatial::triangle::Triangle;

@@ -88,8 +89,8 @@ struct CompressibleTriangleI32<'a> {
impl<'a> CompressibleTriangleI32<'a> {
    fn new(triangles: &'a [Triangle], dimension: usize) -> Self {
        CompressibleTriangleI32 {
            triangles: triangles,
            dimension: dimension,
            triangles,
            dimension,
        }
    }
}
@@ -110,9 +111,7 @@ struct CompressibleTriangleDocID<'a> {

impl<'a> CompressibleTriangleDocID<'a> {
    fn new(triangles: &'a [Triangle]) -> Self {
        CompressibleTriangleDocID {
            triangles: triangles,
        }
        CompressibleTriangleDocID { triangles }
    }
}

@@ -136,12 +135,12 @@ impl<'a> Compressible for CompressibleTriangleDocID<'a> {
// We do not compress the tree nodes. We read them directly from the mapping.

//
fn write_leaf_pages<W: Write + Seek>(
fn write_leaf_pages(
    triangles: &mut [Triangle],
    write: &mut W,
    write: &mut CountingWriter<WritePtr>,
) -> io::Result<BuildNode> {
    if triangles.len() <= 512 {
        let pos = write.stream_position()?;
        let pos = write.written_bytes();
        let mut spreads = [SpreadSurvey::default(); 4];
        let mut bounding_box = BoundingBoxSurvey::default();
        for triangle in triangles.iter() {
@@ -174,10 +173,10 @@ fn write_leaf_pages<W: Write + Seek>(
        for i in 0..7 {
            compress(&compressible[i], write)?;
        }
        let len = write.stream_position()? - pos;
        let len = write.written_bytes() - pos;
        Ok(BuildNode::Leaf {
            bbox: bounding_box.bbox(),
            pos: pos,
            pos,
            len: len as u16,
        })
    } else {
@@ -220,7 +219,7 @@
    }
}

fn write_leaf_nodes<W: Write + Seek>(node: &BuildNode, write: &mut W) -> io::Result<()> {
fn write_leaf_nodes(node: &BuildNode, write: &mut CountingWriter<WritePtr>) -> io::Result<()> {
    match node {
        BuildNode::Branch {
            bbox: _,
@@ -242,11 +241,11 @@ fn write_leaf_nodes<W: Write + Seek>(node: &BuildNode, write: &mut W) -> io::Res
    Ok(())
}

fn write_branch_nodes<W: Write + Seek>(
fn write_branch_nodes(
    node: &BuildNode,
    branch_offset: &mut i32,
    leaf_offset: &mut i32,
    write: &mut W,
    write: &mut CountingWriter<WritePtr>,
) -> io::Result<i32> {
    match node {
        BuildNode::Leaf { .. } => {
@@ -284,21 +283,26 @@
///
/// The `triangles` slice will be reordered during tree construction as partitioning sorts by the
/// selected dimension at each level.
pub fn write_block_kd_tree<W: Write + Seek>(
pub fn write_block_kd_tree(
    triangles: &mut [Triangle],
    write: &mut W,
    write: &mut CountingWriter<WritePtr>,
) -> io::Result<()> {
    assert_eq!(
        triangles.as_ptr() as usize % std::mem::align_of::<Triangle>(),
        0
    );
    write.write_all(&1u16.to_le_bytes())?;
    let tree = write_leaf_pages(triangles, write)?;
    let current = write.stream_position()?;
    let current = write.written_bytes();
    let aligned = (current + 31) & !31;
    let padding = aligned - current;
    write.write_all(&vec![0u8; padding as usize])?;
    write_leaf_nodes(&tree, write)?;
    let branch_position = write.stream_position()?;
    let branch_position = write.written_bytes();
    let mut branch_offset: i32 = 0;
    let mut leaf_offset: i32 = -1;
    let root = write_branch_nodes(&tree, &mut branch_offset, &mut leaf_offset, write)?;
    write.write_all(&[0u8; 12])?;
    write.write_all(&triangles.len().to_le_bytes())?;
    write.write_all(&root.to_le_bytes())?;
    write.write_all(&branch_position.to_le_bytes())?;
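The padding logic above rounds the current position up to a 32-byte boundary, presumably so the fixed-size node records that follow can be read as aligned data straight out of the mmap. A quick check of the arithmetic:

// (current + 31) & !31 rounds up to the next multiple of 32.
fn align32(pos: u64) -> u64 {
    (pos + 31) & !31
}

#[test]
fn align32_rounds_up() {
    assert_eq!(align32(0), 0);
    assert_eq!(align32(1), 32);
    assert_eq!(align32(32), 32);
    assert_eq!(align32(33), 64);
}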
@@ -355,8 +359,9 @@ impl<'a> Segment<'a> {
    /// Reads the footer metadata from the last 12 bytes to locate the tree structure and root
    /// node.
    pub fn new(data: &'a [u8]) -> Self {
        assert_eq!(data.as_ptr() as usize % std::mem::align_of::<u64>(), 0);
        Segment {
            data: data,
            data,
            branch_position: u64::from_le_bytes(data[data.len() - 8..].try_into().unwrap()),
            root_offset: i32::from_le_bytes(
                data[data.len() - 12..data.len() - 8].try_into().unwrap(),
@@ -758,3 +763,40 @@ pub fn search_contains(
    }
    Ok(())
}

/// HUSH
pub struct LeafPageIterator<'a> {
    segment: &'a Segment<'a>,
    descent_stack: Vec<i32>,
}

impl<'a> LeafPageIterator<'a> {
    /// HUSH
    pub fn new(segment: &'a Segment<'a>) -> Self {
        Self {
            segment,
            descent_stack: vec![segment.root_offset],
        }
    }
}

impl<'a> Iterator for LeafPageIterator<'a> {
    type Item = io::Result<Vec<Triangle>>;

    fn next(&mut self) -> Option<Self::Item> {
        let offset = self.descent_stack.pop()?;
        if offset < 0 {
            let leaf_node = self.segment.leaf_node(offset);
            let leaf_page = self.segment.leaf_page(leaf_node);
            match decompress_leaf(leaf_page) {
                Ok(triangles) => Some(Ok(triangles)),
                Err(e) => Some(Err(e)),
            }
        } else {
            let branch_node = self.segment.branch_node(offset);
            self.descent_stack.push(branch_node.right);
            self.descent_stack.push(branch_node.left);
            self.next()
        }
    }
}
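A usage sketch for the iterator above, mirroring how the merger in this commit consumes it: negative offsets are leaf pages (decompressed into triangles), non-negative offsets are branch nodes whose children get pushed for a depth-first walk.

// Sketch: stream every triangle out of an existing segment's tree.
fn all_triangles(segment: &Segment) -> io::Result<Vec<Triangle>> {
    let mut out = Vec::new();
    for page in LeafPageIterator::new(segment) {
        out.extend(page?);
    }
    Ok(out)
}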
src/spatial/geometry.rs (new file, 479 lines)
@@ -0,0 +1,479 @@
//! HUSH

use std::io::{self, Read, Write};

use common::{BinarySerializable, VInt};
use serde_json::{json, Map, Value};

use crate::spatial::xor::{compress_f64, decompress_f64};

/// HUSH
#[derive(Debug)]
pub enum GeometryError {
    /// HUSH
    MissingType,
    /// HUSH
    MissingField(String),
    /// HUSH
    UnsupportedType(String),
    /// HUSH
    InvalidCoordinate(String), // Can report the actual bad value
    /// HUSH
    InvalidStructure(String), // "expected array", "wrong nesting depth", etc
}

/// HUSH
#[derive(Debug, Clone, PartialEq)]
pub enum Geometry {
    /// HUSH
    Point((f64, f64)),
    /// HUSH
    MultiPoint(Vec<(f64, f64)>),
    /// HUSH
    LineString(Vec<(f64, f64)>),
    /// HUSH
    MultiLineString(Vec<Vec<(f64, f64)>>),
    /// HUSH
    Polygon(Vec<Vec<(f64, f64)>>),
    /// HUSH
    MultiPolygon(Vec<Vec<Vec<(f64, f64)>>>),
    /// HUSH
    GeometryCollection(Vec<Self>),
}

impl Geometry {
    /// HUSH
    pub fn from_geojson(object: &Map<String, Value>) -> Result<Self, GeometryError> {
        let geometry_type = object
            .get("type")
            .and_then(|v| v.as_str())
            .ok_or(GeometryError::MissingType)?;
        match geometry_type {
            "Point" => {
                let coordinates = get_coordinates(object)?;
                let point = to_point(coordinates)?;
                Ok(Geometry::Point(point))
            }
            "MultiPoint" => {
                let coordinates = get_coordinates(object)?;
                let multi_point = to_line_string(coordinates)?;
                Ok(Geometry::MultiPoint(multi_point))
            }
            "LineString" => {
                let coordinates = get_coordinates(object)?;
                let line_string = to_line_string(coordinates)?;
                if line_string.len() < 2 {
                    return Err(GeometryError::InvalidStructure(
                        "a line string contains at least 2 points".to_string(),
                    ));
                }
                Ok(Geometry::LineString(line_string))
            }
            "MultiLineString" => {
                let coordinates = get_coordinates(object)?;
                let multi_line_string = to_multi_line_string(coordinates)?;
                for line_string in &multi_line_string {
                    if line_string.len() < 2 {
                        return Err(GeometryError::InvalidStructure(
                            "a line string contains at least 2 points".to_string(),
                        ));
                    }
                }
                Ok(Geometry::MultiLineString(multi_line_string))
            }
            "Polygon" => {
                let coordinates = get_coordinates(object)?;
                let polygon = to_multi_line_string(coordinates)?;
                for ring in &polygon {
                    if ring.len() < 3 {
                        return Err(GeometryError::InvalidStructure(
                            "a polygon ring contains at least 3 points".to_string(),
                        ));
                    }
                }
                Ok(Geometry::Polygon(polygon))
            }
            "MultiPolygon" => {
                let mut result = Vec::new();
                let multi_polygons = get_coordinates(object)?;
                let multi_polygons =
                    multi_polygons
                        .as_array()
                        .ok_or(GeometryError::InvalidStructure(
                            "expected an array of polygons".to_string(),
                        ))?;
                for polygon in multi_polygons {
                    let polygon = to_multi_line_string(polygon)?;
                    for ring in &polygon {
                        if ring.len() < 3 {
                            return Err(GeometryError::InvalidStructure(
                                "a polygon ring contains at least 3 points".to_string(),
                            ));
                        }
                    }
                    result.push(polygon);
                }
                Ok(Geometry::MultiPolygon(result))
            }
"GeometriesCollection" => {
|
||||
let geometries = object
|
||||
.get("geometries")
|
||||
.ok_or(GeometryError::MissingField("geometries".to_string()))?;
|
||||
let geometries = geometries
|
||||
.as_array()
|
||||
.ok_or(GeometryError::InvalidStructure(
|
||||
"geometries is not an array".to_string(),
|
||||
))?;
|
||||
let mut result = Vec::new();
|
||||
for geometry in geometries {
|
||||
let object = geometry.as_object().ok_or(GeometryError::InvalidStructure(
|
||||
"geometry is not an object".to_string(),
|
||||
))?;
|
||||
result.push(Geometry::from_geojson(object)?);
|
||||
}
|
||||
Ok(Geometry::GeometryCollection(result))
|
||||
}
|
||||
_ => Err(GeometryError::UnsupportedType(geometry_type.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
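A small round-trip sketch exercising from_geojson and to_geojson (test-style; serde_json is already a dependency of this file):

#[test]
fn point_from_geojson_roundtrip() {
    let json: serde_json::Map<String, serde_json::Value> =
        serde_json::from_str(r#"{"type":"Point","coordinates":[1.5,2.5]}"#).unwrap();
    let geometry = Geometry::from_geojson(&json).unwrap();
    assert_eq!(geometry, Geometry::Point((1.5, 2.5)));
    // to_geojson (below) produces an equivalent map again.
    assert_eq!(
        serde_json::Value::Object(geometry.to_geojson()),
        serde_json::Value::Object(json)
    );
}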
    /// HUSH
    pub fn to_geojson(&self) -> Map<String, Value> {
        let mut map = Map::new();
        match self {
            Geometry::Point(point) => {
                map.insert("type".to_string(), Value::String("Point".to_string()));
                let coords = json!([point.0, point.1]);
                map.insert("coordinates".to_string(), coords);
            }
            Geometry::MultiPoint(points) => {
                map.insert("type".to_string(), Value::String("MultiPoint".to_string()));
                let coords: Vec<Value> = points.iter().map(|p| json!([p.0, p.1])).collect();
                map.insert("coordinates".to_string(), Value::Array(coords));
            }
            Geometry::LineString(line) => {
                map.insert("type".to_string(), Value::String("LineString".to_string()));
                let coords: Vec<Value> = line.iter().map(|p| json!([p.0, p.1])).collect();
                map.insert("coordinates".to_string(), Value::Array(coords));
            }
            Geometry::MultiLineString(lines) => {
                map.insert(
                    "type".to_string(),
                    Value::String("MultiLineString".to_string()),
                );
                let coords: Vec<Value> = lines
                    .iter()
                    .map(|line| Value::Array(line.iter().map(|p| json!([p.0, p.1])).collect()))
                    .collect();
                map.insert("coordinates".to_string(), Value::Array(coords));
            }
            Geometry::Polygon(rings) => {
                map.insert("type".to_string(), Value::String("Polygon".to_string()));
                let coords: Vec<Value> = rings
                    .iter()
                    .map(|ring| Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect()))
                    .collect();
                map.insert("coordinates".to_string(), Value::Array(coords));
            }
            Geometry::MultiPolygon(polygons) => {
                map.insert(
                    "type".to_string(),
                    Value::String("MultiPolygon".to_string()),
                );
                let coords: Vec<Value> = polygons
                    .iter()
                    .map(|polygon| {
                        Value::Array(
                            polygon
                                .iter()
                                .map(|ring| {
                                    Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect())
                                })
                                .collect(),
                        )
                    })
                    .collect();
                map.insert("coordinates".to_string(), Value::Array(coords));
            }
            Geometry::GeometryCollection(geometries) => {
                map.insert(
                    "type".to_string(),
                    Value::String("GeometryCollection".to_string()),
                );
                let geoms: Vec<Value> = geometries
                    .iter()
                    .map(|g| Value::Object(g.to_geojson()))
                    .collect();
                map.insert("geometries".to_string(), Value::Array(geoms));
            }
        }
        map
    }
}
fn get_coordinates(object: &Map<String, Value>) -> Result<&Value, GeometryError> {
|
||||
let coordinates = object
|
||||
.get("coordinates")
|
||||
.ok_or(GeometryError::MissingField("coordinates".to_string()))?;
|
||||
Ok(coordinates)
|
||||
}
|
||||
|
||||
fn to_point(value: &Value) -> Result<(f64, f64), GeometryError> {
|
||||
let lonlat = value.as_array().ok_or(GeometryError::InvalidStructure(
|
||||
"expected 2 element array pair of lon/lat".to_string(),
|
||||
))?;
|
||||
if lonlat.len() != 2 {
|
||||
return Err(GeometryError::InvalidStructure(
|
||||
"expected 2 element array pair of lon/lat".to_string(),
|
||||
));
|
||||
}
|
||||
let lon = lonlat[0].as_f64().ok_or(GeometryError::InvalidCoordinate(
|
||||
"longitude must be f64".to_string(),
|
||||
))?;
|
||||
if !lon.is_finite() || !(-180.0..=180.0).contains(&lon) {
|
||||
return Err(GeometryError::InvalidCoordinate(format!(
|
||||
"invalid longitude: {}",
|
||||
lon
|
||||
)));
|
||||
}
|
||||
let lat = lonlat[1].as_f64().ok_or(GeometryError::InvalidCoordinate(
|
||||
"latitude must be f64".to_string(),
|
||||
))?;
|
||||
if !lat.is_finite() || !(-90.0..=90.0).contains(&lat) {
|
||||
return Err(GeometryError::InvalidCoordinate(format!(
|
||||
"invalid latitude: {}",
|
||||
lat
|
||||
)));
|
||||
}
|
||||
Ok((lon, lat))
|
||||
}
|
||||
|
||||
fn to_line_string(value: &Value) -> Result<Vec<(f64, f64)>, GeometryError> {
|
||||
let mut result = Vec::new();
|
||||
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
|
||||
"expected an array of lon/lat arrays".to_string(),
|
||||
))?;
|
||||
for coordinate in coordinates {
|
||||
result.push(to_point(coordinate)?);
|
||||
}
|
||||
Ok(result)
|
||||
}

fn to_multi_line_string(value: &Value) -> Result<Vec<Vec<(f64, f64)>>, GeometryError> {
    let mut result = Vec::new();
    let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
        "expected an array of an array of lon/lat arrays".to_string(),
    ))?;
    for coordinate in coordinates {
        result.push(to_line_string(coordinate)?);
    }
    Ok(result)
}

impl BinarySerializable for Geometry {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        match self {
            Geometry::Point(point) => {
                0u8.serialize(writer)?;
                point.0.serialize(writer)?;
                point.1.serialize(writer)?;
                Ok(())
            }
            Geometry::MultiPoint(points) => {
                1u8.serialize(writer)?;
                serialize_line_string(points, writer)
            }
            Geometry::LineString(line_string) => {
                2u8.serialize(writer)?;
                serialize_line_string(line_string, writer)
            }
            Geometry::MultiLineString(multi_line_string) => {
                3u8.serialize(writer)?;
                serialize_polygon(multi_line_string, writer)
            }
            Geometry::Polygon(polygon) => {
                4u8.serialize(writer)?;
                serialize_polygon(polygon, writer)
            }
            Geometry::MultiPolygon(multi_polygon) => {
                5u8.serialize(writer)?;
                // Layout: polygon count, per-polygon ring counts, per-ring
                // point counts, followed by the compressed longitude and
                // latitude planes of all points in order.
                BinarySerializable::serialize(&VInt(multi_polygon.len() as u64), writer)?;
                for polygon in multi_polygon {
                    BinarySerializable::serialize(&VInt(polygon.len() as u64), writer)?;
                    for ring in polygon {
                        BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?;
                    }
                }
                let mut lon = Vec::new();
                let mut lat = Vec::new();
                for polygon in multi_polygon {
                    for ring in polygon {
                        for point in ring {
                            lon.push(point.0);
                            lat.push(point.1);
                        }
                    }
                }
                let lon = compress_f64(&lon);
                let lat = compress_f64(&lat);
                VInt(lon.len() as u64).serialize(writer)?;
                writer.write_all(&lon)?;
                VInt(lat.len() as u64).serialize(writer)?;
                writer.write_all(&lat)?;
                Ok(())
            }
            Geometry::GeometryCollection(geometries) => {
                6u8.serialize(writer)?;
                BinarySerializable::serialize(&VInt(geometries.len() as u64), writer)?;
                for geometry in geometries {
                    geometry.serialize(writer)?;
                }
                Ok(())
            }
        }
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        let discriminant: u8 = BinarySerializable::deserialize(reader)?;
        match discriminant {
            0 => {
                let lon = BinarySerializable::deserialize(reader)?;
                let lat = BinarySerializable::deserialize(reader)?;
                Ok(Geometry::Point((lon, lat)))
            }
            1 => Ok(Geometry::MultiPoint(deserialize_line_string(reader)?)),
            2 => Ok(Geometry::LineString(deserialize_line_string(reader)?)),
            3 => Ok(Geometry::MultiLineString(deserialize_polygon(reader)?)),
            4 => Ok(Geometry::Polygon(deserialize_polygon(reader)?)),
            5 => {
                let polygon_count = VInt::deserialize(reader)?.0 as usize;
                let mut polygons = Vec::new();
                // Total number of points across all rings. This must count
                // points, not rings, so the lon/lat planes decompress to the
                // right length.
                let mut count = 0;
                for _ in 0..polygon_count {
                    let ring_count = VInt::deserialize(reader)?.0 as usize;
                    let mut rings = Vec::new();
                    for _ in 0..ring_count {
                        let point_count = VInt::deserialize(reader)?.0 as usize;
                        rings.push(point_count);
                        count += point_count;
                    }
                    polygons.push(rings);
                }
                let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
                let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
                let lon = decompress_f64(&lon_bytes, count);
                let lat = decompress_f64(&lat_bytes, count);
                let mut multi_polygon = Vec::new();
                let mut offset = 0;
                for rings in polygons {
                    let mut polygon = Vec::new();
                    for point_count in rings {
                        let mut ring = Vec::new();
                        for _ in 0..point_count {
                            ring.push((lon[offset], lat[offset]));
                            offset += 1;
                        }
                        polygon.push(ring);
                    }
                    multi_polygon.push(polygon);
                }
                Ok(Geometry::MultiPolygon(multi_polygon))
            }
            6 => {
                let geometry_count = VInt::deserialize(reader)?.0 as usize;
                let mut geometries = Vec::new();
                for _ in 0..geometry_count {
                    geometries.push(Geometry::deserialize(reader)?);
                }
                Ok(Geometry::GeometryCollection(geometries))
            }
            _ => Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "invalid geometry type",
            )),
        }
    }
}
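
// Round-trip sketch (illustrative, not in the original diff; assumes
// `Geometry` implements `PartialEq`): serializing then deserializing should
// reproduce the geometry exactly, since the compressed f64 planes decode back
// to the original values.
//
//     let geometry = Geometry::Polygon(vec![vec![
//         (-99.48, 45.57), (-99.46, 45.57), (-99.46, 45.59), (-99.48, 45.57),
//     ]]);
//     let mut buffer: Vec<u8> = Vec::new();
//     geometry.serialize(&mut buffer).unwrap();
//     let decoded = Geometry::deserialize(&mut &buffer[..]).unwrap();
//     assert_eq!(decoded, geometry);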

fn serialize_line_string<W: Write + ?Sized>(
    line: &[(f64, f64)],
    writer: &mut W,
) -> io::Result<()> {
    BinarySerializable::serialize(&VInt(line.len() as u64), writer)?;
    let mut lon = Vec::new();
    let mut lat = Vec::new();
    for point in line {
        lon.push(point.0);
        lat.push(point.1);
    }
    let lon = compress_f64(&lon);
    let lat = compress_f64(&lat);
    VInt(lon.len() as u64).serialize(writer)?;
    writer.write_all(&lon)?;
    VInt(lat.len() as u64).serialize(writer)?;
    writer.write_all(&lat)?;
    Ok(())
}

fn serialize_polygon<W: Write + ?Sized>(
    rings: &[Vec<(f64, f64)>],
    writer: &mut W,
) -> io::Result<()> {
    BinarySerializable::serialize(&VInt(rings.len() as u64), writer)?;
    for ring in rings {
        BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?;
    }
    let mut lon = Vec::new();
    let mut lat = Vec::new();
    for ring in rings {
        for point in ring {
            lon.push(point.0);
            lat.push(point.1);
        }
    }
    let lon = compress_f64(&lon);
    let lat = compress_f64(&lat);
    VInt(lon.len() as u64).serialize(writer)?;
    writer.write_all(&lon)?;
    VInt(lat.len() as u64).serialize(writer)?;
    writer.write_all(&lat)?;
    Ok(())
}

fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<(f64, f64)>> {
    let point_count = VInt::deserialize(reader)?.0 as usize;
    let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
    let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
    let lon = decompress_f64(&lon_bytes, point_count);
    let lat = decompress_f64(&lat_bytes, point_count);
    let mut line_string = Vec::new();
    for offset in 0..point_count {
        line_string.push((lon[offset], lat[offset]));
    }
    Ok(line_string)
}

fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<(f64, f64)>>> {
    let ring_count = VInt::deserialize(reader)?.0 as usize;
    let mut rings = Vec::new();
    let mut count = 0;
    for _ in 0..ring_count {
        let point_count = VInt::deserialize(reader)?.0 as usize;
        rings.push(point_count);
        count += point_count;
    }
    let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
    let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
    let lon = decompress_f64(&lon_bytes, count);
    let lat = decompress_f64(&lat_bytes, count);
    let mut polygon = Vec::new();
    let mut offset = 0;
    for point_count in rings {
        let mut ring = Vec::new();
        for _ in 0..point_count {
            ring.push((lon[offset], lat[offset]));
            offset += 1;
        }
        polygon.push(ring);
    }
    Ok(polygon)
}
src/spatial/mod.rs
@@ -2,6 +2,10 @@

pub mod bkd;
pub mod delta;
pub mod geometry;
pub mod radix_select;
pub mod reader;
pub mod serializer;
pub mod triangle;
pub mod writer;
pub mod xor;

65
src/spatial/reader.rs
Normal file
@@ -0,0 +1,65 @@

//! Per-segment read access to spatial fields, stored as block kd-trees.
use std::io;
use std::sync::Arc;

use common::file_slice::FileSlice;
use common::OwnedBytes;

use crate::directory::CompositeFile;
use crate::schema::Field;
use crate::space_usage::PerFieldSpaceUsage;

#[derive(Clone)]
/// Gives access to the `SpatialReader` of every spatial field in a segment.
pub struct SpatialReaders {
    data: Arc<CompositeFile>,
}

impl SpatialReaders {
    /// Opens the spatial readers from the segment's spatial file.
    pub fn open(file: FileSlice) -> crate::Result<SpatialReaders> {
        let data = CompositeFile::open(&file)?;
        Ok(SpatialReaders {
            data: Arc::new(data),
        })
    }

    /// Returns the `SpatialReader` for a specific field, or `None` if the
    /// field has no spatial data in this segment.
    pub fn get_field(&self, field: Field) -> crate::Result<Option<SpatialReader>> {
        if let Some(file) = self.data.open_read(field) {
            let spatial_reader = SpatialReader::open(file)?;
            Ok(Some(spatial_reader))
        } else {
            Ok(None)
        }
    }

    /// Return a breakdown of the space usage per field.
    pub fn space_usage(&self) -> PerFieldSpaceUsage {
        self.data.space_usage()
    }

    /// Returns a handle to the inner composite file.
    pub fn get_inner_file(&self) -> Arc<CompositeFile> {
        self.data.clone()
    }
}

/// Reader over the serialized block kd-tree of a single spatial field.
#[derive(Clone)]
pub struct SpatialReader {
    data: OwnedBytes,
}

impl SpatialReader {
    /// Opens the spatial reader from a `FileSlice`.
    pub fn open(spatial_file: FileSlice) -> io::Result<SpatialReader> {
        let data = spatial_file.read_bytes()?;
        Ok(SpatialReader { data })
    }
    /// Returns the raw bytes of the field's serialized block kd-tree.
    pub fn get_bytes(&self) -> &[u8] {
        self.data.as_ref()
    }
}
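
// Usage sketch (illustrative, not part of the original diff):
//
//     let readers = SpatialReaders::open(spatial_file)?;
//     if let Some(reader) = readers.get_field(field)? {
//         let bytes = reader.get_bytes(); // serialized block kd-tree
//     }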
37
src/spatial/serializer.rs
Normal file
@@ -0,0 +1,37 @@

//! Serialization of spatial fields into per-field block kd-trees.
use std::io;
use std::io::Write;

use crate::directory::{CompositeWrite, WritePtr};
use crate::schema::Field;
use crate::spatial::bkd::write_block_kd_tree;
use crate::spatial::triangle::Triangle;

/// The spatial serializer is in charge of writing the block kd-tree of every
/// spatial field into a single composite file.
pub struct SpatialSerializer {
    composite_write: CompositeWrite,
}

impl SpatialSerializer {
    /// Create a composite file from the write pointer.
    pub fn from_write(write: WritePtr) -> io::Result<SpatialSerializer> {
        let composite_write = CompositeWrite::wrap(write);
        Ok(SpatialSerializer { composite_write })
    }

    /// Serialize the given field: its triangles are written out as a block
    /// kd-tree in the field's sub-file.
    pub fn serialize_field(&mut self, field: Field, triangles: &mut [Triangle]) -> io::Result<()> {
        let write = self.composite_write.for_field(field);
        write_block_kd_tree(triangles, write)?;
        write.flush()?;
        Ok(())
    }

    /// Clean up, flush, and close.
    pub fn close(self) -> io::Result<()> {
        self.composite_write.close()?;
        Ok(())
    }
}
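
// Usage sketch (illustrative, not part of the original diff): one
// `serialize_field` call per spatial field, then a single `close`.
//
//     let mut serializer = SpatialSerializer::from_write(write_ptr)?;
//     serializer.serialize_field(field, &mut triangles)?;
//     serializer.close()?;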
132
src/spatial/writer.rs
Normal file
@@ -0,0 +1,132 @@

//! Accumulates triangulated geometries per field during indexing.
use std::collections::HashMap;
use std::io;

use i_triangle::i_overlay::i_float::int::point::IntPoint;
use i_triangle::int::triangulatable::IntTriangulatable;

use crate::schema::Field;
use crate::spatial::geometry::Geometry;
use crate::spatial::serializer::SpatialSerializer;
use crate::spatial::triangle::{delaunay_to_triangles, Triangle};
use crate::DocId;

/// Buffers the triangles of every spatial field until the segment is
/// serialized.
pub struct SpatialWriter {
    /// Map from field to its triangles buffer
    triangles_by_field: HashMap<Field, Vec<Triangle>>,
}

impl SpatialWriter {
    /// Triangulates the geometry and appends the resulting triangles to the
    /// field's buffer. Points and line strings are encoded as degenerate
    /// triangles.
    pub fn add_geometry(&mut self, doc_id: DocId, field: Field, geometry: Geometry) {
        let triangles = self.triangles_by_field.entry(field).or_default();
        match geometry {
            Geometry::Point(point) => {
                into_point(triangles, doc_id, point);
            }
            Geometry::MultiPoint(multi_point) => {
                for point in multi_point {
                    into_point(triangles, doc_id, point);
                }
            }
            Geometry::LineString(line_string) => {
                into_line_string(triangles, doc_id, line_string);
            }
            Geometry::MultiLineString(multi_line_string) => {
                for line_string in multi_line_string {
                    into_line_string(triangles, doc_id, line_string);
                }
            }
            Geometry::Polygon(polygon) => {
                into_polygon(triangles, doc_id, polygon);
            }
            Geometry::MultiPolygon(multi_polygon) => {
                for polygon in multi_polygon {
                    into_polygon(triangles, doc_id, polygon);
                }
            }
            Geometry::GeometryCollection(geometries) => {
                for geometry in geometries {
                    self.add_geometry(doc_id, field, geometry);
                }
            }
        }
    }

    /// Memory usage estimate
    pub fn mem_usage(&self) -> usize {
        self.triangles_by_field
            .values()
            .map(|triangles| triangles.len() * std::mem::size_of::<Triangle>())
            .sum()
    }

    /// Writes the buffered triangles of every field through the serializer.
    pub fn serialize(&mut self, mut serializer: SpatialSerializer) -> io::Result<()> {
        for (field, triangles) in &mut self.triangles_by_field {
            serializer.serialize_field(*field, triangles)?;
        }
        serializer.close()?;
        Ok(())
    }
}
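
// Flow sketch (illustrative, not part of the original diff): during indexing
// each document's geometry is triangulated into the per-field buffer, and on
// commit the buffers are written out as one block kd-tree per field.
//
//     let mut writer = SpatialWriter::default();
//     writer.add_geometry(doc_id, field, geometry);
//     writer.serialize(serializer)?;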

impl Default for SpatialWriter {
    /// Creates a `SpatialWriter` with no buffered triangles.
    fn default() -> Self {
        SpatialWriter {
            triangles_by_field: HashMap::new(),
        }
    }
}

/// Quantizes a lon/lat `f64` point onto the full `i32` grid: longitude maps
/// 360 degrees and latitude 180 degrees onto 2^32 steps.
pub fn as_point_i32(point: (f64, f64)) -> (i32, i32) {
    (
        (point.0 / (360.0 / (1i64 << 32) as f64)).floor() as i32,
        (point.1 / (180.0 / (1i64 << 32) as f64)).floor() as i32,
    )
}
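
// Worked example (illustrative): one longitude step is 360 / 2^32 degrees, so
//
//     as_point_i32((90.0, 45.0)) == (1 << 30, 1 << 30)
//
// since 90 / (360 / 2^32) == 45 / (180 / 2^32) == 2^30. Note that `as i32`
// saturates, so a longitude of exactly 180.0 clamps to i32::MAX.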

fn into_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: (f64, f64)) {
    let point = as_point_i32(point);
    // A point is encoded as a degenerate triangle whose three vertices
    // coincide.
    triangles.push(Triangle::new(
        doc_id,
        [point.1, point.0, point.1, point.0, point.1, point.0],
        [true, true, true],
    ));
}

fn into_line_string(triangles: &mut Vec<Triangle>, doc_id: DocId, line_string: Vec<(f64, f64)>) {
    // Guard against an empty line string, which would otherwise panic on
    // indexing the first point.
    if line_string.is_empty() {
        return;
    }
    let mut previous = as_point_i32(line_string[0]);
    // Each segment is encoded as a degenerate triangle whose third vertex
    // repeats the first.
    for point in line_string.into_iter().skip(1) {
        let point = as_point_i32(point);
        triangles.push(Triangle::new(
            doc_id,
            [
                previous.1, previous.0, point.1, point.0, previous.1, previous.0,
            ],
            [true, true, true],
        ));
        previous = point;
    }
}

fn into_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: Vec<(f64, f64)>) {
    let mut i_ring = Vec::new();
    for point in ring {
        let point = as_point_i32(point);
        i_ring.push(IntPoint::new(point.0, point.1));
    }
    i_polygon.push(i_ring);
}

fn into_polygon(triangles: &mut Vec<Triangle>, doc_id: DocId, polygon: Vec<Vec<(f64, f64)>>) {
    let mut i_polygon = Vec::new();
    for ring in polygon {
        into_ring(&mut i_polygon, ring);
    }
    // Triangulate the quantized polygon (outer ring plus any holes) and
    // append the resulting Delaunay triangles to the buffer.
    let delaunay = i_polygon.triangulate().into_delaunay();
    delaunay_to_triangles(doc_id, &delaunay, triangles);
}