From dbbc8c3f659e97cfee97a27ee6fb4b97e681349d Mon Sep 17 00:00:00 2001 From: Alan Gutierrez Date: Sat, 25 Oct 2025 13:43:07 -0500 Subject: [PATCH] Slot block kd-tree into Tantivy. Implemented a geometry document field with a minimal `Geometry` enum. Now able to add that Geometry from GeoJSON parsed from a JSON document. Geometry is triangulated if it is a polygon, otherwise it is correctly encoded as a degenerate triangle if it is a point or a line string. Write accumulated triangles to a block kd-tree on commit. Serialize the original `f64` polygon for retrieval from search. Created a query method for intersection. Query against the memory mapped block kd-tree. Return hits and original `f64` polygon. Implemented a merge of one or more block kd-trees from one or more segments during merge. Updated the block kd-tree to write to a Tantivy `WritePtr` instead of more generic Rust I/O. --- Cargo.toml | 1 - examples/geo_json.rs | 55 ++ src/core/json_utils.rs | 1 + src/fastfield/writer.rs | 2 + src/index/index_meta.rs | 1 + src/index/segment_component.rs | 5 +- src/index/segment_reader.rs | 10 + src/indexer/merger.rs | 73 ++- src/indexer/segment_serializer.rs | 14 + src/indexer/segment_writer.rs | 16 + src/postings/per_field_postings_writer.rs | 1 + src/query/mod.rs | 2 + src/query/query_parser/query_parser.rs | 7 + src/query/range_query/mod.rs | 2 +- .../range_query/range_query_fastfield.rs | 17 +- src/query/spatial_query.rs | 170 +++++++ src/schema/document/de.rs | 22 + src/schema/document/default_document.rs | 12 + src/schema/document/mod.rs | 1 + src/schema/document/owned_value.rs | 12 + src/schema/document/se.rs | 4 + src/schema/document/value.rs | 28 + src/schema/field_entry.rs | 7 + src/schema/field_type.rs | 28 + src/schema/mod.rs | 1 + src/schema/schema.rs | 10 + src/schema/spatial_options.rs | 22 +- src/schema/term.rs | 3 + src/space_usage/mod.rs | 1 + src/spatial/bkd.rs | 82 ++- src/spatial/geometry.rs | 479 ++++++++++++++++++ src/spatial/mod.rs | 4 + 
src/spatial/reader.rs | 65 +++ src/spatial/serializer.rs | 37 ++ src/spatial/writer.rs | 132 +++++ 35 files changed, 1286 insertions(+), 41 deletions(-) create mode 100644 examples/geo_json.rs create mode 100644 src/query/spatial_query.rs create mode 100644 src/spatial/geometry.rs create mode 100644 src/spatial/reader.rs create mode 100644 src/spatial/serializer.rs create mode 100644 src/spatial/writer.rs diff --git a/Cargo.toml b/Cargo.toml index cb0c2b8bb..2cdb25b1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,7 +93,6 @@ time = { version = "0.3.10", features = ["serde-well-known", "macros"] } postcard = { version = "1.0.4", features = [ "use-std", ], default-features = false } -geojson = "0.24.2" [target.'cfg(not(windows))'.dev-dependencies] criterion = { version = "0.5", default-features = false } diff --git a/examples/geo_json.rs b/examples/geo_json.rs new file mode 100644 index 000000000..3990d2767 --- /dev/null +++ b/examples/geo_json.rs @@ -0,0 +1,55 @@ +use tantivy::collector::TopDocs; +use tantivy::query::SpatialQuery; +use tantivy::schema::{Schema, Value, SPATIAL, STORED, TEXT}; +use tantivy::{Index, IndexWriter, TantivyDocument}; +fn main() -> tantivy::Result<()> { + let mut schema_builder = Schema::builder(); + schema_builder.add_json_field("properties", STORED | TEXT); + schema_builder.add_spatial_field("geometry", STORED | SPATIAL); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema.clone()); + let mut index_writer: IndexWriter = index.writer(50_000_000)?; + let doc = TantivyDocument::parse_json( + &schema, + r#"{ + "type":"Feature", + "geometry":{ + "type":"Polygon", + "coordinates":[[[-99.483911,45.577697],[-99.483869,45.571457],[-99.481739,45.571461],[-99.474881,45.571584],[-99.473167,45.571615],[-99.463394,45.57168],[-99.463391,45.57883],[-99.463368,45.586076],[-99.48177,45.585926],[-99.48384,45.585953],[-99.483885,45.57873],[-99.483911,45.577697]]] + }, + "properties":{ + "admin_level":"8", + "border_type":"city", 
+ "boundary":"administrative", + "gnis:feature_id":"1267426", + "name":"Hosmer", + "place":"city", + "source":"TIGER/LineĀ® 2008 Place Shapefiles (http://www.census.gov/geo/www/tiger/)", + "wikidata":"Q2442118", + "wikipedia":"en:Hosmer, South Dakota" + } + }"#, + )?; + index_writer.add_document(doc)?; + index_writer.commit()?; + + let reader = index.reader()?; + let searcher = reader.searcher(); + let field = schema.get_field("geometry").unwrap(); + let query = SpatialQuery::new( + field, + [(-99.49, 45.56), (-99.45, 45.59)], + tantivy::query::SpatialQueryType::Intersects, + ); + let hits = searcher.search(&query, &TopDocs::with_limit(10))?; + for (_score, doc_address) in &hits { + let retrieved_doc: TantivyDocument = searcher.doc(*doc_address)?; + if let Some(field_value) = retrieved_doc.get_first(field) { + if let Some(geometry_box) = field_value.as_value().into_geometry() { + println!("Retrieved geometry: {:?}", geometry_box); + } + } + } + assert_eq!(hits.len(), 1); + Ok(()) +} diff --git a/src/core/json_utils.rs b/src/core/json_utils.rs index ade65fa11..ee9292d66 100644 --- a/src/core/json_utils.rs +++ b/src/core/json_utils.rs @@ -227,6 +227,7 @@ pub(crate) fn index_json_value<'a, V: Value<'a>>( ReferenceValueLeaf::IpAddr(_) => { unimplemented!("IP address support in dynamic fields is not yet implemented") } + ReferenceValueLeaf::Geometry(_) => todo!(), }, ReferenceValue::Array(elements) => { for val in elements { diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index a1288e0ad..311a3d6af 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -189,6 +189,7 @@ impl FastFieldsWriter { .record_str(doc_id, field_name, &token.text); } } + ReferenceValueLeaf::Geometry(_) => todo!(), }, ReferenceValue::Array(val) => { // TODO: Check this is the correct behaviour we want. 
@@ -320,6 +321,7 @@ fn record_json_value_to_columnar_writer<'a, V: Value<'a>>( "Pre-tokenized string support in dynamic fields is not yet implemented" ) } + ReferenceValueLeaf::Geometry(_) => todo!(), }, ReferenceValue::Array(elements) => { for el in elements { diff --git a/src/index/index_meta.rs b/src/index/index_meta.rs index 86eaa35d6..7a5cb2649 100644 --- a/src/index/index_meta.rs +++ b/src/index/index_meta.rs @@ -142,6 +142,7 @@ impl SegmentMeta { SegmentComponent::FastFields => ".fast".to_string(), SegmentComponent::FieldNorms => ".fieldnorm".to_string(), SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)), + SegmentComponent::Spatial => ".spatial".to_string(), }); PathBuf::from(path) } diff --git a/src/index/segment_component.rs b/src/index/segment_component.rs index 42ac1d178..4e208ae1e 100644 --- a/src/index/segment_component.rs +++ b/src/index/segment_component.rs @@ -28,12 +28,14 @@ pub enum SegmentComponent { /// Bitset describing which document of the segment is alive. /// (It was representing deleted docs but changed to represent alive docs from v0.17) Delete, + /// HUSH + Spatial, } impl SegmentComponent { /// Iterates through the components. 
pub fn iterator() -> slice::Iter<'static, SegmentComponent> { - static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [ + static SEGMENT_COMPONENTS: [SegmentComponent; 9] = [ SegmentComponent::Postings, SegmentComponent::Positions, SegmentComponent::FastFields, @@ -42,6 +44,7 @@ impl SegmentComponent { SegmentComponent::Store, SegmentComponent::TempStore, SegmentComponent::Delete, + SegmentComponent::Spatial, ]; SEGMENT_COMPONENTS.iter() } diff --git a/src/index/segment_reader.rs b/src/index/segment_reader.rs index f5589a690..ee1a9938a 100644 --- a/src/index/segment_reader.rs +++ b/src/index/segment_reader.rs @@ -14,6 +14,7 @@ use crate::index::{InvertedIndexReader, Segment, SegmentComponent, SegmentId}; use crate::json_utils::json_path_sep_to_dot; use crate::schema::{Field, IndexRecordOption, Schema, Type}; use crate::space_usage::SegmentSpaceUsage; +use crate::spatial::reader::SpatialReaders; use crate::store::StoreReader; use crate::termdict::TermDictionary; use crate::{DocId, Opstamp}; @@ -43,6 +44,7 @@ pub struct SegmentReader { positions_composite: CompositeFile, fast_fields_readers: FastFieldReaders, fieldnorm_readers: FieldNormReaders, + spatial_readers: SpatialReaders, store_file: FileSlice, alive_bitset_opt: Option, @@ -92,6 +94,11 @@ impl SegmentReader { &self.fast_fields_readers } + /// HUSH + pub fn spatial_fields(&self) -> &SpatialReaders { + &self.spatial_readers + } + /// Accessor to the `FacetReader` associated with a given `Field`. 
pub fn facet_reader(&self, field_name: &str) -> crate::Result { let schema = self.schema(); @@ -173,6 +180,8 @@ impl SegmentReader { let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?; let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?; let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; + let spatial_data = segment.open_read(SegmentComponent::Spatial)?; + let spatial_readers = SpatialReaders::open(spatial_data)?; let original_bitset = if segment.meta().has_deletes() { let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?; @@ -198,6 +207,7 @@ impl SegmentReader { postings_composite, fast_fields_readers, fieldnorm_readers, + spatial_readers, segment_id: segment.id(), delete_opstamp: segment.meta().delete_opstamp(), store_file, diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 47ac5a55b..7630b611f 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; +use std::io::Write; use std::sync::Arc; use columnar::{ @@ -6,6 +8,7 @@ use columnar::{ use common::ReadOnlyBitSet; use itertools::Itertools; use measure_time::debug_time; +use tempfile::NamedTempFile; use crate::directory::WritePtr; use crate::docset::{DocSet, TERMINATED}; @@ -17,6 +20,8 @@ use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping}; use crate::indexer::SegmentSerializer; use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings}; use crate::schema::{value_type_to_column_type, Field, FieldType, Schema}; +use crate::spatial::bkd::LeafPageIterator; +use crate::spatial::triangle::Triangle; use crate::store::StoreWriter; use crate::termdict::{TermMerger, TermOrdinal}; use crate::{DocAddress, DocId, InvertedIndexReader}; @@ -520,6 +525,71 @@ impl IndexMerger { Ok(()) } + fn write_spatial_fields( + &self, + serializer: &mut SegmentSerializer, + doc_id_mapping: &SegmentDocIdMapping, + ) -> crate::Result<()> { + use 
crate::spatial::bkd::Segment; + let mut segment_mappings: Vec>> = Vec::new(); + for reader in &self.readers { + let max_doc = reader.max_doc(); + segment_mappings.push(vec![None; max_doc as usize]); + } + for (new_doc_id, old_doc_addr) in doc_id_mapping.iter_old_doc_addrs().enumerate() { + segment_mappings[old_doc_addr.segment_ord as usize][old_doc_addr.doc_id as usize] = + Some(new_doc_id as DocId); + } + let mut temp_files: HashMap = HashMap::new(); + + for (field, field_entry) in self.schema.fields() { + if matches!(field_entry.field_type(), FieldType::Spatial(_)) { + temp_files.insert(field, NamedTempFile::new()?); + } + } + for (segment_ord, reader) in self.readers.iter().enumerate() { + for (field, temp_file) in &mut temp_files { + let spatial_readers = reader.spatial_fields(); + let spatial_reader = match spatial_readers.get_field(*field)? { + Some(reader) => reader, + None => continue, + }; + let segment = Segment::new(spatial_reader.get_bytes()); + for triangle_result in LeafPageIterator::new(&segment) { + let triangles = triangle_result?; + for triangle in triangles { + if let Some(new_doc_id) = + segment_mappings[segment_ord][triangle.doc_id as usize] + { + for &word in &triangle.words { + temp_file.write_all(&word.to_le_bytes())?; + } + temp_file.write_all(&new_doc_id.to_le_bytes())?; + } + } + } + } + } + if let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() { + for (field, mut temp_file) in temp_files { + // Flush and sync triangles. + temp_file.flush()?; + temp_file.as_file_mut().sync_all()?; + // Memory map the triangle file. + use memmap2::MmapOptions; + let mmap = unsafe { MmapOptions::new().map_mut(temp_file.as_file())? }; + // Cast to &[Triangle] slice + let triangle_count = mmap.len() / std::mem::size_of::(); + let triangles = unsafe { + std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Triangle, triangle_count) + }; + // Get spatial writer and rebuild block kd-tree. 
+ spatial_serializer.serialize_field(field, triangles)?; + } + } + Ok(()) + } + /// Writes the merged segment by pushing information /// to the `SegmentSerializer`. /// @@ -544,9 +614,10 @@ impl IndexMerger { debug!("write-storagefields"); self.write_storable_fields(serializer.get_store_writer())?; + debug!("write-spatialfields"); + self.write_spatial_fields(&mut serializer, &doc_id_mapping)?; debug!("write-fastfields"); self.write_fast_fields(serializer.get_fast_field_write(), doc_id_mapping)?; - debug!("close-serializer"); serializer.close()?; Ok(self.max_doc) diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index 057a51b10..473dbd1e8 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -4,6 +4,7 @@ use crate::directory::WritePtr; use crate::fieldnorm::FieldNormsSerializer; use crate::index::{Segment, SegmentComponent}; use crate::postings::InvertedIndexSerializer; +use crate::spatial::serializer::SpatialSerializer; use crate::store::StoreWriter; /// Segment serializer is in charge of laying out on disk @@ -12,6 +13,7 @@ pub struct SegmentSerializer { segment: Segment, pub(crate) store_writer: StoreWriter, fast_field_write: WritePtr, + spatial_serializer: Option, fieldnorms_serializer: Option, postings_serializer: InvertedIndexSerializer, } @@ -35,11 +37,15 @@ impl SegmentSerializer { let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?; let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?; + let spatial_write = segment.open_write(SegmentComponent::Spatial)?; + let spatial_serializer = SpatialSerializer::from_write(spatial_write)?; + let postings_serializer = InvertedIndexSerializer::open(&mut segment)?; Ok(SegmentSerializer { segment, store_writer, fast_field_write, + spatial_serializer: Some(spatial_serializer), fieldnorms_serializer: Some(fieldnorms_serializer), postings_serializer, }) @@ -64,6 +70,11 @@ impl SegmentSerializer { &mut 
self.fast_field_write } + /// HUSH + pub fn extract_spatial_serializer(&mut self) -> Option { + self.spatial_serializer.take() + } + /// Extract the field norm serializer. /// /// Note the fieldnorms serializer can only be extracted once. @@ -81,6 +92,9 @@ impl SegmentSerializer { if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() { fieldnorms_serializer.close()?; } + if let Some(spatial_serializer) = self.extract_spatial_serializer() { + spatial_serializer.close()?; + } self.fast_field_write.terminate()?; self.postings_serializer.close()?; self.store_writer.close()?; diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 30338187c..630bd4fc5 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -16,6 +16,7 @@ use crate::postings::{ }; use crate::schema::document::{Document, Value}; use crate::schema::{FieldEntry, FieldType, Schema, DATE_TIME_PRECISION_INDEXED}; +use crate::spatial::writer::SpatialWriter; use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer}; use crate::{DocId, Opstamp, TantivyError}; @@ -52,6 +53,7 @@ pub struct SegmentWriter { pub(crate) segment_serializer: SegmentSerializer, pub(crate) fast_field_writers: FastFieldsWriter, pub(crate) fieldnorms_writer: FieldNormsWriter, + pub(crate) spatial_writer: SpatialWriter, pub(crate) json_path_writer: JsonPathWriter, pub(crate) json_positions_per_path: IndexingPositionsPerPath, pub(crate) doc_opstamps: Vec, @@ -104,6 +106,7 @@ impl SegmentWriter { ctx: IndexingContext::new(table_size), per_field_postings_writers, fieldnorms_writer: FieldNormsWriter::for_schema(&schema), + spatial_writer: SpatialWriter::default(), json_path_writer: JsonPathWriter::default(), json_positions_per_path: IndexingPositionsPerPath::default(), segment_serializer, @@ -130,6 +133,7 @@ impl SegmentWriter { self.ctx, self.fast_field_writers, &self.fieldnorms_writer, + &mut self.spatial_writer, self.segment_serializer, )?; 
Ok(self.doc_opstamps) @@ -142,6 +146,7 @@ impl SegmentWriter { + self.fieldnorms_writer.mem_usage() + self.fast_field_writers.mem_usage() + self.segment_serializer.mem_usage() + + self.spatial_writer.mem_usage() } fn index_document(&mut self, doc: &D) -> crate::Result<()> { @@ -338,6 +343,13 @@ impl SegmentWriter { self.fieldnorms_writer.record(doc_id, field, num_vals); } } + FieldType::Spatial(_) => { + for value in values { + if let Some(geometry) = value.as_geometry() { + self.spatial_writer.add_geometry(doc_id, field, *geometry); + } + } + } } } Ok(()) @@ -392,12 +404,16 @@ fn remap_and_write( ctx: IndexingContext, fast_field_writers: FastFieldsWriter, fieldnorms_writer: &FieldNormsWriter, + spatial_writer: &mut SpatialWriter, mut serializer: SegmentSerializer, ) -> crate::Result<()> { debug!("remap-and-write"); if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() { fieldnorms_writer.serialize(fieldnorms_serializer)?; } + if let Some(spatial_serializer) = serializer.extract_spatial_serializer() { + spatial_writer.serialize(spatial_serializer)?; + } let fieldnorm_data = serializer .segment() .open_read(SegmentComponent::FieldNorms)?; diff --git a/src/postings/per_field_postings_writer.rs b/src/postings/per_field_postings_writer.rs index f3d6d6534..a9035ca84 100644 --- a/src/postings/per_field_postings_writer.rs +++ b/src/postings/per_field_postings_writer.rs @@ -51,6 +51,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box Box::>::default(), FieldType::JsonObject(ref json_object_options) => { if let Some(text_indexing_option) = json_object_options.get_text_indexing_options() { diff --git a/src/query/mod.rs b/src/query/mod.rs index d609a0402..8bc45ed2e 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -24,6 +24,7 @@ mod reqopt_scorer; mod scorer; mod set_query; mod size_hint; +mod spatial_query; mod term_query; mod union; mod weight; @@ -62,6 +63,7 @@ pub use self::reqopt_scorer::RequiredOptionalScorer; pub use 
self::score_combiner::{DisjunctionMaxCombiner, ScoreCombiner, SumCombiner}; pub use self::scorer::Scorer; pub use self::set_query::TermSetQuery; +pub use self::spatial_query::{SpatialQuery, SpatialQueryType}; pub use self::term_query::TermQuery; pub use self::union::BufferedUnionScorer; #[cfg(test)] diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 7ebaf3a47..483aa3638 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -524,6 +524,9 @@ impl QueryParser { let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr(); Ok(Term::from_field_ip_addr(field, ip_v6)) } + FieldType::Spatial(_) => Err(QueryParserError::UnsupportedQuery( + "Spatial queries are not yet supported in text query parser".to_string(), + )), } } @@ -624,6 +627,10 @@ impl QueryParser { let term = Term::from_field_ip_addr(field, ip_v6); Ok(vec![LogicalLiteral::Term(term)]) } + FieldType::Spatial(_) => Err(QueryParserError::UnsupportedQuery(format!( + "Spatial queries are not yet supported for field '{}'", + field_name + ))), } } diff --git a/src/query/range_query/mod.rs b/src/query/range_query/mod.rs index e93bb4049..3297dae07 100644 --- a/src/query/range_query/mod.rs +++ b/src/query/range_query/mod.rs @@ -20,6 +20,6 @@ pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool { | Type::Date | Type::Json | Type::IpAddr => true, - Type::Facet | Type::Bytes => false, + Type::Facet | Type::Bytes | Type::Spatial => false, } } diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs index 54cf0cad5..f4d7e970e 100644 --- a/src/query/range_query/range_query_fastfield.rs +++ b/src/query/range_query/range_query_fastfield.rs @@ -128,12 +128,15 @@ impl Weight for FastFieldRangeWeight { BoundsRange::new(bounds.lower_bound, bounds.upper_bound), ) } - Type::Bool | Type::Facet | Type::Bytes | Type::Json | Type::IpAddr => { - 
Err(crate::TantivyError::InvalidArgument(format!( - "unsupported value bytes type in json term value_bytes {:?}", - term_value.typ() - ))) - } + Type::Bool + | Type::Facet + | Type::Bytes + | Type::Json + | Type::IpAddr + | Type::Spatial => Err(crate::TantivyError::InvalidArgument(format!( + "unsupported value bytes type in json term value_bytes {:?}", + term_value.typ() + ))), } } else if field_type.is_ip_addr() { let parse_ip_from_bytes = |term: &Term| { @@ -435,7 +438,7 @@ pub(crate) fn maps_to_u64_fastfield(typ: Type) -> bool { match typ { Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true, Type::IpAddr => false, - Type::Str | Type::Facet | Type::Bytes | Type::Json => false, + Type::Str | Type::Facet | Type::Bytes | Type::Json | Type::Spatial => false, } } diff --git a/src/query/spatial_query.rs b/src/query/spatial_query.rs new file mode 100644 index 000000000..a6c4d7050 --- /dev/null +++ b/src/query/spatial_query.rs @@ -0,0 +1,170 @@ +//! HUSH + +use common::BitSet; + +use crate::query::{BitSetDocSet, Query, Scorer, Weight}; +use crate::schema::Field; +use crate::spatial::bkd::{search_intersects, Segment}; +use crate::spatial::writer::as_point_i32; +use crate::{DocId, DocSet, Score, TantivyError, TERMINATED}; + +#[derive(Clone, Copy, Debug)] +/// HUSH +pub enum SpatialQueryType { + /// HUSH + Intersects, + // Within, + // Contains, +} + +#[derive(Clone, Copy, Debug)] +/// HUSH +pub struct SpatialQuery { + field: Field, + bounds: [(i32, i32); 2], + query_type: SpatialQueryType, +} + +impl SpatialQuery { + /// HUSH + pub fn new(field: Field, bounds: [(f64, f64); 2], query_type: SpatialQueryType) -> Self { + SpatialQuery { + field, + bounds: [as_point_i32(bounds[0]), as_point_i32(bounds[1])], + query_type, + } + } +} + +impl Query for SpatialQuery { + fn weight( + &self, + _enable_scoring: super::EnableScoring<'_>, + ) -> crate::Result> { + Ok(Box::new(SpatialWeight::new( + self.field, + self.bounds, + self.query_type, + ))) + } +} + +pub struct 
SpatialWeight { + field: Field, + bounds: [(i32, i32); 2], + query_type: SpatialQueryType, +} + +impl SpatialWeight { + fn new(field: Field, bounds: [(i32, i32); 2], query_type: SpatialQueryType) -> Self { + SpatialWeight { + field, + bounds, + query_type, + } + } +} + +impl Weight for SpatialWeight { + fn scorer( + &self, + reader: &crate::SegmentReader, + boost: crate::Score, + ) -> crate::Result> { + let spatial_reader = reader + .spatial_fields() + .get_field(self.field)? + .ok_or_else(|| TantivyError::SchemaError(format!("No spatial data for field")))?; + let block_kd_tree = Segment::new(spatial_reader.get_bytes()); + match self.query_type { + SpatialQueryType::Intersects => { + let mut include = BitSet::with_max_value(reader.max_doc()); + search_intersects( + &block_kd_tree, + block_kd_tree.root_offset, + &[ + self.bounds[0].1, + self.bounds[0].0, + self.bounds[1].1, + self.bounds[1].0, + ], + &mut include, + )?; + Ok(Box::new(SpatialScorer::new(boost, include, None))) + } + } + } + fn explain( + &self, + _reader: &crate::SegmentReader, + _doc: DocId, + ) -> crate::Result { + todo!(); + } +} + +struct SpatialScorer { + include: BitSetDocSet, + exclude: Option, + doc_id: DocId, + score: Score, +} + +impl SpatialScorer { + pub fn new(score: Score, include: BitSet, exclude: Option) -> Self { + let mut scorer = SpatialScorer { + include: BitSetDocSet::from(include), + exclude, + doc_id: 0, + score, + }; + scorer.prime(); + scorer + } + fn prime(&mut self) { + self.doc_id = self.include.doc(); + while self.exclude() { + self.doc_id = self.include.advance(); + } + } + + fn exclude(&self) -> bool { + if self.doc_id == TERMINATED { + return false; + } + match &self.exclude { + Some(exclude) => exclude.contains(self.doc_id), + None => false, + } + } +} + +impl Scorer for SpatialScorer { + fn score(&mut self) -> Score { + self.score + } +} + +impl DocSet for SpatialScorer { + fn advance(&mut self) -> DocId { + if self.doc_id == TERMINATED { + return TERMINATED; + } + 
self.doc_id = self.include.advance(); + while self.exclude() { + self.doc_id = self.include.advance(); + } + self.doc_id + } + + fn size_hint(&self) -> u32 { + match &self.exclude { + Some(exclude) => self.include.size_hint() - exclude.len() as u32, + None => self.include.size_hint(), + } + } + + fn doc(&self) -> DocId { + self.doc_id + } +} diff --git a/src/schema/document/de.rs b/src/schema/document/de.rs index 02ee7a6a0..9e6b6ecc0 100644 --- a/src/schema/document/de.rs +++ b/src/schema/document/de.rs @@ -22,6 +22,7 @@ use super::se::BinaryObjectSerializer; use super::{OwnedValue, Value}; use crate::schema::document::type_codes; use crate::schema::{Facet, Field}; +use crate::spatial::geometry::Geometry; use crate::store::DocStoreVersion; use crate::tokenizer::PreTokenizedString; @@ -129,6 +130,9 @@ pub trait ValueDeserializer<'de> { /// Attempts to deserialize a pre-tokenized string value from the deserializer. fn deserialize_pre_tokenized_string(self) -> Result; + /// HUSH + fn deserialize_geometry(self) -> Result; + /// Attempts to deserialize the value using a given visitor. fn deserialize_any(self, visitor: V) -> Result where V: ValueVisitor; @@ -166,6 +170,8 @@ pub enum ValueType { /// A JSON object value. Deprecated. #[deprecated(note = "We keep this for backwards compatibility, use Object instead")] JSONObject, + /// HUSH + Geometry, } /// A value visitor for deserializing a document value. @@ -246,6 +252,12 @@ pub trait ValueVisitor { Err(DeserializeError::UnsupportedType(ValueType::PreTokStr)) } + #[inline] + /// Called when the deserializer visits a geometry value. + fn visit_geometry(&self, _val: Geometry) -> Result { + Err(DeserializeError::UnsupportedType(ValueType::Geometry)) + } + #[inline] /// Called when the deserializer visits an array. 
fn visit_array<'de, A>(&self, _access: A) -> Result @@ -380,6 +392,7 @@ where R: Read match ext_type_code { type_codes::TOK_STR_EXT_CODE => ValueType::PreTokStr, + type_codes::GEO_EXT_CODE => ValueType::Geometry, _ => { return Err(DeserializeError::from(io::Error::new( io::ErrorKind::InvalidData, @@ -495,6 +508,11 @@ where R: Read .map_err(DeserializeError::from) } + fn deserialize_geometry(self) -> Result { + self.validate_type(ValueType::Geometry)?; + ::deserialize(self.reader).map_err(DeserializeError::from) + } + fn deserialize_any(self, visitor: V) -> Result where V: ValueVisitor { match self.value_type { @@ -539,6 +557,10 @@ where R: Read let val = self.deserialize_pre_tokenized_string()?; visitor.visit_pre_tokenized_string(val) } + ValueType::Geometry => { + let val = self.deserialize_geometry()?; + visitor.visit_geometry(val) + } ValueType::Array => { let access = BinaryArrayDeserializer::from_reader(self.reader, self.doc_store_version)?; diff --git a/src/schema/document/default_document.rs b/src/schema/document/default_document.rs index 915b685aa..c2416040a 100644 --- a/src/schema/document/default_document.rs +++ b/src/schema/document/default_document.rs @@ -13,6 +13,7 @@ use crate::schema::document::{ }; use crate::schema::field_type::ValueParsingError; use crate::schema::{Facet, Field, NamedFieldDocument, OwnedValue, Schema}; +use crate::spatial::geometry::Geometry; use crate::tokenizer::PreTokenizedString; #[repr(C, packed)] @@ -254,6 +255,7 @@ impl CompactDoc { } ReferenceValueLeaf::IpAddr(num) => write_into(&mut self.node_data, num.to_u128()), ReferenceValueLeaf::PreTokStr(pre_tok) => write_into(&mut self.node_data, *pre_tok), + ReferenceValueLeaf::Geometry(geometry) => write_into(&mut self.node_data, *geometry), }; ValueAddr { type_id, val_addr } } @@ -464,6 +466,13 @@ impl<'a> CompactDocValue<'a> { .map(Into::into) .map(ReferenceValueLeaf::PreTokStr) .map(Into::into), + // ValueType::Geometry => todo!(), + ValueType::Geometry => self + .container + 
.read_from::(addr) + .map(Into::into) + .map(ReferenceValueLeaf::Geometry) + .map(Into::into), ValueType::Object => Ok(ReferenceValue::Object(CompactDocObjectIter::new( self.container, addr, @@ -542,6 +551,8 @@ pub enum ValueType { Object = 11, /// Pre-tokenized str type, Array = 12, + /// HUSH + Geometry = 13, } impl BinarySerializable for ValueType { @@ -587,6 +598,7 @@ impl<'a> From<&ReferenceValueLeaf<'a>> for ValueType { ReferenceValueLeaf::PreTokStr(_) => ValueType::PreTokStr, ReferenceValueLeaf::Facet(_) => ValueType::Facet, ReferenceValueLeaf::Bytes(_) => ValueType::Bytes, + ReferenceValueLeaf::Geometry(_) => ValueType::Geometry, } } } diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index 8168ee811..ed4a03d6f 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -273,4 +273,5 @@ pub(crate) mod type_codes { // Extended type codes pub const TOK_STR_EXT_CODE: u8 = 0; + pub const GEO_EXT_CODE: u8 = 1; } diff --git a/src/schema/document/owned_value.rs b/src/schema/document/owned_value.rs index 9fbf1f8c2..8c72e358d 100644 --- a/src/schema/document/owned_value.rs +++ b/src/schema/document/owned_value.rs @@ -15,6 +15,7 @@ use crate::schema::document::{ ValueDeserializer, ValueVisitor, }; use crate::schema::Facet; +use crate::spatial::geometry::Geometry; use crate::tokenizer::PreTokenizedString; use crate::DateTime; @@ -49,6 +50,8 @@ pub enum OwnedValue { Object(Vec<(String, Self)>), /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. IpAddr(Ipv6Addr), + /// A GeoRust multi-polygon. 
+ Geometry(Geometry), } impl AsRef for OwnedValue { @@ -77,6 +80,9 @@ impl<'a> Value<'a> for &'a OwnedValue { OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(), OwnedValue::Array(array) => ReferenceValue::Array(array.iter()), OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())), + OwnedValue::Geometry(geometry) => { + ReferenceValueLeaf::Geometry(Box::new(geometry.clone())).into() + } } } } @@ -136,6 +142,10 @@ impl ValueDeserialize for OwnedValue { Ok(OwnedValue::PreTokStr(val)) } + fn visit_geometry(&self, val: Geometry) -> Result { + Ok(OwnedValue::Geometry(val)) + } + fn visit_array<'de, A>(&self, mut access: A) -> Result where A: ArrayAccess<'de> { let mut elements = Vec::with_capacity(access.size_hint()); @@ -198,6 +208,7 @@ impl serde::Serialize for OwnedValue { } } OwnedValue::Array(ref array) => array.serialize(serializer), + OwnedValue::Geometry(_) => todo!(), } } } @@ -285,6 +296,7 @@ impl<'a, V: Value<'a>> From> for OwnedValue { ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val), ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val), ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()), + ReferenceValueLeaf::Geometry(_) => todo!(), }, ReferenceValue::Array(val) => { OwnedValue::Array(val.map(|v| v.as_value().into()).collect()) diff --git a/src/schema/document/se.rs b/src/schema/document/se.rs index 528c8dffc..d1b884f01 100644 --- a/src/schema/document/se.rs +++ b/src/schema/document/se.rs @@ -133,6 +133,10 @@ where W: Write self.write_type_code(type_codes::EXT_CODE)?; self.serialize_with_type_code(type_codes::TOK_STR_EXT_CODE, &*val) } + ReferenceValueLeaf::Geometry(val) => { + self.write_type_code(type_codes::EXT_CODE)?; + self.serialize_with_type_code(type_codes::GEO_EXT_CODE, &*val) + } }, ReferenceValue::Array(elements) => { self.write_type_code(type_codes::ARRAY_CODE)?; diff --git a/src/schema/document/value.rs b/src/schema/document/value.rs index de1067ce6..87d2593d4 
100644 --- a/src/schema/document/value.rs +++ b/src/schema/document/value.rs @@ -3,6 +3,7 @@ use std::net::Ipv6Addr; use common::DateTime; +use crate::spatial::geometry::Geometry; use crate::tokenizer::PreTokenizedString; /// A single field value. @@ -108,6 +109,12 @@ pub trait Value<'a>: Send + Sync + Debug { None } } + + #[inline] + /// HUSH + fn as_geometry(&self) -> Option> { + self.as_leaf().and_then(|leaf| leaf.into_geometry()) + } } /// A enum representing a leaf value for tantivy to index. @@ -136,6 +143,8 @@ pub enum ReferenceValueLeaf<'a> { Bool(bool), /// Pre-tokenized str type, PreTokStr(Box), + /// HUSH + Geometry(Box), } impl From for ReferenceValueLeaf<'_> { @@ -220,6 +229,9 @@ impl<'a, T: Value<'a> + ?Sized> From> for ReferenceValue< ReferenceValueLeaf::PreTokStr(val) => { ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(val)) } + ReferenceValueLeaf::Geometry(val) => { + ReferenceValue::Leaf(ReferenceValueLeaf::Geometry(val)) + } } } } @@ -331,6 +343,16 @@ impl<'a> ReferenceValueLeaf<'a> { None } } + + #[inline] + /// HUSH + pub fn into_geometry(self) -> Option> { + if let Self::Geometry(val) = self { + Some(val) + } else { + None + } + } } /// A enum representing a value for tantivy to index. 
@@ -448,4 +470,10 @@ where V: Value<'a> pub fn is_object(&self) -> bool { matches!(self, Self::Object(_)) } + + #[inline] + /// HUSH + pub fn into_geometry(self) -> Option> { + self.into_leaf().and_then(|leaf| leaf.into_geometry()) + } } diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 77e061bc6..d8325f7dc 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use super::ip_options::IpAddrOptions; +use super::spatial_options::SpatialOptions; use crate::schema::bytes_options::BytesOptions; use crate::schema::{ is_valid_field_name, DateOptions, FacetOptions, FieldType, JsonObjectOptions, NumericOptions, @@ -80,6 +81,11 @@ impl FieldEntry { Self::new(field_name, FieldType::JsonObject(json_object_options)) } + /// Creates a field entry for a spatial field + pub fn new_spatial(field_name: String, spatial_options: SpatialOptions) -> FieldEntry { + Self::new(field_name, FieldType::Spatial(spatial_options)) + } + /// Returns the name of the field pub fn name(&self) -> &str { &self.name @@ -129,6 +135,7 @@ impl FieldEntry { FieldType::Bytes(ref options) => options.is_stored(), FieldType::JsonObject(ref options) => options.is_stored(), FieldType::IpAddr(ref options) => options.is_stored(), + FieldType::Spatial(ref options) => options.is_stored(), } } } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 8b203f5b3..aa1e47094 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -9,6 +9,7 @@ use serde_json::Value as JsonValue; use thiserror::Error; use super::ip_options::IpAddrOptions; +use super::spatial_options::SpatialOptions; use super::IntoIpv6Addr; use crate::schema::bytes_options::BytesOptions; use crate::schema::facet_options::FacetOptions; @@ -16,6 +17,7 @@ use crate::schema::{ DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, OwnedValue, TextFieldIndexing, TextOptions, }; +use crate::spatial::geometry::Geometry; use 
crate::time::format_description::well_known::Rfc3339; use crate::time::OffsetDateTime; use crate::tokenizer::PreTokenizedString; @@ -71,6 +73,8 @@ pub enum Type { Json = b'j', /// IpAddr IpAddr = b'p', + /// Spatial + Spatial = b't', } impl From for Type { @@ -139,6 +143,7 @@ impl Type { Type::Bytes => "Bytes", Type::Json => "Json", Type::IpAddr => "IpAddr", + Type::Spatial => "Spatial", } } @@ -189,6 +194,8 @@ pub enum FieldType { JsonObject(JsonObjectOptions), /// IpAddr field IpAddr(IpAddrOptions), + /// Spatial field + Spatial(SpatialOptions), } impl FieldType { @@ -205,6 +212,7 @@ impl FieldType { FieldType::Bytes(_) => Type::Bytes, FieldType::JsonObject(_) => Type::Json, FieldType::IpAddr(_) => Type::IpAddr, + FieldType::Spatial(_) => Type::Spatial, } } @@ -241,6 +249,7 @@ impl FieldType { FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(), FieldType::JsonObject(ref json_object_options) => json_object_options.is_indexed(), FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_indexed(), + FieldType::Spatial(ref _spatial_options) => true, } } @@ -278,6 +287,7 @@ impl FieldType { FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_fast(), FieldType::Facet(_) => true, FieldType::JsonObject(ref json_object_options) => json_object_options.is_fast(), + FieldType::Spatial(_) => false, } } @@ -297,6 +307,7 @@ impl FieldType { FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(), FieldType::JsonObject(ref _json_object_options) => false, FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.fieldnorms(), + FieldType::Spatial(_) => false, } } @@ -348,6 +359,7 @@ impl FieldType { None } } + FieldType::Spatial(_) => todo!(), } } @@ -449,6 +461,10 @@ impl FieldType { Ok(OwnedValue::IpAddr(ip_addr.into_ipv6_addr())) } + FieldType::Spatial(_) => Err(ValueParsingError::TypeError { + expected: "spatial field parsing not implemented", + json: JsonValue::String(field_text), + }), } } JsonValue::Number(field_val_num) => match self { 
@@ -508,6 +524,10 @@ impl FieldType { expected: "a string with an ip addr", json: JsonValue::Number(field_val_num), }), + FieldType::Spatial(_) => Err(ValueParsingError::TypeError { + expected: "spatial field parsing not implemented", + json: JsonValue::Number(field_val_num), + }), }, JsonValue::Object(json_map) => match self { FieldType::Str(_) => { @@ -523,6 +543,14 @@ impl FieldType { } } FieldType::JsonObject(_) => Ok(OwnedValue::from(json_map)), + FieldType::Spatial(_) => Ok(OwnedValue::Geometry( + Geometry::from_geojson(&json_map).map_err(|e| { + ValueParsingError::ParseError { + error: format!("{:?}", e), + json: JsonValue::Object(json_map), + } + })?, + )), _ => Err(ValueParsingError::TypeError { expected: self.value_type().name(), json: JsonValue::Object(json_map), diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 627774e54..eabd1c5a1 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -170,6 +170,7 @@ pub(crate) fn value_type_to_column_type(typ: Type) -> Option { Type::Bytes => Some(ColumnType::Bytes), Type::IpAddr => Some(ColumnType::IpAddr), Type::Json => None, + Type::Spatial => None, } } diff --git a/src/schema/schema.rs b/src/schema/schema.rs index c1d22c0ba..ddfd533e4 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -194,6 +194,16 @@ impl SchemaBuilder { self.add_field(field_entry) } + /// Adds a spatial entry to the schema in build. + pub fn add_spatial_field>( + &mut self, + field_name: &str, + field_options: T, + ) -> Field { + let field_entry = FieldEntry::new_spatial(field_name.to_string(), field_options.into()); + self.add_field(field_entry) + } + /// Adds a field entry to the schema in build. 
pub fn add_field(&mut self, field_entry: FieldEntry) -> Field { let field = Field::from_field_id(self.fields.len() as u32); diff --git a/src/schema/spatial_options.rs b/src/schema/spatial_options.rs index 13cce1a6f..248a5500f 100644 --- a/src/schema/spatial_options.rs +++ b/src/schema/spatial_options.rs @@ -39,13 +39,15 @@ impl From for SpatialOptions { } } -#[cfg(test)] -mod tests { - use crate::schema::*; - - #[test] - fn test_field_options() { - let field_options = STORED | SPATIAL; - assert!(field_options.is_stored()); - } -} +// #[cfg(test)] +// mod tests { +// use crate::schema::*; +// +// #[test] +// fn test_field_options() { +// let field_options = STORED | SPATIAL; +// assert!(field_options.is_stored()); +// let mut schema_builder = Schema::builder(); +// schema_builder.add_spatial_index("where", SPATIAL | STORED); +// } +// } diff --git a/src/schema/term.rs b/src/schema/term.rs index 2dd78b82a..4414fe089 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -503,6 +503,9 @@ where B: AsRef<[u8]> Type::IpAddr => { write_opt(f, self.as_ip_addr())?; } + Type::Spatial => { + write!(f, "")?; + } } Ok(()) } diff --git a/src/space_usage/mod.rs b/src/space_usage/mod.rs index 70291153a..e44d6e427 100644 --- a/src/space_usage/mod.rs +++ b/src/space_usage/mod.rs @@ -125,6 +125,7 @@ impl SegmentSpaceUsage { SegmentComponent::Store => ComponentSpaceUsage::Store(self.store().clone()), SegmentComponent::TempStore => ComponentSpaceUsage::Store(self.store().clone()), Delete => Basic(self.deletes()), + Spatial => todo!(), } } diff --git a/src/spatial/bkd.rs b/src/spatial/bkd.rs index 99476e1c2..4c6652f64 100644 --- a/src/spatial/bkd.rs +++ b/src/spatial/bkd.rs @@ -10,10 +10,11 @@ //! branch nodes), enabling zero-copy access through memory-mapped segments without upfront //! decompression. 
use std::io; -use std::io::{Seek, Write}; +use std::io::Write; -use common::BitSet; +use common::{BitSet, CountingWriter}; +use crate::directory::WritePtr; use crate::spatial::delta::{compress, decompress, Compressible}; use crate::spatial::triangle::Triangle; @@ -88,8 +89,8 @@ struct CompressibleTriangleI32<'a> { impl<'a> CompressibleTriangleI32<'a> { fn new(triangles: &'a [Triangle], dimension: usize) -> Self { CompressibleTriangleI32 { - triangles: triangles, - dimension: dimension, + triangles, + dimension, } } } @@ -110,9 +111,7 @@ struct CompressibleTriangleDocID<'a> { impl<'a> CompressibleTriangleDocID<'a> { fn new(triangles: &'a [Triangle]) -> Self { - CompressibleTriangleDocID { - triangles: triangles, - } + CompressibleTriangleDocID { triangles } } } @@ -136,12 +135,12 @@ impl<'a> Compressible for CompressibleTriangleDocID<'a> { // We do not compress the tree nodes. We read them directly from the mapping. // -fn write_leaf_pages( +fn write_leaf_pages( triangles: &mut [Triangle], - write: &mut W, + write: &mut CountingWriter, ) -> io::Result { if triangles.len() <= 512 { - let pos = write.stream_position()?; + let pos = write.written_bytes(); let mut spreads = [SpreadSurvey::default(); 4]; let mut bounding_box = BoundingBoxSurvey::default(); for triangle in triangles.iter() { @@ -174,10 +173,10 @@ fn write_leaf_pages( for i in 0..7 { compress(&compressible[i], write)?; } - let len = write.stream_position()? 
- pos; + let len = write.written_bytes() - pos; Ok(BuildNode::Leaf { bbox: bounding_box.bbox(), - pos: pos, + pos, len: len as u16, }) } else { @@ -220,7 +219,7 @@ fn write_leaf_pages( } } -fn write_leaf_nodes(node: &BuildNode, write: &mut W) -> io::Result<()> { +fn write_leaf_nodes(node: &BuildNode, write: &mut CountingWriter) -> io::Result<()> { match node { BuildNode::Branch { bbox: _, @@ -242,11 +241,11 @@ fn write_leaf_nodes(node: &BuildNode, write: &mut W) -> io::Res Ok(()) } -fn write_branch_nodes( +fn write_branch_nodes( node: &BuildNode, branch_offset: &mut i32, leaf_offset: &mut i32, - write: &mut W, + write: &mut CountingWriter, ) -> io::Result { match node { BuildNode::Leaf { .. } => { @@ -284,21 +283,26 @@ fn write_branch_nodes( /// /// The `triangles` slice will be reordered during tree construction as partitioning sorts by the /// selected dimension at each level. -pub fn write_block_kd_tree( +pub fn write_block_kd_tree( triangles: &mut [Triangle], - write: &mut W, + write: &mut CountingWriter, ) -> io::Result<()> { + assert_eq!( + triangles.as_ptr() as usize % std::mem::align_of::(), + 0 + ); write.write_all(&1u16.to_le_bytes())?; let tree = write_leaf_pages(triangles, write)?; - let current = write.stream_position()?; + let current = write.written_bytes(); let aligned = (current + 31) & !31; let padding = aligned - current; write.write_all(&vec![0u8; padding as usize])?; write_leaf_nodes(&tree, write)?; - let branch_position = write.stream_position()?; + let branch_position = write.written_bytes(); let mut branch_offset: i32 = 0; let mut leaf_offset: i32 = -1; let root = write_branch_nodes(&tree, &mut branch_offset, &mut leaf_offset, write)?; + write.write_all(&[0u8; 12])?; write.write_all(&triangles.len().to_le_bytes())?; write.write_all(&root.to_le_bytes())?; write.write_all(&branch_position.to_le_bytes())?; @@ -355,8 +359,9 @@ impl<'a> Segment<'a> { /// Reads the footer metadata from the last 12 bytes to locate the tree structure and root /// 
node. pub fn new(data: &'a [u8]) -> Self { + assert_eq!(data.as_ptr() as usize % std::mem::align_of::(), 0); Segment { - data: data, + data, branch_position: u64::from_le_bytes(data[data.len() - 8..].try_into().unwrap()), root_offset: i32::from_le_bytes( data[data.len() - 12..data.len() - 8].try_into().unwrap(), @@ -758,3 +763,40 @@ pub fn search_contains( } Ok(()) } + +/// HUSH +pub struct LeafPageIterator<'a> { + segment: &'a Segment<'a>, + descent_stack: Vec, +} + +impl<'a> LeafPageIterator<'a> { + /// HUSH + pub fn new(segment: &'a Segment<'a>) -> Self { + Self { + segment, + descent_stack: vec![segment.root_offset], + } + } +} + +impl<'a> Iterator for LeafPageIterator<'a> { + type Item = io::Result>; + + fn next(&mut self) -> Option { + let offset = self.descent_stack.pop()?; + if offset < 0 { + let leaf_node = self.segment.leaf_node(offset); + let leaf_page = self.segment.leaf_page(leaf_node); + match decompress_leaf(leaf_page) { + Ok(triangles) => Some(Ok(triangles)), + Err(e) => Some(Err(e)), + } + } else { + let branch_node = self.segment.branch_node(offset); + self.descent_stack.push(branch_node.right); + self.descent_stack.push(branch_node.left); + self.next() + } + } +} diff --git a/src/spatial/geometry.rs b/src/spatial/geometry.rs new file mode 100644 index 000000000..5a770fc41 --- /dev/null +++ b/src/spatial/geometry.rs @@ -0,0 +1,479 @@ +//! 
HUSH + +use std::io::{self, Read, Write}; + +use common::{BinarySerializable, VInt}; +use serde_json::{json, Map, Value}; + +use crate::spatial::xor::{compress_f64, decompress_f64}; + +/// HUSH +#[derive(Debug)] +pub enum GeometryError { + /// HUSH + MissingType, + /// HUSH + MissingField(String), // "expected array", "wrong nesting depth", etc + /// HUSH + UnsupportedType(String), + /// HUSH + InvalidCoordinate(String), // Can report the actual bad value + /// HUSH + InvalidStructure(String), // "expected array", "wrong nesting depth", etc +} + +/// HUSH +#[derive(Debug, Clone, PartialEq)] +pub enum Geometry { + /// HUSH + Point((f64, f64)), + /// HUSH + MultiPoint(Vec<(f64, f64)>), + /// HUSH + LineString(Vec<(f64, f64)>), + /// HUSH + MultiLineString(Vec>), + /// HUSH + Polygon(Vec>), + /// HUSH + MultiPolygon(Vec>>), + /// HUSH + GeometryCollection(Vec), +} + +impl Geometry { + /// HUSH + pub fn from_geojson(object: &Map) -> Result { + let geometry_type = object + .get("type") + .and_then(|v| v.as_str()) + .ok_or(GeometryError::MissingType)?; + match geometry_type { + "Point" => { + let coordinates = get_coordinates(object)?; + let point = to_point(coordinates)?; + Ok(Geometry::Point(point)) + } + "MultiPoint" => { + let coordinates = get_coordinates(object)?; + let multi_point = to_line_string(coordinates)?; + Ok(Geometry::MultiPoint(multi_point)) + } + "LineString" => { + let coordinates = get_coordinates(object)?; + let line_string = to_line_string(coordinates)?; + if line_string.len() < 2 { + return Err(GeometryError::InvalidStructure( + "a line string contains at least 2 points".to_string(), + )); + } + Ok(Geometry::LineString(line_string)) + } + "MultiLineString" => { + let coordinates = get_coordinates(object)?; + let multi_line_string = to_multi_line_string(coordinates)?; + for line_string in &multi_line_string { + if line_string.len() < 2 { + return Err(GeometryError::InvalidStructure( + "a line string contains at least 2 points".to_string(), + )); + } 
+ } + Ok(Geometry::MultiLineString(multi_line_string)) + } + "Polygon" => { + let coordinates = get_coordinates(object)?; + let polygon = to_multi_line_string(coordinates)?; + for ring in &polygon { + if ring.len() < 3 { + return Err(GeometryError::InvalidStructure( + "a polygon ring contains at least 3 points".to_string(), + )); + } + } + Ok(Geometry::Polygon(polygon)) + } + "MultiPolygon" => { + let mut result = Vec::new(); + let multi_polygons = get_coordinates(object)?; + let multi_polygons = + multi_polygons + .as_array() + .ok_or(GeometryError::InvalidStructure( + "expected an array of polygons".to_string(), + ))?; + for polygon in multi_polygons { + let polygon = to_multi_line_string(polygon)?; + for ring in &polygon { + if ring.len() < 3 { + return Err(GeometryError::InvalidStructure( + "a polygon ring contains at least 3 points".to_string(), + )); + } + } + result.push(polygon); + } + Ok(Geometry::MultiPolygon(result)) + } + "GeometryCollection" => { + let geometries = object + .get("geometries") + .ok_or(GeometryError::MissingField("geometries".to_string()))?; + let geometries = geometries + .as_array() + .ok_or(GeometryError::InvalidStructure( + "geometries is not an array".to_string(), + ))?; + let mut result = Vec::new(); + for geometry in geometries { + let object = geometry.as_object().ok_or(GeometryError::InvalidStructure( + "geometry is not an object".to_string(), + ))?; + result.push(Geometry::from_geojson(object)?); + } + Ok(Geometry::GeometryCollection(result)) + } + _ => Err(GeometryError::UnsupportedType(geometry_type.to_string())), + } + } + + /// HUSH + pub fn to_geojson(&self) -> Map { + let mut map = Map::new(); + match self { + Geometry::Point(point) => { + map.insert("type".to_string(), Value::String("Point".to_string())); + let coords = json!([point.0, point.1]); + map.insert("coordinates".to_string(), coords); + } + Geometry::MultiPoint(points) => { + map.insert("type".to_string(), Value::String("MultiPoint".to_string())); + let
coords: Vec = points.iter().map(|p| json!([p.0, p.1])).collect(); + map.insert("coordinates".to_string(), Value::Array(coords)); + } + Geometry::LineString(line) => { + map.insert("type".to_string(), Value::String("LineString".to_string())); + let coords: Vec = line.iter().map(|p| json!([p.0, p.1])).collect(); + map.insert("coordinates".to_string(), Value::Array(coords)); + } + Geometry::MultiLineString(lines) => { + map.insert( + "type".to_string(), + Value::String("MultiLineString".to_string()), + ); + let coords: Vec = lines + .iter() + .map(|line| Value::Array(line.iter().map(|p| json!([p.0, p.1])).collect())) + .collect(); + map.insert("coordinates".to_string(), Value::Array(coords)); + } + Geometry::Polygon(rings) => { + map.insert("type".to_string(), Value::String("Polygon".to_string())); + let coords: Vec = rings + .iter() + .map(|ring| Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect())) + .collect(); + map.insert("coordinates".to_string(), Value::Array(coords)); + } + Geometry::MultiPolygon(polygons) => { + map.insert( + "type".to_string(), + Value::String("MultiPolygon".to_string()), + ); + let coords: Vec = polygons + .iter() + .map(|polygon| { + Value::Array( + polygon + .iter() + .map(|ring| { + Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect()) + }) + .collect(), + ) + }) + .collect(); + map.insert("coordinates".to_string(), Value::Array(coords)); + } + Geometry::GeometryCollection(geometries) => { + map.insert( + "type".to_string(), + Value::String("GeometryCollection".to_string()), + ); + let geoms: Vec = geometries + .iter() + .map(|g| Value::Object(g.to_geojson())) + .collect(); + map.insert("geometries".to_string(), Value::Array(geoms)); + } + } + map + } +} + +fn get_coordinates(object: &Map) -> Result<&Value, GeometryError> { + let coordinates = object + .get("coordinates") + .ok_or(GeometryError::MissingField("coordinates".to_string()))?; + Ok(coordinates) +} + +fn to_point(value: &Value) -> Result<(f64, f64), 
GeometryError> { + let lonlat = value.as_array().ok_or(GeometryError::InvalidStructure( + "expected 2 element array pair of lon/lat".to_string(), + ))?; + if lonlat.len() != 2 { + return Err(GeometryError::InvalidStructure( + "expected 2 element array pair of lon/lat".to_string(), + )); + } + let lon = lonlat[0].as_f64().ok_or(GeometryError::InvalidCoordinate( + "longitude must be f64".to_string(), + ))?; + if !lon.is_finite() || !(-180.0..=180.0).contains(&lon) { + return Err(GeometryError::InvalidCoordinate(format!( + "invalid longitude: {}", + lon + ))); + } + let lat = lonlat[1].as_f64().ok_or(GeometryError::InvalidCoordinate( + "latitude must be f64".to_string(), + ))?; + if !lat.is_finite() || !(-90.0..=90.0).contains(&lat) { + return Err(GeometryError::InvalidCoordinate(format!( + "invalid latitude: {}", + lat + ))); + } + Ok((lon, lat)) +} + +fn to_line_string(value: &Value) -> Result, GeometryError> { + let mut result = Vec::new(); + let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure( + "expected an array of lon/lat arrays".to_string(), + ))?; + for coordinate in coordinates { + result.push(to_point(coordinate)?); + } + Ok(result) +} + +fn to_multi_line_string(value: &Value) -> Result>, GeometryError> { + let mut result = Vec::new(); + let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure( + "expected an array of an array of lon/lat arrays".to_string(), + ))?; + for coordinate in coordinates { + result.push(to_line_string(coordinate)?); + } + Ok(result) +} + +impl BinarySerializable for Geometry { + fn serialize(&self, writer: &mut W) -> io::Result<()> { + match self { + Geometry::Point(point) => { + 0u8.serialize(writer)?; + point.0.serialize(writer)?; + point.1.serialize(writer)?; + Ok(()) + } + Geometry::MultiPoint(points) => { + 1u8.serialize(writer)?; + serialize_line_string(points, writer) + } + Geometry::LineString(line_string) => { + 2u8.serialize(writer)?; + serialize_line_string(line_string, writer) + } + 
Geometry::MultiLineString(multi_line_string) => { + 3u8.serialize(writer)?; + serialize_polygon(multi_line_string, writer) + } + Geometry::Polygon(polygon) => { + 4u8.serialize(writer)?; + serialize_polygon(polygon, writer) + } + Geometry::MultiPolygon(multi_polygon) => { + 5u8.serialize(writer)?; + BinarySerializable::serialize(&VInt(multi_polygon.len() as u64), writer)?; + for polygon in multi_polygon { + BinarySerializable::serialize(&VInt(polygon.len() as u64), writer)?; + for ring in polygon { + BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?; + } + } + let mut lon = Vec::new(); + let mut lat = Vec::new(); + for polygon in multi_polygon { + for ring in polygon { + for point in ring { + lon.push(point.0); + lat.push(point.1); + } + } + } + let lon = compress_f64(&lon); + let lat = compress_f64(&lat); + VInt(lon.len() as u64).serialize(writer)?; + writer.write_all(&lon)?; + VInt(lat.len() as u64).serialize(writer)?; + writer.write_all(&lat)?; + Ok(()) + } + Geometry::GeometryCollection(geometries) => { + 6u8.serialize(writer)?; + BinarySerializable::serialize(&VInt(geometries.len() as u64), writer)?; + for geometry in geometries { + geometry.serialize(writer)?; + } + Ok(()) + } + } + } + + fn deserialize(reader: &mut R) -> io::Result { + let discriminant: u8 = BinarySerializable::deserialize(reader)?; + match discriminant { + 0 => { + let lon = BinarySerializable::deserialize(reader)?; + let lat = BinarySerializable::deserialize(reader)?; + Ok(Geometry::Point((lon, lat))) + } + 1 => Ok(Geometry::MultiPoint(deserialize_line_string(reader)?)), + 2 => Ok(Geometry::LineString(deserialize_line_string(reader)?)), + 3 => Ok(Geometry::MultiLineString(deserialize_polygon(reader)?)), + 4 => Ok(Geometry::Polygon(deserialize_polygon(reader)?)), + 5 => { + let polygon_count = VInt::deserialize(reader)?.0 as usize; + let mut polygons = Vec::new(); + let mut count = 0; + for _ in 0..polygon_count { + let ring_count = VInt::deserialize(reader)?.0 as usize; + 
let mut rings = Vec::new(); + for _ in 0..ring_count { + rings.push(VInt::deserialize(reader)?.0 as usize); + count += 1; + } + polygons.push(rings); + } + let lon_bytes: Vec = BinarySerializable::deserialize(reader)?; + let lat_bytes: Vec = BinarySerializable::deserialize(reader)?; + let lon = decompress_f64(&lon_bytes, count); + let lat = decompress_f64(&lat_bytes, count); + let mut multi_polygon = Vec::new(); + let mut offset = 0; + for rings in polygons { + let mut polygon = Vec::new(); + for point_count in rings { + let mut ring = Vec::new(); + for _ in 0..point_count { + ring.push((lon[offset], lat[offset])); + offset += 1; + } + polygon.push(ring); + } + multi_polygon.push(polygon); + } + Ok(Geometry::MultiPolygon(multi_polygon)) + } + 6 => { + let geometry_count = VInt::deserialize(reader)?.0 as usize; + let mut geometries = Vec::new(); + for _ in 0..geometry_count { + geometries.push(Geometry::deserialize(reader)?); + } + Ok(Geometry::GeometryCollection(geometries)) + } + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + "invalid geometry type", + )), + } + } +} + +fn serialize_line_string( + line: &Vec<(f64, f64)>, + writer: &mut W, +) -> io::Result<()> { + BinarySerializable::serialize(&VInt(line.len() as u64), writer)?; + let mut lon = Vec::new(); + let mut lat = Vec::new(); + for point in line { + lon.push(point.0); + lat.push(point.1); + } + let lon = compress_f64(&lon); + let lat = compress_f64(&lat); + VInt(lon.len() as u64).serialize(writer)?; + writer.write_all(&lon)?; + VInt(lat.len() as u64).serialize(writer)?; + writer.write_all(&lat)?; + Ok(()) +} + +fn serialize_polygon( + line_string: &Vec>, + writer: &mut W, +) -> io::Result<()> { + BinarySerializable::serialize(&VInt(line_string.len() as u64), writer)?; + for ring in line_string { + BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?; + } + let mut lon = Vec::new(); + let mut lat = Vec::new(); + for ring in line_string { + for point in ring { + lon.push(point.0); + 
lat.push(point.1); + } + } + let lon = compress_f64(&lon); + let lat = compress_f64(&lat); + VInt(lon.len() as u64).serialize(writer)?; + writer.write_all(&lon)?; + VInt(lat.len() as u64).serialize(writer)?; + writer.write_all(&lat)?; + Ok(()) +} + +fn deserialize_line_string(reader: &mut R) -> io::Result> { + let point_count = VInt::deserialize(reader)?.0 as usize; + let lon_bytes: Vec = BinarySerializable::deserialize(reader)?; + let lat_bytes: Vec = BinarySerializable::deserialize(reader)?; + let lon = decompress_f64(&lon_bytes, point_count); + let lat = decompress_f64(&lat_bytes, point_count); + let mut line_string = Vec::new(); + for offset in 0..point_count { + line_string.push((lon[offset], lat[offset])); + } + Ok(line_string) +} + +fn deserialize_polygon(reader: &mut R) -> io::Result>> { + let ring_count = VInt::deserialize(reader)?.0 as usize; + let mut rings = Vec::new(); + let mut count = 0; + for _ in 0..ring_count { + let point_count = VInt::deserialize(reader)?.0 as usize; + rings.push(point_count); + count += point_count; + } + let lon_bytes: Vec = BinarySerializable::deserialize(reader)?; + let lat_bytes: Vec = BinarySerializable::deserialize(reader)?; + let lon = decompress_f64(&lon_bytes, count); + let lat = decompress_f64(&lat_bytes, count); + let mut polygon = Vec::new(); + let mut offset = 0; + for point_count in rings { + let mut ring = Vec::new(); + for _ in 0..point_count { + ring.push((lon[offset], lat[offset])); + offset += 1; + } + polygon.push(ring); + } + Ok(polygon) +} diff --git a/src/spatial/mod.rs b/src/spatial/mod.rs index 0e75f3d89..70c59aafc 100644 --- a/src/spatial/mod.rs +++ b/src/spatial/mod.rs @@ -2,6 +2,10 @@ pub mod bkd; pub mod delta; +pub mod geometry; pub mod radix_select; +pub mod reader; +pub mod serializer; pub mod triangle; +pub mod writer; pub mod xor; diff --git a/src/spatial/reader.rs b/src/spatial/reader.rs new file mode 100644 index 000000000..9338111ff --- /dev/null +++ b/src/spatial/reader.rs @@ -0,0 +1,65 @@ 
+//! HUSH +use std::io; +use std::sync::Arc; + +use common::file_slice::FileSlice; +use common::OwnedBytes; + +use crate::directory::CompositeFile; +use crate::schema::Field; +use crate::space_usage::PerFieldSpaceUsage; + +#[derive(Clone)] +/// HUSH +pub struct SpatialReaders { + data: Arc, +} + +impl SpatialReaders { + /// Opens the per-field spatial readers from the spatial composite file. + pub fn open(file: FileSlice) -> crate::Result { + let data = CompositeFile::open(&file)?; + Ok(SpatialReaders { + data: Arc::new(data), + }) + } + + /// Returns the `SpatialReader` for a specific field. + pub fn get_field(&self, field: Field) -> crate::Result> { + if let Some(file) = self.data.open_read(field) { + let spatial_reader = SpatialReader::open(file)?; + Ok(Some(spatial_reader)) + } else { + Ok(None) + } + } + + /// Return a break down of the space usage per field. + pub fn space_usage(&self) -> PerFieldSpaceUsage { + self.data.space_usage() + } + + /// Returns a handle to inner file + pub fn get_inner_file(&self) -> Arc { + self.data.clone() + } +} + +/// HUSH +#[derive(Clone)] +pub struct SpatialReader { + data: OwnedBytes, +} + +impl SpatialReader { + /// Opens the spatial reader from a `FileSlice`, reading the entire slice of + /// spatial data into memory. + pub fn open(spatial_file: FileSlice) -> io::Result { + let data = spatial_file.read_bytes()?; + Ok(SpatialReader { data }) + } + /// HUSH + pub fn get_bytes(&self) -> &[u8] { + self.data.as_ref() + } +} diff --git a/src/spatial/serializer.rs b/src/spatial/serializer.rs new file mode 100644 index 000000000..8336d45c6 --- /dev/null +++ b/src/spatial/serializer.rs @@ -0,0 +1,37 @@ +//! HUSH +use std::io; +use std::io::Write; + +use crate::directory::{CompositeWrite, WritePtr}; +use crate::schema::Field; +use crate::spatial::bkd::write_block_kd_tree; +use crate::spatial::triangle::Triangle; + +/// The spatial serializer is in charge of +/// the serialization of spatial data for all fields.
+pub struct SpatialSerializer { + composite_write: CompositeWrite, +} + +impl SpatialSerializer { + /// Create a composite file from the write pointer. + pub fn from_write(write: WritePtr) -> io::Result { + // just making room for the pointer to header. + let composite_write = CompositeWrite::wrap(write); + Ok(SpatialSerializer { composite_write }) + } + + /// Serialize the given field + pub fn serialize_field(&mut self, field: Field, triangles: &mut [Triangle]) -> io::Result<()> { + let write = self.composite_write.for_field(field); + write_block_kd_tree(triangles, write)?; + write.flush()?; + Ok(()) + } + + /// Clean up, flush, and close. + pub fn close(self) -> io::Result<()> { + self.composite_write.close()?; + Ok(()) + } +} diff --git a/src/spatial/writer.rs b/src/spatial/writer.rs new file mode 100644 index 000000000..b1584fcd1 --- /dev/null +++ b/src/spatial/writer.rs @@ -0,0 +1,132 @@ +//! HUSH +use std::collections::HashMap; +use std::io; + +use i_triangle::i_overlay::i_float::int::point::IntPoint; +use i_triangle::int::triangulatable::IntTriangulatable; + +use crate::schema::Field; +use crate::spatial::geometry::Geometry; +use crate::spatial::serializer::SpatialSerializer; +use crate::spatial::triangle::{delaunay_to_triangles, Triangle}; +use crate::DocId; + +/// HUSH +pub struct SpatialWriter { + /// Map from field to its triangles buffer + triangles_by_field: HashMap>, +} + +impl SpatialWriter { + /// HUSH + pub fn add_geometry(&mut self, doc_id: DocId, field: Field, geometry: Geometry) { + let triangles = &mut self.triangles_by_field.entry(field).or_default(); + match geometry { + Geometry::Point(point) => { + into_point(triangles, doc_id, point); + } + Geometry::MultiPoint(multi_point) => { + for point in multi_point { + into_point(triangles, doc_id, point); + } + } + Geometry::LineString(line_string) => { + into_line_string(triangles, doc_id, line_string); + } + Geometry::MultiLineString(multi_line_string) => { + for line_string in multi_line_string
{ + into_line_string(triangles, doc_id, line_string); + } + } + Geometry::Polygon(polygon) => { + into_polygon(triangles, doc_id, polygon); + } + Geometry::MultiPolygon(multi_polygon) => { + for polygon in multi_polygon { + into_polygon(triangles, doc_id, polygon); + } + } + Geometry::GeometryCollection(geometries) => { + for geometry in geometries { + self.add_geometry(doc_id, field, geometry); + } + } + } + } + + /// Memory usage estimate + pub fn mem_usage(&self) -> usize { + self.triangles_by_field + .values() + .map(|triangles| triangles.len() * std::mem::size_of::()) + .sum() + } + + /// HUSH + pub fn serialize(&mut self, mut serializer: SpatialSerializer) -> io::Result<()> { + for (field, triangles) in &mut self.triangles_by_field { + serializer.serialize_field(*field, triangles)?; + } + serializer.close()?; + Ok(()) + } +} + +impl Default for SpatialWriter { + /// HUSH + fn default() -> Self { + SpatialWriter { + triangles_by_field: HashMap::new(), + } + } +} + +/// HUSH +pub fn as_point_i32(point: (f64, f64)) -> (i32, i32) { + ( + (point.0 / (360.0 / (1i64 << 32) as f64)).floor() as i32, + (point.1 / (180.0 / (1i64 << 32) as f64)).floor() as i32, + ) +} + +fn into_point(triangles: &mut Vec, doc_id: DocId, point: (f64, f64)) { + let point = as_point_i32(point); + triangles.push(Triangle::new( + doc_id, + [point.1, point.0, point.1, point.0, point.1, point.0], + [true, true, true], + )); +} + +fn into_line_string(triangles: &mut Vec, doc_id: DocId, line_string: Vec<(f64, f64)>) { + let mut previous = as_point_i32(line_string[0]); + for point in line_string.into_iter().skip(1) { + let point = as_point_i32(point); + triangles.push(Triangle::new( + doc_id, + [ + previous.1, previous.0, point.1, point.0, previous.1, previous.0, + ], + [true, true, true], + )); + previous = point + } +} + +fn into_ring(i_polygon: &mut Vec>, ring: Vec<(f64, f64)>) { + let mut i_ring = Vec::new(); + for point in ring { + let point = as_point_i32(point); + 
i_ring.push(IntPoint::new(point.0, point.1)); + } + i_polygon.push(i_ring); +} + +fn into_polygon(triangles: &mut Vec, doc_id: DocId, polygon: Vec>) { + let mut i_polygon = Vec::new(); + for ring in polygon { + into_ring(&mut i_polygon, ring); + } + let delaunay = i_polygon.triangulate().into_delaunay(); + delaunay_to_triangles(doc_id, &delaunay, triangles); +}