Files
tantivy/src/query/spatial_query.rs
Alan Gutierrez b36c3bf36d Slot block kd-tree into Tantivy.
Implemented a geometry document field with a minimal `Geometry` enum.
Now able to add that Geometry from GeoJSON parsed from a JSON document.
Geometry is triangulated if it is a polygon, otherwise it is correctly
encoded as a degenerate triangle if it is a point or a line string.
Write accumulated triangles to a block kd-tree on commit.

Serialize the original `f64` polygon for retrieval from search.

Created a query method for intersection. Query against the memory mapped
block kd-tree. Return hits and original `f64` polygon.

Implemented a merge of one or more block kd-trees from one or more
segments during merge.

Updated the block kd-tree to write to a Tantivy `WritePtr` instead of
more generic Rust I/O.
2025-11-04 01:27:18 -06:00

171 lines
4.1 KiB
Rust

//! Spatial query that matches documents by searching a segment's block kd-tree.
use common::BitSet;
use crate::query::{BitSetDocSet, Query, Scorer, Weight};
use crate::schema::Field;
use crate::spatial::bkd::{search_intersects, Segment};
use crate::spatial::writer::as_point_i32;
use crate::{DocId, DocSet, Score, TantivyError, TERMINATED};
/// The relation a [`SpatialQuery`] tests between its bounds and the indexed
/// geometries.
#[derive(Clone, Copy, Debug)]
pub enum SpatialQueryType {
/// Match through the block kd-tree `search_intersects` search.
Intersects,
// NOTE(review): the variants below are disabled; presumably planned but not
// yet implemented.
// Within,
// Contains,
}
/// A query over a spatial field, described by two corner points and a
/// [`SpatialQueryType`].
///
/// The corners are stored as the `i32` points that `as_point_i32` produces from
/// the `f64` pairs handed to [`SpatialQuery::new`].
#[derive(Clone, Copy, Debug)]
pub struct SpatialQuery {
/// Field whose spatial data is searched.
field: Field,
/// The two corner points of the query bounds, already converted to `i32`.
bounds: [(i32, i32); 2],
/// Relation tested between the bounds and the indexed geometries.
query_type: SpatialQueryType,
}
impl SpatialQuery {
    /// Creates a query against `field` for the given bounds and relation.
    ///
    /// Each of the two `f64` points in `bounds` is converted with
    /// `as_point_i32` before being stored, so the query itself only holds
    /// `i32` coordinates.
    // NOTE(review): the meaning of each tuple component (e.g. longitude vs.
    // latitude) is defined by `as_point_i32` and the caller — confirm there.
    pub fn new(field: Field, bounds: [(f64, f64); 2], query_type: SpatialQueryType) -> Self {
        let [first_corner, second_corner] = bounds;
        let encoded_bounds = [as_point_i32(first_corner), as_point_i32(second_corner)];
        Self {
            field,
            bounds: encoded_bounds,
            query_type,
        }
    }
}
impl Query for SpatialQuery {
    /// Builds the [`SpatialWeight`] for this query.
    ///
    /// The scoring mode is ignored: the weight is created from the field,
    /// bounds and query type alone.
    fn weight(
        &self,
        _enable_scoring: super::EnableScoring<'_>,
    ) -> crate::Result<Box<dyn super::Weight>> {
        // `SpatialQuery` is `Copy`, so its parts can be taken out by value.
        let Self {
            field,
            bounds,
            query_type,
        } = *self;
        let weight = SpatialWeight::new(field, bounds, query_type);
        Ok(Box::new(weight))
    }
}
/// [`Weight`] created by [`SpatialQuery`]; it carries the field, the `i32`
/// bounds and the query type needed to build a scorer for a segment.
pub struct SpatialWeight {
/// Field whose spatial data is searched.
field: Field,
/// The two corner points of the query bounds as `i32` coordinates.
bounds: [(i32, i32); 2],
/// Relation tested between the bounds and the indexed geometries.
query_type: SpatialQueryType,
}
impl SpatialWeight {
fn new(field: Field, bounds: [(i32, i32); 2], query_type: SpatialQueryType) -> Self {
SpatialWeight {
field,
bounds,
query_type,
}
}
}
impl Weight for SpatialWeight {
fn scorer(
&self,
reader: &crate::SegmentReader,
boost: crate::Score,
) -> crate::Result<Box<dyn super::Scorer>> {
let spatial_reader = reader
.spatial_fields()
.get_field(self.field)?
.ok_or_else(|| TantivyError::SchemaError(format!("No spatial data for field")))?;
let block_kd_tree = Segment::new(spatial_reader.get_bytes());
match self.query_type {
SpatialQueryType::Intersects => {
let mut include = BitSet::with_max_value(reader.max_doc());
search_intersects(
&block_kd_tree,
block_kd_tree.root_offset,
&[
self.bounds[0].1,
self.bounds[0].0,
self.bounds[1].1,
self.bounds[1].0,
],
&mut include,
)?;
Ok(Box::new(SpatialScorer::new(boost, include, None)))
}
}
}
fn explain(
&self,
_reader: &crate::SegmentReader,
_doc: DocId,
) -> crate::Result<super::Explanation> {
todo!();
}
}
/// Scorer that iterates the documents of an include bitset, skips those found
/// in an optional exclude bitset, and reports one fixed score for every match.
struct SpatialScorer {
/// Candidate documents, iterated as a doc set.
include: BitSetDocSet,
/// Documents to skip even when present in `include`; `None` skips nothing.
exclude: Option<BitSet>,
/// Current document, or `TERMINATED` once `include` is exhausted.
doc_id: DocId,
/// Score returned for every matching document.
score: Score,
}
impl SpatialScorer {
    /// Creates a scorer positioned on the first document of `include` that is
    /// not in `exclude` (or on `TERMINATED` when there is none).
    ///
    /// `score` is the value reported for every matching document.
    pub fn new(score: Score, include: BitSet, exclude: Option<BitSet>) -> Self {
        let include = BitSetDocSet::from(include);
        let mut this = Self {
            include,
            exclude,
            // Placeholder; `prime` sets the real starting document below.
            doc_id: 0,
            score,
        };
        this.prime();
        this
    }

    /// Positions the scorer on the first non-excluded document.
    fn prime(&mut self) {
        self.doc_id = self.include.doc();
        loop {
            if !self.exclude() {
                break;
            }
            self.doc_id = self.include.advance();
        }
    }

    /// Reports whether the current document must be skipped.
    ///
    /// `TERMINATED` is never treated as excluded, so skip loops always stop at
    /// the end of the include set.
    fn exclude(&self) -> bool {
        if let Some(excluded) = &self.exclude {
            self.doc_id != TERMINATED && excluded.contains(self.doc_id)
        } else {
            false
        }
    }
}
impl Scorer for SpatialScorer {
    /// Returns the fixed score given at construction; it does not depend on
    /// the current document.
    fn score(&mut self) -> Score {
        self.score
    }
}
impl DocSet for SpatialScorer {
    /// Moves to the next included document that is not excluded and returns
    /// it, or `TERMINATED` when the include set is exhausted.
    ///
    /// Once `TERMINATED` has been reached, further calls keep returning it
    /// without touching the underlying doc set.
    fn advance(&mut self) -> DocId {
        if self.doc_id == TERMINATED {
            return TERMINATED;
        }
        self.doc_id = self.include.advance();
        while self.exclude() {
            self.doc_id = self.include.advance();
        }
        self.doc_id
    }

    /// Estimates the number of matching documents as the include hint minus
    /// the size of the exclude set, floored at zero.
    fn size_hint(&self) -> u32 {
        let included = self.include.size_hint();
        match &self.exclude {
            // The exclude set may hold documents that are not in `include`, so
            // it can be larger than the include hint. Saturate instead of
            // underflowing (which would panic in debug builds and wrap to a
            // huge value in release builds).
            Some(exclude) => included.saturating_sub(exclude.len() as u32),
            None => included,
        }
    }

    /// Returns the current document, or `TERMINATED` when iteration is over.
    fn doc(&self) -> DocId {
        self.doc_id
    }
}