Introduced geopoint.

This commit is contained in:
Paul Masurel
2025-12-03 17:05:27 +01:00
parent f85a27068d
commit 643639f14b
7 changed files with 59 additions and 45 deletions

View File

@@ -40,7 +40,16 @@ fn main() -> tantivy::Result<()> {
let field = schema.get_field("geometry").unwrap(); let field = schema.get_field("geometry").unwrap();
let query = SpatialQuery::new( let query = SpatialQuery::new(
field, field,
[GeoPoint { lon:-99.49, lat: 45.56}, GeoPoint {lon:-99.45, lat: 45.59}], [
GeoPoint {
lon: -99.49,
lat: 45.56,
},
GeoPoint {
lon: -99.45,
lat: 45.59,
},
],
tantivy::query::SpatialQueryType::Intersects, tantivy::query::SpatialQueryType::Intersects,
); );
let hits = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?; let hits = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;

View File

@@ -699,18 +699,24 @@ mod tests {
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap(); let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy)); index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!( index_writer
date_field => DateTime::from_u64(1i64.to_u64()), .add_document(doc!(
multi_date_field => DateTime::from_u64(2i64.to_u64()), date_field => DateTime::from_u64(1i64.to_u64()),
multi_date_field => DateTime::from_u64(3i64.to_u64()) multi_date_field => DateTime::from_u64(2i64.to_u64()),
)).unwrap(); multi_date_field => DateTime::from_u64(3i64.to_u64())
index_writer.add_document(doc!( ))
date_field => DateTime::from_u64(4i64.to_u64()) .unwrap();
)).unwrap(); index_writer
index_writer.add_document(doc!( .add_document(doc!(
multi_date_field => DateTime::from_u64(5i64.to_u64()), date_field => DateTime::from_u64(4i64.to_u64())
multi_date_field => DateTime::from_u64(6i64.to_u64()) ))
)).unwrap(); .unwrap();
index_writer
.add_document(doc!(
multi_date_field => DateTime::from_u64(5i64.to_u64()),
multi_date_field => DateTime::from_u64(6i64.to_u64())
))
.unwrap();
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); let reader = index.reader().unwrap();
let searcher = reader.searcher(); let searcher = reader.searcher();

View File

@@ -181,12 +181,11 @@ impl SegmentReader {
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?; let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let spatial_readers = if schema.contains_spatial_field() { let spatial_readers = if schema.contains_spatial_field() {
let spatial_data = segment.open_read(SegmentComponent::Spatial)?; let spatial_data = segment.open_read(SegmentComponent::Spatial)?;
SpatialReaders::open(spatial_data)? SpatialReaders::open(spatial_data)?
} else { } else {
SpatialReaders::empty() SpatialReaders::empty()
}; };
let original_bitset = if segment.meta().has_deletes() { let original_bitset = if segment.meta().has_deletes() {
let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?; let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?;

View File

@@ -546,7 +546,7 @@ impl IndexMerger {
let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() else { let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() else {
// The schema does not contain any spatial field. // The schema does not contain any spatial field.
return Ok(()) return Ok(());
}; };
let mut segment_mappings: Vec<Vec<Option<DocId>>> = Vec::new(); let mut segment_mappings: Vec<Vec<Option<DocId>>> = Vec::new();
@@ -592,19 +592,19 @@ impl IndexMerger {
// No need to fsync here. This file is not here for persistency. // No need to fsync here. This file is not here for persistency.
} }
} }
for (field, temp_file) in temp_files { for (field, temp_file) in temp_files {
// Memory map the triangle file. // Memory map the triangle file.
use memmap2::MmapOptions; use memmap2::MmapOptions;
let mmap = unsafe { MmapOptions::new().map_mut(temp_file.as_file())? }; let mmap = unsafe { MmapOptions::new().map_mut(temp_file.as_file())? };
// Cast to &[Triangle] slice // Cast to &[Triangle] slice
let triangle_count = mmap.len() / std::mem::size_of::<Triangle>(); let triangle_count = mmap.len() / std::mem::size_of::<Triangle>();
let triangles = unsafe { let triangles = unsafe {
std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Triangle, triangle_count) std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Triangle, triangle_count)
}; };
// Get spatial writer and rebuild block kd-tree. // Get spatial writer and rebuild block kd-tree.
spatial_serializer.serialize_field(field, triangles)?; spatial_serializer.serialize_field(field, triangles)?;
} }
spatial_serializer.close()?; spatial_serializer.close()?;
Ok(()) Ok(())
} }

View File

@@ -189,7 +189,9 @@ impl Geometry {
polygon polygon
.iter() .iter()
.map(|ring| { .map(|ring| {
Value::Array(ring.iter().map(|p| json!([p.lon, p.lat])).collect()) Value::Array(
ring.iter().map(|p| json!([p.lon, p.lat])).collect(),
)
}) })
.collect(), .collect(),
) )
@@ -400,10 +402,7 @@ impl BinarySerializable for Geometry {
} }
} }
fn serialize_line_string<W: Write + ?Sized>( fn serialize_line_string<W: Write + ?Sized>(line: &[GeoPoint], writer: &mut W) -> io::Result<()> {
line: &[GeoPoint],
writer: &mut W,
) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line.len() as u64), writer)?; BinarySerializable::serialize(&VInt(line.len() as u64), writer)?;
let mut lon = Vec::new(); let mut lon = Vec::new();
let mut lat = Vec::new(); let mut lat = Vec::new();
@@ -453,7 +452,10 @@ fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<GeoPoint>>
let lat: Vec<f64> = decompress_f64(&lat_bytes, point_count); let lat: Vec<f64> = decompress_f64(&lat_bytes, point_count);
let mut line_string: Vec<GeoPoint> = Vec::new(); let mut line_string: Vec<GeoPoint> = Vec::new();
for offset in 0..point_count { for offset in 0..point_count {
line_string.push(GeoPoint { lon: lon[offset], lat: lat[offset] }); line_string.push(GeoPoint {
lon: lon[offset],
lat: lat[offset],
});
} }
Ok(line_string) Ok(line_string)
} }
@@ -476,7 +478,10 @@ fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<GeoPoint>>
for point_count in rings { for point_count in rings {
let mut ring = Vec::new(); let mut ring = Vec::new();
for _ in 0..point_count { for _ in 0..point_count {
ring.push(GeoPoint { lon: lon[offset], lat: lat[offset] }); ring.push(GeoPoint {
lon: lon[offset],
lat: lat[offset],
});
offset += 1; offset += 1;
} }
polygon.push(ring); polygon.push(ring);

View File

@@ -1,4 +1,3 @@
/// A point in the geographical coordinate system. /// A point in the geographical coordinate system.
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoPoint { pub struct GeoPoint {

View File

@@ -95,11 +95,7 @@ fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: GeoPoint) {
triangles.push(Triangle::from_point(doc_id, point.0, point.1)); triangles.push(Triangle::from_point(doc_id, point.0, point.1));
} }
fn append_line_string( fn append_line_string(triangles: &mut Vec<Triangle>, doc_id: DocId, line_string: Vec<GeoPoint>) {
triangles: &mut Vec<Triangle>,
doc_id: DocId,
line_string: Vec<GeoPoint>,
) {
let mut previous = as_point_i32(line_string[0]); let mut previous = as_point_i32(line_string[0]);
for point in line_string.into_iter().skip(1) { for point in line_string.into_iter().skip(1) {
let point = as_point_i32(point); let point = as_point_i32(point);