Introduced GeoPoint.

Paul Masurel
2025-12-03 17:05:16 +01:00
parent 1619e05bc5
commit f85a27068d
11 changed files with 116 additions and 75 deletions

View File

@@ -1,6 +1,8 @@
use geo_types::Point;
use tantivy::collector::TopDocs;
use tantivy::query::SpatialQuery;
use tantivy::schema::{Schema, Value, SPATIAL, STORED, TEXT};
use tantivy::spatial::point::GeoPoint;
use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();
@@ -38,7 +40,7 @@ fn main() -> tantivy::Result<()> {
let field = schema.get_field("geometry").unwrap();
let query = SpatialQuery::new(
field,
[(-99.49, 45.56), (-99.45, 45.59)],
[GeoPoint { lon: -99.49, lat: 45.56 }, GeoPoint { lon: -99.45, lat: 45.59 }],
tantivy::query::SpatialQueryType::Intersects,
);
let hits = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
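
The switch from (f64, f64) tuples to GeoPoint makes the longitude/latitude order explicit at every call site. A minimal sketch of the same bounding-box construction wrapped in a helper — the helper name is illustrative, not part of the commit; the field setup and the Intersects query type follow the example above:

use tantivy::query::{SpatialQuery, SpatialQueryType};
use tantivy::schema::Field;
use tantivy::spatial::point::GeoPoint;

// Hypothetical convenience helper: builds an "intersects" query from the two
// opposite corners of a bounding box, as in the example above.
fn bbox_intersects_query(field: Field, corner_min: GeoPoint, corner_max: GeoPoint) -> SpatialQuery {
    // Named lon/lat fields make the ordering explicit, unlike the old (f64, f64) tuples.
    SpatialQuery::new(field, [corner_min, corner_max], SpatialQueryType::Intersects)
}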

View File

@@ -683,7 +683,7 @@ mod tests {
}
#[test]
fn test_datefastfield() -> crate::Result<()> {
fn test_datefastfield() {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"date",
@@ -697,22 +697,22 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
date_field => DateTime::from_u64(1i64.to_u64()),
multi_date_field => DateTime::from_u64(2i64.to_u64()),
multi_date_field => DateTime::from_u64(3i64.to_u64())
))?;
)).unwrap();
index_writer.add_document(doc!(
date_field => DateTime::from_u64(4i64.to_u64())
))?;
)).unwrap();
index_writer.add_document(doc!(
multi_date_field => DateTime::from_u64(5i64.to_u64()),
multi_date_field => DateTime::from_u64(6i64.to_u64())
))?;
index_writer.commit()?;
let reader = index.reader()?;
)).unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let segment_reader = searcher.segment_reader(0);
@@ -746,7 +746,6 @@ mod tests {
assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
assert_eq!(dates[1].into_timestamp_nanos(), 6i64);
}
Ok(())
}
#[test]

View File

@@ -180,8 +180,13 @@ impl SegmentReader {
let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?;
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let spatial_data = segment.open_read(SegmentComponent::Spatial)?;
let spatial_readers = SpatialReaders::open(spatial_data)?;
let spatial_readers = if schema.contains_spatial_field() {
let spatial_data = segment.open_read(SegmentComponent::Spatial)?;
SpatialReaders::open(spatial_data)?
} else {
SpatialReaders::empty()
};
let original_bitset = if segment.meta().has_deletes() {
let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?;

View File

@@ -175,6 +175,7 @@ impl IndexMerger {
let mut readers = vec![];
for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) {
if segment.meta().num_docs() > 0 {
dbg!("segment");
let reader =
SegmentReader::open_with_custom_alive_set(segment, new_alive_bitset_opt)?;
readers.push(reader);
@@ -530,7 +531,6 @@ impl IndexMerger {
serializer: &mut SegmentSerializer,
doc_id_mapping: &SegmentDocIdMapping,
) -> crate::Result<()> {
/// Unfortunately, there is no special trick to merging segments.
/// We need to rebuild a BKD-tree based off the list of triangles.
///
/// Because the data can be large, we do this by writing the sequence of triangles to
@@ -543,6 +543,12 @@ impl IndexMerger {
/// swap, the memory will not be accounted as anonymous memory,
/// swap space is reserved etc.
use crate::spatial::bkd::Segment;
let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() else {
// The schema does not contain any spatial field.
return Ok(())
};
let mut segment_mappings: Vec<Vec<Option<DocId>>> = Vec::new();
for reader in &self.readers {
let max_doc = reader.max_doc();
@@ -586,7 +592,6 @@ impl IndexMerger {
// No need to fsync here. This file is not here for persistency.
}
}
if let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() {
for (field, temp_file) in temp_files {
// Memory map the triangle file.
use memmap2::MmapOptions;
@@ -600,7 +605,7 @@ impl IndexMerger {
spatial_serializer.serialize_field(field, triangles)?;
}
spatial_serializer.close()?;
}
Ok(())
}
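
The doc comment above spells out the merge strategy: there is no shortcut, so the BKD-tree is rebuilt from the list of triangles, which are streamed to a temporary file and memory-mapped rather than held as anonymous memory. A rough sketch of that spill-then-mmap step, assuming the tempfile crate for the scratch file and a fixed-size placeholder encoding for Triangle (the real merger uses its own temp-file handling and serializer):

use std::io::Write;

use memmap2::MmapOptions;

// Placeholder size for one encoded triangle: a doc_id plus three (i32, i32) vertices.
const TRIANGLE_BYTES: usize = 7 * 4;

fn spill_and_map(encoded_triangles: &[[u8; TRIANGLE_BYTES]]) -> std::io::Result<memmap2::Mmap> {
    // Stream the triangles to a temporary file so the OS can page them out;
    // they are not accounted as anonymous memory. No fsync: the file is not for persistence.
    let mut tmp = tempfile::tempfile()?;
    for record in encoded_triangles {
        tmp.write_all(record)?;
    }
    // Memory-map the file; the mapped bytes then feed the BKD-tree rebuild.
    unsafe { MmapOptions::new().map(&tmp) }
}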

View File

@@ -6,6 +6,7 @@ use crate::query::explanation::does_not_match;
use crate::query::{BitSetDocSet, Explanation, Query, Scorer, Weight};
use crate::schema::Field;
use crate::spatial::bkd::{search_intersects, Segment};
use crate::spatial::point::GeoPoint;
use crate::spatial::writer::as_point_i32;
use crate::{DocId, DocSet, Score, TantivyError, TERMINATED};
@@ -28,7 +29,7 @@ pub struct SpatialQuery {
impl SpatialQuery {
/// Creates a spatial query over the given field, bounding-box corners, and query type.
pub fn new(field: Field, bounds: [(f64, f64); 2], query_type: SpatialQueryType) -> Self {
pub fn new(field: Field, bounds: [GeoPoint; 2], query_type: SpatialQueryType) -> Self {
SpatialQuery {
field,
bounds: [as_point_i32(bounds[0]), as_point_i32(bounds[1])],

View File

@@ -5,6 +5,7 @@ use std::io::{self, Read, Write};
use common::{BinarySerializable, VInt};
use serde_json::{json, Map, Value};
use crate::spatial::point::GeoPoint;
use crate::spatial::xor::{compress_f64, decompress_f64};
/// HUSH
@@ -26,17 +27,17 @@ pub enum GeometryError {
#[derive(Debug, Clone, PartialEq)]
pub enum Geometry {
/// A single point.
Point((f64, f64)),
Point(GeoPoint),
/// A set of points.
MultiPoint(Vec<(f64, f64)>),
MultiPoint(Vec<GeoPoint>),
/// A sequence of points forming a line.
LineString(Vec<(f64, f64)>),
LineString(Vec<GeoPoint>),
/// A set of line strings.
MultiLineString(Vec<Vec<(f64, f64)>>),
MultiLineString(Vec<Vec<GeoPoint>>),
/// A polygon, given as a list of rings.
Polygon(Vec<Vec<(f64, f64)>>),
Polygon(Vec<Vec<GeoPoint>>),
/// A set of polygons.
MultiPolygon(Vec<Vec<Vec<(f64, f64)>>>),
MultiPolygon(Vec<Vec<Vec<GeoPoint>>>),
/// A heterogeneous collection of geometries.
GeometryCollection(Vec<Self>),
}
@@ -137,23 +138,24 @@ impl Geometry {
}
}
/// HUSH
/// Serialize the geometry to GeoJSON format.
/// https://fr.wikipedia.org/wiki/GeoJSON
pub fn to_geojson(&self) -> Map<String, Value> {
let mut map = Map::new();
match self {
Geometry::Point(point) => {
map.insert("type".to_string(), Value::String("Point".to_string()));
let coords = json!([point.0, point.1]);
let coords = json!([point.lon, point.lat]);
map.insert("coordinates".to_string(), coords);
}
Geometry::MultiPoint(points) => {
map.insert("type".to_string(), Value::String("MultiPoint".to_string()));
let coords: Vec<Value> = points.iter().map(|p| json!([p.0, p.1])).collect();
let coords: Vec<Value> = points.iter().map(|p| json!([p.lon, p.lat])).collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::LineString(line) => {
map.insert("type".to_string(), Value::String("LineString".to_string()));
let coords: Vec<Value> = line.iter().map(|p| json!([p.0, p.1])).collect();
let coords: Vec<Value> = line.iter().map(|p| json!([p.lon, p.lat])).collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::MultiLineString(lines) => {
@@ -163,7 +165,7 @@ impl Geometry {
);
let coords: Vec<Value> = lines
.iter()
.map(|line| Value::Array(line.iter().map(|p| json!([p.0, p.1])).collect()))
.map(|line| Value::Array(line.iter().map(|p| json!([p.lon, p.lat])).collect()))
.collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
@@ -171,7 +173,7 @@ impl Geometry {
map.insert("type".to_string(), Value::String("Polygon".to_string()));
let coords: Vec<Value> = rings
.iter()
.map(|ring| Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect()))
.map(|ring| Value::Array(ring.iter().map(|p| json!([p.lon, p.lat])).collect()))
.collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
@@ -187,7 +189,7 @@ impl Geometry {
polygon
.iter()
.map(|ring| {
Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect())
Value::Array(ring.iter().map(|p| json!([p.lon, p.lat])).collect())
})
.collect(),
)
@@ -218,7 +220,7 @@ fn get_coordinates(object: &Map<String, Value>) -> Result<&Value, GeometryError>
Ok(coordinates)
}
fn to_point(value: &Value) -> Result<(f64, f64), GeometryError> {
fn to_point(value: &Value) -> Result<GeoPoint, GeometryError> {
let lonlat = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected 2 element array pair of lon/lat".to_string(),
))?;
@@ -245,10 +247,10 @@ fn to_point(value: &Value) -> Result<(f64, f64), GeometryError> {
lat
)));
}
Ok((lon, lat))
Ok(GeoPoint { lon, lat })
}
fn to_line_string(value: &Value) -> Result<Vec<(f64, f64)>, GeometryError> {
fn to_line_string(value: &Value) -> Result<Vec<GeoPoint>, GeometryError> {
let mut result = Vec::new();
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected an array of lon/lat arrays".to_string(),
@@ -259,7 +261,7 @@ fn to_line_string(value: &Value) -> Result<Vec<(f64, f64)>, GeometryError> {
Ok(result)
}
fn to_multi_line_string(value: &Value) -> Result<Vec<Vec<(f64, f64)>>, GeometryError> {
fn to_multi_line_string(value: &Value) -> Result<Vec<Vec<GeoPoint>>, GeometryError> {
let mut result = Vec::new();
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected an array of an array of lon/lat arrays".to_string(),
@@ -275,8 +277,8 @@ impl BinarySerializable for Geometry {
match self {
Geometry::Point(point) => {
0u8.serialize(writer)?;
point.0.serialize(writer)?;
point.1.serialize(writer)?;
point.lon.serialize(writer)?;
point.lat.serialize(writer)?;
Ok(())
}
Geometry::MultiPoint(points) => {
@@ -289,7 +291,7 @@ impl BinarySerializable for Geometry {
}
Geometry::MultiLineString(multi_line_string) => {
3u8.serialize(writer)?;
serialize_polygon(multi_line_string, writer)
serialize_polygon(&multi_line_string[..], writer)
}
Geometry::Polygon(polygon) => {
4u8.serialize(writer)?;
@@ -309,8 +311,8 @@ impl BinarySerializable for Geometry {
for polygon in multi_polygon {
for ring in polygon {
for point in ring {
lon.push(point.0);
lat.push(point.1);
lon.push(point.lon);
lat.push(point.lat);
}
}
}
@@ -339,7 +341,7 @@ impl BinarySerializable for Geometry {
0 => {
let lon = BinarySerializable::deserialize(reader)?;
let lat = BinarySerializable::deserialize(reader)?;
Ok(Geometry::Point((lon, lat)))
Ok(Geometry::Point(GeoPoint { lon, lat }))
}
1 => Ok(Geometry::MultiPoint(deserialize_line_string(reader)?)),
2 => Ok(Geometry::LineString(deserialize_line_string(reader)?)),
@@ -370,7 +372,10 @@ impl BinarySerializable for Geometry {
for point_count in rings {
let mut ring = Vec::new();
for _ in 0..point_count {
ring.push((lon[offset], lat[offset]));
ring.push(GeoPoint {
lon: lon[offset],
lat: lat[offset],
});
offset += 1;
}
polygon.push(ring);
@@ -396,15 +401,15 @@ impl BinarySerializable for Geometry {
}
fn serialize_line_string<W: Write + ?Sized>(
line: &Vec<(f64, f64)>,
line: &[GeoPoint],
writer: &mut W,
) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line.len() as u64), writer)?;
let mut lon = Vec::new();
let mut lat = Vec::new();
for point in line {
lon.push(point.0);
lat.push(point.1);
lon.push(point.lon);
lat.push(point.lat);
}
let lon = compress_f64(&lon);
let lat = compress_f64(&lat);
@@ -416,23 +421,23 @@ fn serialize_line_string<W: Write + ?Sized>(
}
fn serialize_polygon<W: Write + ?Sized>(
line_string: &Vec<Vec<(f64, f64)>>,
line_string: &[Vec<GeoPoint>],
writer: &mut W,
) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line_string.len() as u64), writer)?;
for ring in line_string {
BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?;
}
let mut lon = Vec::new();
let mut lat = Vec::new();
let mut lon: Vec<f64> = Vec::new();
let mut lat: Vec<f64> = Vec::new();
for ring in line_string {
for point in ring {
lon.push(point.0);
lat.push(point.1);
lon.push(point.lon);
lat.push(point.lat);
}
}
let lon = compress_f64(&lon);
let lat = compress_f64(&lat);
let lon: Vec<u8> = compress_f64(&lon);
let lat: Vec<u8> = compress_f64(&lat);
VInt(lon.len() as u64).serialize(writer)?;
writer.write_all(&lon)?;
VInt(lat.len() as u64).serialize(writer)?;
@@ -440,20 +445,20 @@ fn serialize_polygon<W: Write + ?Sized>(
Ok(())
}
fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<(f64, f64)>> {
fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<GeoPoint>> {
let point_count = VInt::deserialize(reader)?.0 as usize;
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, point_count);
let lat = decompress_f64(&lat_bytes, point_count);
let mut line_string = Vec::new();
let lon: Vec<f64> = decompress_f64(&lon_bytes, point_count);
let lat: Vec<f64> = decompress_f64(&lat_bytes, point_count);
let mut line_string: Vec<GeoPoint> = Vec::new();
for offset in 0..point_count {
line_string.push((lon[offset], lat[offset]));
line_string.push(GeoPoint { lon: lon[offset], lat: lat[offset] });
}
Ok(line_string)
}
fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<(f64, f64)>>> {
fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<GeoPoint>>> {
let ring_count = VInt::deserialize(reader)?.0 as usize;
let mut rings = Vec::new();
let mut count = 0;
@@ -464,14 +469,14 @@ fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<(f64, f64)
}
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, count);
let lat = decompress_f64(&lat_bytes, count);
let mut polygon = Vec::new();
let lon: Vec<f64> = decompress_f64(&lon_bytes, count);
let lat: Vec<f64> = decompress_f64(&lat_bytes, count);
let mut polygon: Vec<Vec<GeoPoint>> = Vec::new();
let mut offset = 0;
for point_count in rings {
let mut ring = Vec::new();
for _ in 0..point_count {
ring.push((lon[offset], lat[offset]));
ring.push(GeoPoint { lon: lon[offset], lat: lat[offset] });
offset += 1;
}
polygon.push(ring);
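
With named fields on GeoPoint, to_geojson() writes coordinates in GeoJSON's [longitude, latitude] order straight from the struct. A small usage sketch, assuming the crate-internal paths from this file (serde_json is already a dependency here):

use crate::spatial::geometry::Geometry;
use crate::spatial::point::GeoPoint;

fn point_to_geojson_string() -> String {
    let geometry = Geometry::Point(GeoPoint { lon: -99.49, lat: 45.56 });
    // GeoJSON mandates [longitude, latitude] ordering, which to_geojson follows.
    let map = geometry.to_geojson();
    // e.g. {"coordinates":[-99.49,45.56],"type":"Point"}
    serde_json::Value::Object(map).to_string()
}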

View File

@@ -3,6 +3,7 @@
pub mod bkd;
pub mod delta;
pub mod geometry;
pub mod point;
pub mod radix_select;
pub mod reader;
pub mod serializer;

src/spatial/point.rs (new file, 9 additions)
View File

@@ -0,0 +1,9 @@
/// A point in the geographical coordinate system.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoPoint {
/// Longitude
pub lon: f64,
/// Latitude
pub lat: f64,
}
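
GeoPoint itself carries no validation; the lon/lat range checks live in the GeoJSON parsing path (to_point in geometry.rs). A hedged sketch of a standalone check one might run before building a point by hand — the helper name and the exact bounds (±180 / ±90) are assumptions, not part of the commit:

use crate::spatial::point::GeoPoint;

// Hypothetical helper mirroring the kind of range check done during GeoJSON parsing.
fn checked_geopoint(lon: f64, lat: f64) -> Option<GeoPoint> {
    let lon_ok = (-180.0..=180.0).contains(&lon);
    let lat_ok = (-90.0..=90.0).contains(&lat);
    if lon_ok && lat_ok {
        Some(GeoPoint { lon, lat })
    } else {
        None
    }
}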

View File

@@ -10,12 +10,17 @@ use crate::schema::Field;
use crate::space_usage::PerFieldSpaceUsage;
#[derive(Clone)]
/// Readers for the per-field spatial data of a segment.
pub struct SpatialReaders {
data: Arc<CompositeFile>,
}
impl SpatialReaders {
pub fn empty() -> SpatialReaders {
SpatialReaders {
data: Arc::new(CompositeFile::empty()),
}
}
/// Opens the spatial readers from the given file slice.
pub fn open(file: FileSlice) -> crate::Result<SpatialReaders> {
let data = CompositeFile::open(&file)?;

View File

@@ -7,6 +7,7 @@ use i_triangle::int::triangulatable::IntTriangulatable;
use crate::schema::Field;
use crate::spatial::geometry::Geometry;
use crate::spatial::point::GeoPoint;
use crate::spatial::serializer::SpatialSerializer;
use crate::spatial::triangle::{delaunay_to_triangles, Triangle};
use crate::DocId;
@@ -81,15 +82,15 @@ impl Default for SpatialWriter {
}
}
/// Convert a point of (longitude, latitude) to a integer point.
pub fn as_point_i32(point: (f64, f64)) -> (i32, i32) {
/// Convert a point of `(longitude, latitude)` to an integer point.
pub fn as_point_i32(point: GeoPoint) -> (i32, i32) {
(
(point.0 / (360.0 / (1i64 << 32) as f64)).floor() as i32,
(point.1 / (180.0 / (1i64 << 32) as f64)).floor() as i32,
(point.lon / (360.0 / (1i64 << 32) as f64)).floor() as i32,
(point.lat / (180.0 / (1i64 << 32) as f64)).floor() as i32,
)
}
fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: (f64, f64)) {
fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: GeoPoint) {
let point = as_point_i32(point);
triangles.push(Triangle::from_point(doc_id, point.0, point.1));
}
@@ -97,7 +98,7 @@ fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: (f64, f64))
fn append_line_string(
triangles: &mut Vec<Triangle>,
doc_id: DocId,
line_string: Vec<(f64, f64)>,
line_string: Vec<GeoPoint>,
) {
let mut previous = as_point_i32(line_string[0]);
for point in line_string.into_iter().skip(1) {
@@ -109,7 +110,7 @@ fn append_line_string(
}
}
fn append_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: &[(f64, f64)]) {
fn append_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: &[GeoPoint]) {
let mut i_ring = Vec::with_capacity(ring.len() + 1);
for &point in ring {
let point = as_point_i32(point);
@@ -118,7 +119,7 @@ fn append_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: &[(f64, f64)]) {
i_polygon.push(i_ring);
}
fn append_polygon(triangles: &mut Vec<Triangle>, doc_id: DocId, polygon: &[Vec<(f64, f64)>]) {
fn append_polygon(triangles: &mut Vec<Triangle>, doc_id: DocId, polygon: &[Vec<GeoPoint>]) {
let mut i_polygon: Vec<Vec<IntPoint>> = Vec::new();
for ring in polygon {
append_ring(&mut i_polygon, ring);
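
as_point_i32 maps longitude onto the full i32 range by dividing by 360 / 2^32 degrees (latitude by 180 / 2^32), so one integer step is roughly 8.4e-8 degrees of longitude — on the order of a centimetre at the equator. A standalone restatement of the same formula with a round-trip resolution check (constants recomputed here for illustration, not taken from the crate):

// Standalone copy of the conversion in writer.rs, for illustration only.
fn lon_lat_to_i32(lon: f64, lat: f64) -> (i32, i32) {
    let lon_step = 360.0 / (1u64 << 32) as f64; // ~8.38e-8 degrees per integer unit
    let lat_step = 180.0 / (1u64 << 32) as f64; // ~4.19e-8 degrees per integer unit
    ((lon / lon_step).floor() as i32, (lat / lat_step).floor() as i32)
}

fn main() {
    let (x, y) = lon_lat_to_i32(-99.49, 45.56);
    // Mapping back differs from the input by less than one step (flooring loses at most one unit).
    let lon_back = x as f64 * (360.0 / (1u64 << 32) as f64);
    assert!((lon_back - (-99.49)).abs() < 1e-7);
    println!("x = {x}, y = {y}, lon_back = {lon_back}");
}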

View File

@@ -18,7 +18,7 @@
//! Unlike delta.rs which uses arithmetic deltas for i32 spatial coordinates in the block kd-tree,
//! this module operates on f64 bit patterns directly to preserve exact floating-point values for
//! returning to users.
use std::io::{Cursor, Read};
use std::io::Read;
use common::VInt;
@@ -34,8 +34,8 @@ pub fn compress_f64(values: &[f64]) -> Vec<u8> {
if values.is_empty() {
return Vec::new();
}
let mut output = Vec::new();
let mut previous = values[0].to_bits();
let mut output: Vec<u8> = Vec::new();
let mut previous: u64 = f64_to_le(values[0]);
output.extend_from_slice(&previous.to_le_bytes());
for &value in &values[1..] {
let bits = value.to_bits();
@@ -46,13 +46,21 @@ pub fn compress_f64(values: &[f64]) -> Vec<u8> {
if output.len() >= values.len() * 8 {
let mut output = Vec::with_capacity(values.len() * 8);
for &value in values {
output.extend_from_slice(&value.to_bits().to_le_bytes());
output.extend_from_slice(&f64_to_le(value).to_le_bytes());
}
return output;
}
output
}
fn f64_to_le(value: f64) -> u64 {
u64::from_le_bytes(value.to_le_bytes())
}
fn f64_from_le(value: u64) -> f64 {
f64::from_le_bytes(value.to_le_bytes())
}
/// Decompresses f64 coordinates from XOR delta or raw encoding.
///
/// Detects compression format by byte length - if `bytes.len() == count * 8`, data is raw and
@@ -60,16 +68,16 @@ pub fn compress_f64(values: &[f64]) -> Vec<u8> {
/// reconstructing the original sequence.
///
/// Returns exact f64 values that were passed to `compress_f64()`.
pub fn decompress_f64(bytes: &[u8], count: usize) -> Vec<f64> {
pub fn decompress_f64(mut bytes: &[u8], count: usize) -> Vec<f64> {
let mut values = Vec::with_capacity(count);
if bytes.len() == count * 8 {
for i in 0..count {
let bits = u64::from_le_bytes(bytes[i * 8..(i + 1) * 8].try_into().unwrap());
values.push(f64::from_bits(bits));
values.push(f64_from_le(bits));
}
return values;
}
let mut cursor = Cursor::new(bytes);
let mut cursor: &mut &[u8] = &mut bytes;
// Read first value (raw 8 bytes)
let mut first_bytes = [0u8; 8];
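
The module documentation and the decompress_f64 comment pin down the contract: the first value is stored raw, later values as XOR deltas of their bit patterns, with a fall-back to plain 8-byte little-endian words whenever the delta stream would not be smaller; decompress_f64 tells the two encodings apart by byte length, and either way the round trip is bit-exact. A test-style sketch of that guarantee, written as if it lived inside this module (hence the use super import):

#[cfg(test)]
mod xor_roundtrip_tests {
    use super::{compress_f64, decompress_f64};

    #[test]
    fn compress_f64_roundtrip_is_exact() {
        // Nearby coordinates share most of their bit pattern, so the XOR-delta path applies;
        // the documented contract is an exact round trip either way.
        let values = vec![-99.49, -99.4899, -99.4898, 45.56, 45.5601];
        let bytes = compress_f64(&values);
        assert_eq!(decompress_f64(&bytes, values.len()), values);

        // Empty input: compress_f64 returns an empty buffer and decompress_f64 an empty vec.
        assert!(compress_f64(&[]).is_empty());
        assert!(decompress_f64(&[], 0).is_empty());
    }
}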