Files
tantivy/src/spatial/geometry.rs
Alan Gutierrez dbbc8c3f65 Slot block kd-tree into Tantivy.
Implemented a geometry document field with a minimal `Geometry` enum.
Now able to add that Geometry from GeoJSON parsed from a JSON document.
Geometry is triangulated if it is a polygon, otherwise it is correctly
encoded as a degenerate triangle if it is a point or a line string.
Write accumulated triangles to a block kd-tree on commit.

Serialize the original `f64` polygon for retrieval from search.

Created a query method for intersection. Query against the memory mapped
block kd-tree. Return hits and original `f64` polygon.

Implemented a merge of one or more block kd-trees from one or more
segments during merge.

Updated the block kd-tree to write to a Tantivy `WritePtr` instead of
more generic Rust I/O.
2025-12-01 16:49:16 +01:00

480 lines
18 KiB
Rust

//! HUSH
use std::io::{self, Read, Write};
use common::{BinarySerializable, VInt};
use serde_json::{json, Map, Value};
use crate::spatial::xor::{compress_f64, decompress_f64};
/// HUSH
#[derive(Debug)]
pub enum GeometryError {
/// HUSH
MissingType,
/// HUSH
MissingField(String), // "expected array", "wrong nesting depth", etc
/// HUSH
UnsupportedType(String),
/// HUSH
InvalidCoordinate(String), // Can report the actual bad value
/// HUSH
InvalidStructure(String), // "expected array", "wrong nesting depth", etc
}
/// HUSH
#[derive(Debug, Clone, PartialEq)]
pub enum Geometry {
/// HUSH
Point((f64, f64)),
/// HUSH
MultiPoint(Vec<(f64, f64)>),
/// HUSH
LineString(Vec<(f64, f64)>),
/// HUSH
MultiLineString(Vec<Vec<(f64, f64)>>),
/// HUSH
Polygon(Vec<Vec<(f64, f64)>>),
/// HUSH
MultiPolygon(Vec<Vec<Vec<(f64, f64)>>>),
/// HUSH
GeometryCollection(Vec<Self>),
}
impl Geometry {
/// HUSH
pub fn from_geojson(object: &Map<String, Value>) -> Result<Self, GeometryError> {
let geometry_type = object
.get("type")
.and_then(|v| v.as_str())
.ok_or(GeometryError::MissingType)?;
match geometry_type {
"Point" => {
let coordinates = get_coordinates(object)?;
let point = to_point(coordinates)?;
Ok(Geometry::Point(point))
}
"MultiPoint" => {
let coordinates = get_coordinates(object)?;
let multi_point = to_line_string(coordinates)?;
Ok(Geometry::MultiPoint(multi_point))
}
"LineString" => {
let coordinates = get_coordinates(object)?;
let line_string = to_line_string(coordinates)?;
if line_string.len() < 2 {
return Err(GeometryError::InvalidStructure(
"a line string contains at least 2 points".to_string(),
));
}
Ok(Geometry::LineString(line_string))
}
"MultiLineString" => {
let coordinates = get_coordinates(object)?;
let multi_line_string = to_multi_line_string(coordinates)?;
for line_string in &multi_line_string {
if line_string.len() < 2 {
return Err(GeometryError::InvalidStructure(
"a line string contains at least 2 points".to_string(),
));
}
}
Ok(Geometry::MultiLineString(multi_line_string))
}
"Polygon" => {
let coordinates = get_coordinates(object)?;
let polygon = to_multi_line_string(coordinates)?;
for ring in &polygon {
if ring.len() < 3 {
return Err(GeometryError::InvalidStructure(
"a polygon ring contains at least 3 points".to_string(),
));
}
}
Ok(Geometry::Polygon(polygon))
}
"MultiPolygon" => {
let mut result = Vec::new();
let multi_polygons = get_coordinates(object)?;
let multi_polygons =
multi_polygons
.as_array()
.ok_or(GeometryError::InvalidStructure(
"expected an array of polygons".to_string(),
))?;
for polygon in multi_polygons {
let polygon = to_multi_line_string(polygon)?;
for ring in &polygon {
if ring.len() < 3 {
return Err(GeometryError::InvalidStructure(
"a polygon ring contains at least 3 points".to_string(),
));
}
}
result.push(polygon);
}
Ok(Geometry::MultiPolygon(result))
}
"GeometriesCollection" => {
let geometries = object
.get("geometries")
.ok_or(GeometryError::MissingField("geometries".to_string()))?;
let geometries = geometries
.as_array()
.ok_or(GeometryError::InvalidStructure(
"geometries is not an array".to_string(),
))?;
let mut result = Vec::new();
for geometry in geometries {
let object = geometry.as_object().ok_or(GeometryError::InvalidStructure(
"geometry is not an object".to_string(),
))?;
result.push(Geometry::from_geojson(object)?);
}
Ok(Geometry::GeometryCollection(result))
}
_ => Err(GeometryError::UnsupportedType(geometry_type.to_string())),
}
}
/// HUSH
pub fn to_geojson(&self) -> Map<String, Value> {
let mut map = Map::new();
match self {
Geometry::Point(point) => {
map.insert("type".to_string(), Value::String("Point".to_string()));
let coords = json!([point.0, point.1]);
map.insert("coordinates".to_string(), coords);
}
Geometry::MultiPoint(points) => {
map.insert("type".to_string(), Value::String("MultiPoint".to_string()));
let coords: Vec<Value> = points.iter().map(|p| json!([p.0, p.1])).collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::LineString(line) => {
map.insert("type".to_string(), Value::String("LineString".to_string()));
let coords: Vec<Value> = line.iter().map(|p| json!([p.0, p.1])).collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::MultiLineString(lines) => {
map.insert(
"type".to_string(),
Value::String("MultiLineString".to_string()),
);
let coords: Vec<Value> = lines
.iter()
.map(|line| Value::Array(line.iter().map(|p| json!([p.0, p.1])).collect()))
.collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::Polygon(rings) => {
map.insert("type".to_string(), Value::String("Polygon".to_string()));
let coords: Vec<Value> = rings
.iter()
.map(|ring| Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect()))
.collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::MultiPolygon(polygons) => {
map.insert(
"type".to_string(),
Value::String("MultiPolygon".to_string()),
);
let coords: Vec<Value> = polygons
.iter()
.map(|polygon| {
Value::Array(
polygon
.iter()
.map(|ring| {
Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect())
})
.collect(),
)
})
.collect();
map.insert("coordinates".to_string(), Value::Array(coords));
}
Geometry::GeometryCollection(geometries) => {
map.insert(
"type".to_string(),
Value::String("GeometryCollection".to_string()),
);
let geoms: Vec<Value> = geometries
.iter()
.map(|g| Value::Object(g.to_geojson()))
.collect();
map.insert("geometries".to_string(), Value::Array(geoms));
}
}
map
}
}
fn get_coordinates(object: &Map<String, Value>) -> Result<&Value, GeometryError> {
let coordinates = object
.get("coordinates")
.ok_or(GeometryError::MissingField("coordinates".to_string()))?;
Ok(coordinates)
}
fn to_point(value: &Value) -> Result<(f64, f64), GeometryError> {
let lonlat = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected 2 element array pair of lon/lat".to_string(),
))?;
if lonlat.len() != 2 {
return Err(GeometryError::InvalidStructure(
"expected 2 element array pair of lon/lat".to_string(),
));
}
let lon = lonlat[0].as_f64().ok_or(GeometryError::InvalidCoordinate(
"longitude must be f64".to_string(),
))?;
if !lon.is_finite() || !(-180.0..=180.0).contains(&lon) {
return Err(GeometryError::InvalidCoordinate(format!(
"invalid longitude: {}",
lon
)));
}
let lat = lonlat[1].as_f64().ok_or(GeometryError::InvalidCoordinate(
"latitude must be f64".to_string(),
))?;
if !lat.is_finite() || !(-90.0..=90.0).contains(&lat) {
return Err(GeometryError::InvalidCoordinate(format!(
"invalid latitude: {}",
lat
)));
}
Ok((lon, lat))
}
fn to_line_string(value: &Value) -> Result<Vec<(f64, f64)>, GeometryError> {
let mut result = Vec::new();
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected an array of lon/lat arrays".to_string(),
))?;
for coordinate in coordinates {
result.push(to_point(coordinate)?);
}
Ok(result)
}
fn to_multi_line_string(value: &Value) -> Result<Vec<Vec<(f64, f64)>>, GeometryError> {
let mut result = Vec::new();
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected an array of an array of lon/lat arrays".to_string(),
))?;
for coordinate in coordinates {
result.push(to_line_string(coordinate)?);
}
Ok(result)
}
impl BinarySerializable for Geometry {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
match self {
Geometry::Point(point) => {
0u8.serialize(writer)?;
point.0.serialize(writer)?;
point.1.serialize(writer)?;
Ok(())
}
Geometry::MultiPoint(points) => {
1u8.serialize(writer)?;
serialize_line_string(points, writer)
}
Geometry::LineString(line_string) => {
2u8.serialize(writer)?;
serialize_line_string(line_string, writer)
}
Geometry::MultiLineString(multi_line_string) => {
3u8.serialize(writer)?;
serialize_polygon(multi_line_string, writer)
}
Geometry::Polygon(polygon) => {
4u8.serialize(writer)?;
serialize_polygon(polygon, writer)
}
Geometry::MultiPolygon(multi_polygon) => {
5u8.serialize(writer)?;
BinarySerializable::serialize(&VInt(multi_polygon.len() as u64), writer)?;
for polygon in multi_polygon {
BinarySerializable::serialize(&VInt(polygon.len() as u64), writer)?;
for ring in polygon {
BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?;
}
}
let mut lon = Vec::new();
let mut lat = Vec::new();
for polygon in multi_polygon {
for ring in polygon {
for point in ring {
lon.push(point.0);
lat.push(point.1);
}
}
}
let lon = compress_f64(&lon);
let lat = compress_f64(&lat);
VInt(lon.len() as u64).serialize(writer)?;
writer.write_all(&lon)?;
VInt(lat.len() as u64).serialize(writer)?;
writer.write_all(&lat)?;
Ok(())
}
Geometry::GeometryCollection(geometries) => {
6u8.serialize(writer)?;
BinarySerializable::serialize(&VInt(geometries.len() as u64), writer)?;
for geometry in geometries {
geometry.serialize(writer)?;
}
Ok(())
}
}
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let discriminant: u8 = BinarySerializable::deserialize(reader)?;
match discriminant {
0 => {
let lon = BinarySerializable::deserialize(reader)?;
let lat = BinarySerializable::deserialize(reader)?;
Ok(Geometry::Point((lon, lat)))
}
1 => Ok(Geometry::MultiPoint(deserialize_line_string(reader)?)),
2 => Ok(Geometry::LineString(deserialize_line_string(reader)?)),
3 => Ok(Geometry::MultiLineString(deserialize_polygon(reader)?)),
4 => Ok(Geometry::Polygon(deserialize_polygon(reader)?)),
5 => {
let polygon_count = VInt::deserialize(reader)?.0 as usize;
let mut polygons = Vec::new();
let mut count = 0;
for _ in 0..polygon_count {
let ring_count = VInt::deserialize(reader)?.0 as usize;
let mut rings = Vec::new();
for _ in 0..ring_count {
rings.push(VInt::deserialize(reader)?.0 as usize);
count += 1;
}
polygons.push(rings);
}
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, count);
let lat = decompress_f64(&lat_bytes, count);
let mut multi_polygon = Vec::new();
let mut offset = 0;
for rings in polygons {
let mut polygon = Vec::new();
for point_count in rings {
let mut ring = Vec::new();
for _ in 0..point_count {
ring.push((lon[offset], lat[offset]));
offset += 1;
}
polygon.push(ring);
}
multi_polygon.push(polygon);
}
Ok(Geometry::MultiPolygon(multi_polygon))
}
6 => {
let geometry_count = VInt::deserialize(reader)?.0 as usize;
let mut geometries = Vec::new();
for _ in 0..geometry_count {
geometries.push(Geometry::deserialize(reader)?);
}
Ok(Geometry::GeometryCollection(geometries))
}
_ => Err(io::Error::new(
io::ErrorKind::InvalidData,
"invalid geometry type",
)),
}
}
}
fn serialize_line_string<W: Write + ?Sized>(
line: &Vec<(f64, f64)>,
writer: &mut W,
) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line.len() as u64), writer)?;
let mut lon = Vec::new();
let mut lat = Vec::new();
for point in line {
lon.push(point.0);
lat.push(point.1);
}
let lon = compress_f64(&lon);
let lat = compress_f64(&lat);
VInt(lon.len() as u64).serialize(writer)?;
writer.write_all(&lon)?;
VInt(lat.len() as u64).serialize(writer)?;
writer.write_all(&lat)?;
Ok(())
}
fn serialize_polygon<W: Write + ?Sized>(
line_string: &Vec<Vec<(f64, f64)>>,
writer: &mut W,
) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line_string.len() as u64), writer)?;
for ring in line_string {
BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?;
}
let mut lon = Vec::new();
let mut lat = Vec::new();
for ring in line_string {
for point in ring {
lon.push(point.0);
lat.push(point.1);
}
}
let lon = compress_f64(&lon);
let lat = compress_f64(&lat);
VInt(lon.len() as u64).serialize(writer)?;
writer.write_all(&lon)?;
VInt(lat.len() as u64).serialize(writer)?;
writer.write_all(&lat)?;
Ok(())
}
fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<(f64, f64)>> {
let point_count = VInt::deserialize(reader)?.0 as usize;
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, point_count);
let lat = decompress_f64(&lat_bytes, point_count);
let mut line_string = Vec::new();
for offset in 0..point_count {
line_string.push((lon[offset], lat[offset]));
}
Ok(line_string)
}
fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<(f64, f64)>>> {
let ring_count = VInt::deserialize(reader)?.0 as usize;
let mut rings = Vec::new();
let mut count = 0;
for _ in 0..ring_count {
let point_count = VInt::deserialize(reader)?.0 as usize;
rings.push(point_count);
count += point_count;
}
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, count);
let lat = decompress_f64(&lat_bytes, count);
let mut polygon = Vec::new();
let mut offset = 0;
for point_count in rings {
let mut ring = Vec::new();
for _ in 0..point_count {
ring.push((lon[offset], lat[offset]));
offset += 1;
}
polygon.push(ring);
}
Ok(polygon)
}