Introduced geopoint.

This commit is contained in:
Paul Masurel
2025-12-03 17:05:16 +01:00
parent 1619e05bc5
commit f85a27068d
11 changed files with 116 additions and 75 deletions

View File

@@ -1,6 +1,8 @@
use geo_types::Point;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::query::SpatialQuery; use tantivy::query::SpatialQuery;
use tantivy::schema::{Schema, Value, SPATIAL, STORED, TEXT}; use tantivy::schema::{Schema, Value, SPATIAL, STORED, TEXT};
use tantivy::spatial::point::GeoPoint;
use tantivy::{Index, IndexWriter, TantivyDocument}; use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
@@ -38,7 +40,7 @@ fn main() -> tantivy::Result<()> {
let field = schema.get_field("geometry").unwrap(); let field = schema.get_field("geometry").unwrap();
let query = SpatialQuery::new( let query = SpatialQuery::new(
field, field,
[(-99.49, 45.56), (-99.45, 45.59)], [GeoPoint { lon:-99.49, lat: 45.56}, GeoPoint {lon:-99.45, lat: 45.59}],
tantivy::query::SpatialQueryType::Intersects, tantivy::query::SpatialQueryType::Intersects,
); );
let hits = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?; let hits = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;

View File

@@ -683,7 +683,7 @@ mod tests {
} }
#[test] #[test]
fn test_datefastfield() -> crate::Result<()> { fn test_datefastfield() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field( let date_field = schema_builder.add_date_field(
"date", "date",
@@ -697,22 +697,22 @@ mod tests {
); );
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?; let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy)); index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!( index_writer.add_document(doc!(
date_field => DateTime::from_u64(1i64.to_u64()), date_field => DateTime::from_u64(1i64.to_u64()),
multi_date_field => DateTime::from_u64(2i64.to_u64()), multi_date_field => DateTime::from_u64(2i64.to_u64()),
multi_date_field => DateTime::from_u64(3i64.to_u64()) multi_date_field => DateTime::from_u64(3i64.to_u64())
))?; )).unwrap();
index_writer.add_document(doc!( index_writer.add_document(doc!(
date_field => DateTime::from_u64(4i64.to_u64()) date_field => DateTime::from_u64(4i64.to_u64())
))?; )).unwrap();
index_writer.add_document(doc!( index_writer.add_document(doc!(
multi_date_field => DateTime::from_u64(5i64.to_u64()), multi_date_field => DateTime::from_u64(5i64.to_u64()),
multi_date_field => DateTime::from_u64(6i64.to_u64()) multi_date_field => DateTime::from_u64(6i64.to_u64())
))?; )).unwrap();
index_writer.commit()?; index_writer.commit().unwrap();
let reader = index.reader()?; let reader = index.reader().unwrap();
let searcher = reader.searcher(); let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1); assert_eq!(searcher.segment_readers().len(), 1);
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
@@ -746,7 +746,6 @@ mod tests {
assert_eq!(dates[0].into_timestamp_nanos(), 5i64); assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
assert_eq!(dates[1].into_timestamp_nanos(), 6i64); assert_eq!(dates[1].into_timestamp_nanos(), 6i64);
} }
Ok(())
} }
#[test] #[test]

View File

@@ -180,8 +180,13 @@ impl SegmentReader {
let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?; let fast_fields_readers = FastFieldReaders::open(fast_fields_data, schema.clone())?;
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?; let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let spatial_readers = if schema.contains_spatial_field() {
let spatial_data = segment.open_read(SegmentComponent::Spatial)?; let spatial_data = segment.open_read(SegmentComponent::Spatial)?;
let spatial_readers = SpatialReaders::open(spatial_data)?; SpatialReaders::open(spatial_data)?
} else {
SpatialReaders::empty()
};
let original_bitset = if segment.meta().has_deletes() { let original_bitset = if segment.meta().has_deletes() {
let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?; let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?;

View File

@@ -175,6 +175,7 @@ impl IndexMerger {
let mut readers = vec![]; let mut readers = vec![];
for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) { for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) {
if segment.meta().num_docs() > 0 { if segment.meta().num_docs() > 0 {
dbg!("segment");
let reader = let reader =
SegmentReader::open_with_custom_alive_set(segment, new_alive_bitset_opt)?; SegmentReader::open_with_custom_alive_set(segment, new_alive_bitset_opt)?;
readers.push(reader); readers.push(reader);
@@ -530,7 +531,6 @@ impl IndexMerger {
serializer: &mut SegmentSerializer, serializer: &mut SegmentSerializer,
doc_id_mapping: &SegmentDocIdMapping, doc_id_mapping: &SegmentDocIdMapping,
) -> crate::Result<()> { ) -> crate::Result<()> {
/// Unfortunately, there are no special trick to merge segments.
/// We need to rebuild a BKD-tree based off the list of triangles. /// We need to rebuild a BKD-tree based off the list of triangles.
/// ///
/// Because the data can be large, we do this by writing the sequence of triangles to /// Because the data can be large, we do this by writing the sequence of triangles to
@@ -543,6 +543,12 @@ impl IndexMerger {
/// swap, the memory will not be accounted as anonymous memory, /// swap, the memory will not be accounted as anonymous memory,
/// swap space is reserved etc. /// swap space is reserved etc.
use crate::spatial::bkd::Segment; use crate::spatial::bkd::Segment;
let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() else {
// The schema does not contain any spatial field.
return Ok(())
};
let mut segment_mappings: Vec<Vec<Option<DocId>>> = Vec::new(); let mut segment_mappings: Vec<Vec<Option<DocId>>> = Vec::new();
for reader in &self.readers { for reader in &self.readers {
let max_doc = reader.max_doc(); let max_doc = reader.max_doc();
@@ -586,7 +592,6 @@ impl IndexMerger {
// No need to fsync here. This file is not here for persistency. // No need to fsync here. This file is not here for persistency.
} }
} }
if let Some(mut spatial_serializer) = serializer.extract_spatial_serializer() {
for (field, temp_file) in temp_files { for (field, temp_file) in temp_files {
// Memory map the triangle file. // Memory map the triangle file.
use memmap2::MmapOptions; use memmap2::MmapOptions;
@@ -600,7 +605,7 @@ impl IndexMerger {
spatial_serializer.serialize_field(field, triangles)?; spatial_serializer.serialize_field(field, triangles)?;
} }
spatial_serializer.close()?; spatial_serializer.close()?;
}
Ok(()) Ok(())
} }

View File

@@ -6,6 +6,7 @@ use crate::query::explanation::does_not_match;
use crate::query::{BitSetDocSet, Explanation, Query, Scorer, Weight}; use crate::query::{BitSetDocSet, Explanation, Query, Scorer, Weight};
use crate::schema::Field; use crate::schema::Field;
use crate::spatial::bkd::{search_intersects, Segment}; use crate::spatial::bkd::{search_intersects, Segment};
use crate::spatial::point::GeoPoint;
use crate::spatial::writer::as_point_i32; use crate::spatial::writer::as_point_i32;
use crate::{DocId, DocSet, Score, TantivyError, TERMINATED}; use crate::{DocId, DocSet, Score, TantivyError, TERMINATED};
@@ -28,7 +29,7 @@ pub struct SpatialQuery {
impl SpatialQuery { impl SpatialQuery {
/// HUSH /// HUSH
pub fn new(field: Field, bounds: [(f64, f64); 2], query_type: SpatialQueryType) -> Self { pub fn new(field: Field, bounds: [GeoPoint; 2], query_type: SpatialQueryType) -> Self {
SpatialQuery { SpatialQuery {
field, field,
bounds: [as_point_i32(bounds[0]), as_point_i32(bounds[1])], bounds: [as_point_i32(bounds[0]), as_point_i32(bounds[1])],

View File

@@ -5,6 +5,7 @@ use std::io::{self, Read, Write};
use common::{BinarySerializable, VInt}; use common::{BinarySerializable, VInt};
use serde_json::{json, Map, Value}; use serde_json::{json, Map, Value};
use crate::spatial::point::GeoPoint;
use crate::spatial::xor::{compress_f64, decompress_f64}; use crate::spatial::xor::{compress_f64, decompress_f64};
/// HUSH /// HUSH
@@ -26,17 +27,17 @@ pub enum GeometryError {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Geometry { pub enum Geometry {
/// HUSH /// HUSH
Point((f64, f64)), Point(GeoPoint),
/// HUSH /// HUSH
MultiPoint(Vec<(f64, f64)>), MultiPoint(Vec<GeoPoint>),
/// HUSH /// HUSH
LineString(Vec<(f64, f64)>), LineString(Vec<GeoPoint>),
/// HUSH /// HUSH
MultiLineString(Vec<Vec<(f64, f64)>>), MultiLineString(Vec<Vec<GeoPoint>>),
/// HUSH /// HUSH
Polygon(Vec<Vec<(f64, f64)>>), Polygon(Vec<Vec<GeoPoint>>),
/// HUSH /// HUSH
MultiPolygon(Vec<Vec<Vec<(f64, f64)>>>), MultiPolygon(Vec<Vec<Vec<GeoPoint>>>),
/// HUSH /// HUSH
GeometryCollection(Vec<Self>), GeometryCollection(Vec<Self>),
} }
@@ -137,23 +138,24 @@ impl Geometry {
} }
} }
/// HUSH /// Serialize the geometry to GeoJSON format.
///
/// See <https://en.wikipedia.org/wiki/GeoJSON>.
pub fn to_geojson(&self) -> Map<String, Value> { pub fn to_geojson(&self) -> Map<String, Value> {
let mut map = Map::new(); let mut map = Map::new();
match self { match self {
Geometry::Point(point) => { Geometry::Point(point) => {
map.insert("type".to_string(), Value::String("Point".to_string())); map.insert("type".to_string(), Value::String("Point".to_string()));
let coords = json!([point.0, point.1]); let coords = json!([point.lon, point.lat]);
map.insert("coordinates".to_string(), coords); map.insert("coordinates".to_string(), coords);
} }
Geometry::MultiPoint(points) => { Geometry::MultiPoint(points) => {
map.insert("type".to_string(), Value::String("MultiPoint".to_string())); map.insert("type".to_string(), Value::String("MultiPoint".to_string()));
let coords: Vec<Value> = points.iter().map(|p| json!([p.0, p.1])).collect(); let coords: Vec<Value> = points.iter().map(|p| json!([p.lon, p.lat])).collect();
map.insert("coordinates".to_string(), Value::Array(coords)); map.insert("coordinates".to_string(), Value::Array(coords));
} }
Geometry::LineString(line) => { Geometry::LineString(line) => {
map.insert("type".to_string(), Value::String("LineString".to_string())); map.insert("type".to_string(), Value::String("LineString".to_string()));
let coords: Vec<Value> = line.iter().map(|p| json!([p.0, p.1])).collect(); let coords: Vec<Value> = line.iter().map(|p| json!([p.lon, p.lat])).collect();
map.insert("coordinates".to_string(), Value::Array(coords)); map.insert("coordinates".to_string(), Value::Array(coords));
} }
Geometry::MultiLineString(lines) => { Geometry::MultiLineString(lines) => {
@@ -163,7 +165,7 @@ impl Geometry {
); );
let coords: Vec<Value> = lines let coords: Vec<Value> = lines
.iter() .iter()
.map(|line| Value::Array(line.iter().map(|p| json!([p.0, p.1])).collect())) .map(|line| Value::Array(line.iter().map(|p| json!([p.lon, p.lat])).collect()))
.collect(); .collect();
map.insert("coordinates".to_string(), Value::Array(coords)); map.insert("coordinates".to_string(), Value::Array(coords));
} }
@@ -171,7 +173,7 @@ impl Geometry {
map.insert("type".to_string(), Value::String("Polygon".to_string())); map.insert("type".to_string(), Value::String("Polygon".to_string()));
let coords: Vec<Value> = rings let coords: Vec<Value> = rings
.iter() .iter()
.map(|ring| Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect())) .map(|ring| Value::Array(ring.iter().map(|p| json!([p.lon, p.lat])).collect()))
.collect(); .collect();
map.insert("coordinates".to_string(), Value::Array(coords)); map.insert("coordinates".to_string(), Value::Array(coords));
} }
@@ -187,7 +189,7 @@ impl Geometry {
polygon polygon
.iter() .iter()
.map(|ring| { .map(|ring| {
Value::Array(ring.iter().map(|p| json!([p.0, p.1])).collect()) Value::Array(ring.iter().map(|p| json!([p.lon, p.lat])).collect())
}) })
.collect(), .collect(),
) )
@@ -218,7 +220,7 @@ fn get_coordinates(object: &Map<String, Value>) -> Result<&Value, GeometryError>
Ok(coordinates) Ok(coordinates)
} }
fn to_point(value: &Value) -> Result<(f64, f64), GeometryError> { fn to_point(value: &Value) -> Result<GeoPoint, GeometryError> {
let lonlat = value.as_array().ok_or(GeometryError::InvalidStructure( let lonlat = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected 2 element array pair of lon/lat".to_string(), "expected 2 element array pair of lon/lat".to_string(),
))?; ))?;
@@ -245,10 +247,10 @@ fn to_point(value: &Value) -> Result<(f64, f64), GeometryError> {
lat lat
))); )));
} }
Ok((lon, lat)) Ok(GeoPoint { lon, lat })
} }
fn to_line_string(value: &Value) -> Result<Vec<(f64, f64)>, GeometryError> { fn to_line_string(value: &Value) -> Result<Vec<GeoPoint>, GeometryError> {
let mut result = Vec::new(); let mut result = Vec::new();
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure( let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected an array of lon/lat arrays".to_string(), "expected an array of lon/lat arrays".to_string(),
@@ -259,7 +261,7 @@ fn to_line_string(value: &Value) -> Result<Vec<(f64, f64)>, GeometryError> {
Ok(result) Ok(result)
} }
fn to_multi_line_string(value: &Value) -> Result<Vec<Vec<(f64, f64)>>, GeometryError> { fn to_multi_line_string(value: &Value) -> Result<Vec<Vec<GeoPoint>>, GeometryError> {
let mut result = Vec::new(); let mut result = Vec::new();
let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure( let coordinates = value.as_array().ok_or(GeometryError::InvalidStructure(
"expected an array of an array of lon/lat arrays".to_string(), "expected an array of an array of lon/lat arrays".to_string(),
@@ -275,8 +277,8 @@ impl BinarySerializable for Geometry {
match self { match self {
Geometry::Point(point) => { Geometry::Point(point) => {
0u8.serialize(writer)?; 0u8.serialize(writer)?;
point.0.serialize(writer)?; point.lon.serialize(writer)?;
point.1.serialize(writer)?; point.lat.serialize(writer)?;
Ok(()) Ok(())
} }
Geometry::MultiPoint(points) => { Geometry::MultiPoint(points) => {
@@ -289,7 +291,7 @@ impl BinarySerializable for Geometry {
} }
Geometry::MultiLineString(multi_line_string) => { Geometry::MultiLineString(multi_line_string) => {
3u8.serialize(writer)?; 3u8.serialize(writer)?;
serialize_polygon(multi_line_string, writer) serialize_polygon(&multi_line_string[..], writer)
} }
Geometry::Polygon(polygon) => { Geometry::Polygon(polygon) => {
4u8.serialize(writer)?; 4u8.serialize(writer)?;
@@ -309,8 +311,8 @@ impl BinarySerializable for Geometry {
for polygon in multi_polygon { for polygon in multi_polygon {
for ring in polygon { for ring in polygon {
for point in ring { for point in ring {
lon.push(point.0); lon.push(point.lon);
lat.push(point.1); lat.push(point.lat);
} }
} }
} }
@@ -339,7 +341,7 @@ impl BinarySerializable for Geometry {
0 => { 0 => {
let lon = BinarySerializable::deserialize(reader)?; let lon = BinarySerializable::deserialize(reader)?;
let lat = BinarySerializable::deserialize(reader)?; let lat = BinarySerializable::deserialize(reader)?;
Ok(Geometry::Point((lon, lat))) Ok(Geometry::Point(GeoPoint { lon, lat }))
} }
1 => Ok(Geometry::MultiPoint(deserialize_line_string(reader)?)), 1 => Ok(Geometry::MultiPoint(deserialize_line_string(reader)?)),
2 => Ok(Geometry::LineString(deserialize_line_string(reader)?)), 2 => Ok(Geometry::LineString(deserialize_line_string(reader)?)),
@@ -370,7 +372,10 @@ impl BinarySerializable for Geometry {
for point_count in rings { for point_count in rings {
let mut ring = Vec::new(); let mut ring = Vec::new();
for _ in 0..point_count { for _ in 0..point_count {
ring.push((lon[offset], lat[offset])); ring.push(GeoPoint {
lon: lon[offset],
lat: lat[offset],
});
offset += 1; offset += 1;
} }
polygon.push(ring); polygon.push(ring);
@@ -396,15 +401,15 @@ impl BinarySerializable for Geometry {
} }
fn serialize_line_string<W: Write + ?Sized>( fn serialize_line_string<W: Write + ?Sized>(
line: &Vec<(f64, f64)>, line: &[GeoPoint],
writer: &mut W, writer: &mut W,
) -> io::Result<()> { ) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line.len() as u64), writer)?; BinarySerializable::serialize(&VInt(line.len() as u64), writer)?;
let mut lon = Vec::new(); let mut lon = Vec::new();
let mut lat = Vec::new(); let mut lat = Vec::new();
for point in line { for point in line {
lon.push(point.0); lon.push(point.lon);
lat.push(point.1); lat.push(point.lat);
} }
let lon = compress_f64(&lon); let lon = compress_f64(&lon);
let lat = compress_f64(&lat); let lat = compress_f64(&lat);
@@ -416,23 +421,23 @@ fn serialize_line_string<W: Write + ?Sized>(
} }
fn serialize_polygon<W: Write + ?Sized>( fn serialize_polygon<W: Write + ?Sized>(
line_string: &Vec<Vec<(f64, f64)>>, line_string: &[Vec<GeoPoint>],
writer: &mut W, writer: &mut W,
) -> io::Result<()> { ) -> io::Result<()> {
BinarySerializable::serialize(&VInt(line_string.len() as u64), writer)?; BinarySerializable::serialize(&VInt(line_string.len() as u64), writer)?;
for ring in line_string { for ring in line_string {
BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?; BinarySerializable::serialize(&VInt(ring.len() as u64), writer)?;
} }
let mut lon = Vec::new(); let mut lon: Vec<f64> = Vec::new();
let mut lat = Vec::new(); let mut lat: Vec<f64> = Vec::new();
for ring in line_string { for ring in line_string {
for point in ring { for point in ring {
lon.push(point.0); lon.push(point.lon);
lat.push(point.1); lat.push(point.lat);
} }
} }
let lon = compress_f64(&lon); let lon: Vec<u8> = compress_f64(&lon);
let lat = compress_f64(&lat); let lat: Vec<u8> = compress_f64(&lat);
VInt(lon.len() as u64).serialize(writer)?; VInt(lon.len() as u64).serialize(writer)?;
writer.write_all(&lon)?; writer.write_all(&lon)?;
VInt(lat.len() as u64).serialize(writer)?; VInt(lat.len() as u64).serialize(writer)?;
@@ -440,20 +445,20 @@ fn serialize_polygon<W: Write + ?Sized>(
Ok(()) Ok(())
} }
fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<(f64, f64)>> { fn deserialize_line_string<R: Read>(reader: &mut R) -> io::Result<Vec<GeoPoint>> {
let point_count = VInt::deserialize(reader)?.0 as usize; let point_count = VInt::deserialize(reader)?.0 as usize;
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?; let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?; let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, point_count); let lon: Vec<f64> = decompress_f64(&lon_bytes, point_count);
let lat = decompress_f64(&lat_bytes, point_count); let lat: Vec<f64> = decompress_f64(&lat_bytes, point_count);
let mut line_string = Vec::new(); let mut line_string: Vec<GeoPoint> = Vec::new();
for offset in 0..point_count { for offset in 0..point_count {
line_string.push((lon[offset], lat[offset])); line_string.push(GeoPoint { lon: lon[offset], lat: lat[offset] });
} }
Ok(line_string) Ok(line_string)
} }
fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<(f64, f64)>>> { fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<GeoPoint>>> {
let ring_count = VInt::deserialize(reader)?.0 as usize; let ring_count = VInt::deserialize(reader)?.0 as usize;
let mut rings = Vec::new(); let mut rings = Vec::new();
let mut count = 0; let mut count = 0;
@@ -464,14 +469,14 @@ fn deserialize_polygon<R: Read>(reader: &mut R) -> io::Result<Vec<Vec<(f64, f64)
} }
let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?; let lon_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?; let lat_bytes: Vec<u8> = BinarySerializable::deserialize(reader)?;
let lon = decompress_f64(&lon_bytes, count); let lon: Vec<f64> = decompress_f64(&lon_bytes, count);
let lat = decompress_f64(&lat_bytes, count); let lat: Vec<f64> = decompress_f64(&lat_bytes, count);
let mut polygon = Vec::new(); let mut polygon: Vec<Vec<GeoPoint>> = Vec::new();
let mut offset = 0; let mut offset = 0;
for point_count in rings { for point_count in rings {
let mut ring = Vec::new(); let mut ring = Vec::new();
for _ in 0..point_count { for _ in 0..point_count {
ring.push((lon[offset], lat[offset])); ring.push(GeoPoint { lon: lon[offset], lat: lat[offset] });
offset += 1; offset += 1;
} }
polygon.push(ring); polygon.push(ring);

View File

@@ -3,6 +3,7 @@
pub mod bkd; pub mod bkd;
pub mod delta; pub mod delta;
pub mod geometry; pub mod geometry;
pub mod point;
pub mod radix_select; pub mod radix_select;
pub mod reader; pub mod reader;
pub mod serializer; pub mod serializer;

9
src/spatial/point.rs Normal file
View File

@@ -0,0 +1,9 @@
/// A point in the geographical coordinate system.
///
/// The point is a plain pair of `f64` coordinates, named explicitly so that
/// longitude and latitude cannot be swapped the way the elements of an
/// anonymous `(f64, f64)` tuple can.
///
/// NOTE(review): coordinates are presumably expressed in decimal degrees
/// (the spatial writer's `as_point_i32` scales longitude over a span of 360
/// and latitude over a span of 180) -- confirm and document the valid ranges.
///
/// Only `PartialEq` is derived: `f64` does not implement `Eq`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoPoint {
/// Longitude (the first coordinate of a GeoJSON position).
pub lon: f64,
/// Latitude (the second coordinate of a GeoJSON position).
pub lat: f64,
}

View File

@@ -10,12 +10,17 @@ use crate::schema::Field;
use crate::space_usage::PerFieldSpaceUsage; use crate::space_usage::PerFieldSpaceUsage;
#[derive(Clone)] #[derive(Clone)]
/// Gives access to the spatial data of a segment, stored as a composite file.
pub struct SpatialReaders { pub struct SpatialReaders {
data: Arc<CompositeFile>, data: Arc<CompositeFile>,
} }
impl SpatialReaders { impl SpatialReaders {
/// Creates a `SpatialReaders` backed by an empty composite file.
///
/// This is the reader used for segments whose schema does not contain any
/// spatial field, in which case no spatial component file is opened.
pub fn empty() -> SpatialReaders {
    let data = Arc::new(CompositeFile::empty());
    SpatialReaders { data }
}
/// Opens the spatial readers from a composite file slice. /// Opens the spatial readers from a composite file slice.
pub fn open(file: FileSlice) -> crate::Result<SpatialReaders> { pub fn open(file: FileSlice) -> crate::Result<SpatialReaders> {
let data = CompositeFile::open(&file)?; let data = CompositeFile::open(&file)?;

View File

@@ -7,6 +7,7 @@ use i_triangle::int::triangulatable::IntTriangulatable;
use crate::schema::Field; use crate::schema::Field;
use crate::spatial::geometry::Geometry; use crate::spatial::geometry::Geometry;
use crate::spatial::point::GeoPoint;
use crate::spatial::serializer::SpatialSerializer; use crate::spatial::serializer::SpatialSerializer;
use crate::spatial::triangle::{delaunay_to_triangles, Triangle}; use crate::spatial::triangle::{delaunay_to_triangles, Triangle};
use crate::DocId; use crate::DocId;
@@ -81,15 +82,15 @@ impl Default for SpatialWriter {
} }
} }
/// Convert a point of (longitude, latitude) to an integer point. /// Convert a point of `(longitude, latitude)` to an integer point.
pub fn as_point_i32(point: (f64, f64)) -> (i32, i32) { pub fn as_point_i32(point: GeoPoint) -> (i32, i32) {
( (
(point.0 / (360.0 / (1i64 << 32) as f64)).floor() as i32, (point.lon / (360.0 / (1i64 << 32) as f64)).floor() as i32,
(point.1 / (180.0 / (1i64 << 32) as f64)).floor() as i32, (point.lat / (180.0 / (1i64 << 32) as f64)).floor() as i32,
) )
} }
fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: (f64, f64)) { fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: GeoPoint) {
let point = as_point_i32(point); let point = as_point_i32(point);
triangles.push(Triangle::from_point(doc_id, point.0, point.1)); triangles.push(Triangle::from_point(doc_id, point.0, point.1));
} }
@@ -97,7 +98,7 @@ fn append_point(triangles: &mut Vec<Triangle>, doc_id: DocId, point: (f64, f64))
fn append_line_string( fn append_line_string(
triangles: &mut Vec<Triangle>, triangles: &mut Vec<Triangle>,
doc_id: DocId, doc_id: DocId,
line_string: Vec<(f64, f64)>, line_string: Vec<GeoPoint>,
) { ) {
let mut previous = as_point_i32(line_string[0]); let mut previous = as_point_i32(line_string[0]);
for point in line_string.into_iter().skip(1) { for point in line_string.into_iter().skip(1) {
@@ -109,7 +110,7 @@ fn append_line_string(
} }
} }
fn append_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: &[(f64, f64)]) { fn append_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: &[GeoPoint]) {
let mut i_ring = Vec::with_capacity(ring.len() + 1); let mut i_ring = Vec::with_capacity(ring.len() + 1);
for &point in ring { for &point in ring {
let point = as_point_i32(point); let point = as_point_i32(point);
@@ -118,7 +119,7 @@ fn append_ring(i_polygon: &mut Vec<Vec<IntPoint>>, ring: &[(f64, f64)]) {
i_polygon.push(i_ring); i_polygon.push(i_ring);
} }
fn append_polygon(triangles: &mut Vec<Triangle>, doc_id: DocId, polygon: &[Vec<(f64, f64)>]) { fn append_polygon(triangles: &mut Vec<Triangle>, doc_id: DocId, polygon: &[Vec<GeoPoint>]) {
let mut i_polygon: Vec<Vec<IntPoint>> = Vec::new(); let mut i_polygon: Vec<Vec<IntPoint>> = Vec::new();
for ring in polygon { for ring in polygon {
append_ring(&mut i_polygon, ring); append_ring(&mut i_polygon, ring);

View File

@@ -18,7 +18,7 @@
//! Unlike delta.rs which uses arithmetic deltas for i32 spatial coordinates in the block kd-tree, //! Unlike delta.rs which uses arithmetic deltas for i32 spatial coordinates in the block kd-tree,
//! this module operates on f64 bit patterns directly to preserve exact floating-point values for //! this module operates on f64 bit patterns directly to preserve exact floating-point values for
//! returning to users. //! returning to users.
use std::io::{Cursor, Read}; use std::io::Read;
use common::VInt; use common::VInt;
@@ -34,8 +34,8 @@ pub fn compress_f64(values: &[f64]) -> Vec<u8> {
if values.is_empty() { if values.is_empty() {
return Vec::new(); return Vec::new();
} }
let mut output = Vec::new(); let mut output: Vec<u8> = Vec::new();
let mut previous = values[0].to_bits(); let mut previous: u64 = f64_to_le(values[0]);
output.extend_from_slice(&previous.to_le_bytes()); output.extend_from_slice(&previous.to_le_bytes());
for &value in &values[1..] { for &value in &values[1..] {
let bits = value.to_bits(); let bits = value.to_bits();
@@ -46,13 +46,21 @@ pub fn compress_f64(values: &[f64]) -> Vec<u8> {
if output.len() >= values.len() * 8 { if output.len() >= values.len() * 8 {
let mut output = Vec::with_capacity(values.len() * 8); let mut output = Vec::with_capacity(values.len() * 8);
for &value in values { for &value in values {
output.extend_from_slice(&value.to_bits().to_le_bytes()); output.extend_from_slice(&f64_to_le(value).to_le_bytes());
} }
return output; return output;
} }
output output
} }
/// Returns the raw IEEE-754 bit pattern of `value` as a `u64`.
///
/// Calling `to_le_bytes()` on the result yields the little-endian byte
/// representation of the float, which is what the encoder writes out.
fn f64_to_le(value: f64) -> u64 {
    // Round-tripping through little-endian bytes is exactly `to_bits`.
    value.to_bits()
}
/// Rebuilds an `f64` from its raw IEEE-754 bit pattern.
///
/// This is the inverse of `f64_to_le`: `value` is the integer obtained by
/// decoding the float's little-endian bytes with `u64::from_le_bytes`.
fn f64_from_le(value: u64) -> f64 {
    // Round-tripping through little-endian bytes is exactly `from_bits`.
    f64::from_bits(value)
}
/// Decompresses f64 coordinates from XOR delta or raw encoding. /// Decompresses f64 coordinates from XOR delta or raw encoding.
/// ///
/// Detects compression format by byte length - if `bytes.len() == count * 8`, data is raw and /// Detects compression format by byte length - if `bytes.len() == count * 8`, data is raw and
@@ -60,16 +68,16 @@ pub fn compress_f64(values: &[f64]) -> Vec<u8> {
/// reconstructing the original sequence. /// reconstructing the original sequence.
/// ///
/// Returns exact f64 values that were passed to `compress_f64()`. /// Returns exact f64 values that were passed to `compress_f64()`.
pub fn decompress_f64(bytes: &[u8], count: usize) -> Vec<f64> { pub fn decompress_f64(mut bytes: &[u8], count: usize) -> Vec<f64> {
let mut values = Vec::with_capacity(count); let mut values = Vec::with_capacity(count);
if bytes.len() == count * 8 { if bytes.len() == count * 8 {
for i in 0..count { for i in 0..count {
let bits = u64::from_le_bytes(bytes[i * 8..(i + 1) * 8].try_into().unwrap()); let bits = u64::from_le_bytes(bytes[i * 8..(i + 1) * 8].try_into().unwrap());
values.push(f64::from_bits(bits)); values.push(f64_from_le(bits));
} }
return values; return values;
} }
let mut cursor = Cursor::new(bytes); let mut cursor: &mut &[u8] = &mut bytes;
// Read first value (raw 8 bytes) // Read first value (raw 8 bytes)
let mut first_bytes = [0u8; 8]; let mut first_bytes = [0u8; 8];