feat: add more geo functions (#4888)

* chore: add type conversion for array types

* feat: add h3_cells_contains

* refactor: resolve lint issues

* feat: add sphere distance function

* feat: euclidean distance between h3 centroids

* test: round float number

* feat: add more geospatial functions

* test: add tests for geometry functions

* refactor: move wkt function to dedicated module

* feat: add st_area

* refactor: only allow sphere distance between points
This commit is contained in:
Ning Sun
2024-11-05 11:44:25 +08:00
committed by GitHub
parent f3509fa312
commit a8b426aebe
10 changed files with 1162 additions and 12 deletions

151
Cargo.lock generated
View File

@@ -2070,6 +2070,8 @@ dependencies = [
"datafusion",
"datatypes",
"derive_more",
"geo",
"geo-types",
"geohash",
"h3o",
"jsonb",
@@ -2088,6 +2090,7 @@ dependencies = [
"store-api",
"table",
"tokio",
"wkt",
]
[[package]]
@@ -3706,6 +3709,16 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125"
[[package]]
name = "earcutr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01"
dependencies = [
"itertools 0.11.0",
"num-traits",
]
[[package]]
name = "either"
version = "1.13.0"
@@ -4014,6 +4027,12 @@ dependencies = [
"libc",
]
[[package]]
name = "float_next_after"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "0.9.5"
@@ -4438,6 +4457,24 @@ dependencies = [
"version_check",
]
[[package]]
name = "geo"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81d088357a9cc60cec8253b3578f6834b4a3aa20edb55f5d1c030c36d8143f11"
dependencies = [
"earcutr",
"float_next_after",
"geo-types",
"geographiclib-rs",
"i_overlay",
"log",
"num-traits",
"robust",
"rstar",
"spade",
]
[[package]]
name = "geo-types"
version = "0.7.13"
@@ -4446,9 +4483,19 @@ checksum = "9ff16065e5720f376fbced200a5ae0f47ace85fd70b7e54269790281353b6d61"
dependencies = [
"approx 0.5.1",
"num-traits",
"rstar",
"serde",
]
[[package]]
name = "geographiclib-rs"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e5ed84f8089c70234b0a8e0aedb6dc733671612ddc0d37c6066052f9781960"
dependencies = [
"libm",
]
[[package]]
name = "geohash"
version = "0.13.1"
@@ -4597,6 +4644,15 @@ dependencies = [
"num-traits",
]
[[package]]
name = "hash32"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606"
dependencies = [
"byteorder",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
@@ -4692,6 +4748,16 @@ dependencies = [
"http 1.1.0",
]
[[package]]
name = "heapless"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad"
dependencies = [
"hash32",
"stable_deref_trait",
]
[[package]]
name = "heck"
version = "0.4.1"
@@ -5117,6 +5183,50 @@ dependencies = [
"tracing",
]
[[package]]
name = "i_float"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5fe043aae28ce70bd2f78b2f5f82a3654d63607c82594da4dabb8b6cb81f2b2"
dependencies = [
"serde",
]
[[package]]
name = "i_key_sort"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "347c253b4748a1a28baf94c9ce133b6b166f08573157e05afe718812bc599fcd"
[[package]]
name = "i_overlay"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a469f68cb8a7cef375b2b0f581faf5859b4b50600438c00d46b71acc25ebbd0c"
dependencies = [
"i_float",
"i_key_sort",
"i_shape",
"i_tree",
"rayon",
]
[[package]]
name = "i_shape"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b44852d57a991c7dedaf76c55bc44f677f547ff899a430d29e13efd6133d7d8"
dependencies = [
"i_float",
"serde",
]
[[package]]
name = "i_tree"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "155181bc97d770181cf9477da51218a19ee92a8e5be642e796661aee2b601139"
[[package]]
name = "iana-time-zone"
version = "0.1.61"
@@ -9570,6 +9680,12 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "robust"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30"
[[package]]
name = "ron"
version = "0.7.1"
@@ -9664,6 +9780,17 @@ dependencies = [
"zstd 0.13.2",
]
[[package]]
name = "rstar"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "133315eb94c7b1e8d0cb097e5a710d850263372fd028fff18969de708afc7008"
dependencies = [
"heapless",
"num-traits",
"smallvec",
]
[[package]]
name = "rstest"
version = "0.21.0"
@@ -11156,6 +11283,18 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "spade"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f5ef1f863aca7d1d7dda7ccfc36a0a4279bd6d3c375176e5e0712e25cb4889"
dependencies = [
"hashbrown 0.14.5",
"num-traits",
"robust",
"smallvec",
]
[[package]]
name = "sparsevec"
version = "0.2.0"
@@ -14149,6 +14288,18 @@ dependencies = [
"winapi",
]
[[package]]
name = "wkt"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d"
dependencies = [
"geo-types",
"log",
"num-traits",
"thiserror",
]
[[package]]
name = "wyz"
version = "0.5.1"

View File

@@ -9,7 +9,7 @@ workspace = true
[features]
default = ["geo"]
geo = ["geohash", "h3o", "s2"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
[dependencies]
api.workspace = true
@@ -28,6 +28,8 @@ common-version.workspace = true
datafusion.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geo = { version = "0.29", optional = true }
geo-types = { version = "0.7", optional = true }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
jsonb.workspace = true
@@ -44,6 +46,7 @@ sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
wkt = { version = "0.11", optional = true }
[dev-dependencies]
ron = "0.7"

View File

@@ -17,7 +17,10 @@ pub(crate) mod encoding;
mod geohash;
mod h3;
mod helpers;
mod measure;
mod relation;
mod s2;
mod wkt;
use crate::function_registry::FunctionRegistry;
@@ -48,6 +51,7 @@ impl GeoFunctions {
registry.register(Arc::new(h3::H3CellToChildrenSize));
registry.register(Arc::new(h3::H3CellToChildPos));
registry.register(Arc::new(h3::H3ChildPosToCell));
registry.register(Arc::new(h3::H3CellContains));
// h3 grid traversal
registry.register(Arc::new(h3::H3GridDisk));
@@ -55,10 +59,27 @@ impl GeoFunctions {
registry.register(Arc::new(h3::H3GridDistance));
registry.register(Arc::new(h3::H3GridPathCells));
// h3 measurement
registry.register(Arc::new(h3::H3CellDistanceSphereKm));
registry.register(Arc::new(h3::H3CellDistanceEuclideanDegree));
// s2
registry.register(Arc::new(s2::S2LatLngToCell));
registry.register(Arc::new(s2::S2CellLevel));
registry.register(Arc::new(s2::S2CellToToken));
registry.register(Arc::new(s2::S2CellParent));
// spatial data type
registry.register(Arc::new(wkt::LatLngToPointWkt));
// spatial relation
registry.register(Arc::new(relation::STContains));
registry.register(Arc::new(relation::STWithin));
registry.register(Arc::new(relation::STIntersects));
// spatial measure
registry.register(Arc::new(measure::STDistance));
registry.register(Arc::new(measure::STDistanceSphere));
registry.register(Arc::new(measure::STArea));
}
}

View File

@@ -23,8 +23,8 @@ use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::{Scalar, ScalarVectorBuilder};
use datatypes::value::{ListValue, Value};
use datatypes::vectors::{
BooleanVectorBuilder, Int32VectorBuilder, ListVectorBuilder, MutableVector,
StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
BooleanVectorBuilder, Float64VectorBuilder, Int32VectorBuilder, ListVectorBuilder,
MutableVector, StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
};
use derive_more::Display;
use h3o::{CellIndex, LatLng, Resolution};
@@ -38,6 +38,7 @@ static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint64_datatype(),
ConcreteDataType::string_datatype(),
]
});
@@ -952,6 +953,181 @@ impl Function for H3GridPathCells {
}
}
/// Tests whether a set of H3 cells covers a given cell.
///
/// The first argument is the cell set: a list of int64, uint64 or string
/// cells, or a single comma-separated string. The second argument is the cell
/// to look up. A set cell matches when it equals the parent of the lookup cell
/// taken at the set cell's own resolution.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct H3CellContains;

impl Function for H3CellContains {
    fn name(&self) -> &str {
        "h3_cells_contains"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    /// Accepts every (cell set type, single cell type) combination.
    fn signature(&self) -> Signature {
        let multi_cell_types = [
            ConcreteDataType::list_datatype(ConcreteDataType::int64_datatype()),
            ConcreteDataType::list_datatype(ConcreteDataType::uint64_datatype()),
            ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
            ConcreteDataType::string_datatype(),
        ];

        // Cartesian product, set type varying slowest.
        let signatures = multi_cell_types
            .iter()
            .flat_map(|multi_cell_type| {
                CELL_TYPES.iter().map(move |cell_type| {
                    TypeSignature::Exact(vec![multi_cell_type.clone(), cell_type.clone()])
                })
            })
            .collect::<Vec<_>>();

        Signature::one_of(signatures, Volatility::Stable)
    }

    /// Produces one boolean per row; a row is null when the lookup cell
    /// cannot be extracted from the second column.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let (cells_vec, cell_this_vec) = (&columns[0], &columns[1]);
        let row_count = cell_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            // The cell set is decoded first so that its errors surface even
            // when the lookup cell is missing.
            let cells = cells_from_value(cells_vec.get(row))?;
            let contained = cell_from_value(cell_this_vec.get(row))?.map(|cell_this| {
                // Lift the lookup cell to each candidate's resolution and
                // compare; a missing parent never matches.
                cells
                    .iter()
                    .any(|cell_that| cell_this.parent(cell_that.resolution()) == Some(*cell_that))
            });
            results.push(contained);
        }

        Ok(results.to_vector())
    }
}
/// Great-circle distance, in kilometers, between the centroids of two H3 cells.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct H3CellDistanceSphereKm;

impl Function for H3CellDistanceSphereKm {
    fn name(&self) -> &str {
        "h3_distance_sphere_km"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        signature_of_double_cells()
    }

    /// Produces one distance per row; a row is null when either cell cannot
    /// be extracted from its column.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let (cell_this_vec, cell_that_vec) = (&columns[0], &columns[1]);
        let row_count = cell_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let cell_this = cell_from_value(cell_this_vec.get(row))?;
            let cell_that = cell_from_value(cell_that_vec.get(row))?;

            // Converting a cell into `LatLng` yields its centroid.
            let distance_km = cell_this
                .zip(cell_that)
                .map(|(this, that)| LatLng::from(this).distance_km(LatLng::from(that)));
            results.push(distance_km);
        }

        Ok(results.to_vector())
    }
}
/// Euclidean distance between the centroids of two H3 cells, computed directly
/// on their latitude/longitude values.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct H3CellDistanceEuclideanDegree;

impl H3CellDistanceEuclideanDegree {
    /// Straight-line distance between two coordinates, treating latitude and
    /// longitude as plain planar axes.
    fn distance(centroid_this: LatLng, centroid_that: LatLng) -> f64 {
        let lat_delta = centroid_this.lat() - centroid_that.lat();
        let lng_delta = centroid_this.lng() - centroid_that.lng();
        (lat_delta.powi(2) + lng_delta.powi(2)).sqrt()
    }
}

impl Function for H3CellDistanceEuclideanDegree {
    fn name(&self) -> &str {
        "h3_distance_degree"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        signature_of_double_cells()
    }

    /// Produces one distance per row; a row is null when either cell cannot
    /// be extracted from its column.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let (cell_this_vec, cell_that_vec) = (&columns[0], &columns[1]);
        let row_count = cell_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let cell_this = cell_from_value(cell_this_vec.get(row))?;
            let cell_that = cell_from_value(cell_that_vec.get(row))?;

            let dist = if let (Some(this), Some(that)) = (cell_this, cell_that) {
                Some(Self::distance(LatLng::from(this), LatLng::from(that)))
            } else {
                None
            };
            results.push(dist);
        }

        Ok(results.to_vector())
    }
}
fn value_to_resolution(v: Value) -> Result<Resolution> {
let r = match v {
Value::Int8(v) => v as u8,
@@ -1073,7 +1249,126 @@ fn cell_from_value(v: Value) -> Result<Option<CellIndex>> {
})
.context(error::ExecuteSnafu)?,
),
Value::String(s) => Some(
CellIndex::from_str(s.as_utf8())
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("H3 error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?,
),
_ => None,
};
Ok(cell)
}
/// extract cell array from all possible types including:
/// - int64 list
/// - uint64 list
/// - string list
/// - comma-separated string
fn cells_from_value(v: Value) -> Result<Vec<CellIndex>> {
match v {
Value::List(list) => match list.datatype() {
ConcreteDataType::Int64(_) => list
.items()
.iter()
.map(|v| {
if let Value::Int64(v) = v {
CellIndex::try_from(*v as u64)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("H3 error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)
} else {
Err(BoxedError::new(PlainError::new(
"Invalid data type in array".to_string(),
StatusCode::EngineExecuteQuery,
)))
.context(error::ExecuteSnafu)
}
})
.collect::<Result<Vec<CellIndex>>>(),
ConcreteDataType::UInt64(_) => list
.items()
.iter()
.map(|v| {
if let Value::UInt64(v) = v {
CellIndex::try_from(*v)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("H3 error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)
} else {
Err(BoxedError::new(PlainError::new(
"Invalid data type in array".to_string(),
StatusCode::EngineExecuteQuery,
)))
.context(error::ExecuteSnafu)
}
})
.collect::<Result<Vec<CellIndex>>>(),
ConcreteDataType::String(_) => list
.items()
.iter()
.map(|v| {
if let Value::String(v) = v {
CellIndex::from_str(v.as_utf8().trim())
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("H3 error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)
} else {
Err(BoxedError::new(PlainError::new(
"Invalid data type in array".to_string(),
StatusCode::EngineExecuteQuery,
)))
.context(error::ExecuteSnafu)
}
})
.collect::<Result<Vec<CellIndex>>>(),
_ => Ok(vec![]),
},
Value::String(csv) => {
let str_seq = csv.as_utf8().split(',');
str_seq
.map(|v| {
CellIndex::from_str(v.trim())
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("H3 error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)
})
.collect::<Result<Vec<CellIndex>>>()
}
_ => Ok(vec![]),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the degree-space distance between two nearby coordinates.
    #[test]
    fn test_h3_euclidean_distance() {
        let origin = LatLng::new(42.3521, -72.1235).expect("incorrect lat lng");
        let target = LatLng::new(42.45, -72.1260).expect("incorrect lat lng");

        assert_eq!(
            H3CellDistanceEuclideanDegree::distance(origin, target),
            0.09793191512474639
        );
    }
}

View File

@@ -0,0 +1,195 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{Float64VectorBuilder, MutableVector, VectorRef};
use derive_more::Display;
use geo::algorithm::line_measures::metric_spaces::Euclidean;
use geo::{Area, Distance, Haversine};
use geo_types::Geometry;
use snafu::ResultExt;
use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext};
/// Euclidean distance between two geometries given as WKT strings.
///
/// The distance is computed on the raw coordinates, so for WGS84
/// (SRID: 4326) input the result is expressed in degrees.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STDistance;

impl Function for STDistance {
    fn name(&self) -> &str {
        "st_distance"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    /// Takes exactly two string (WKT) arguments.
    fn signature(&self) -> Signature {
        let wkt_pair = vec![
            ConcreteDataType::string_datatype(),
            ConcreteDataType::string_datatype(),
        ];
        Signature::new(TypeSignature::Exact(wkt_pair), Volatility::Stable)
    }

    /// Produces one distance per row; a row is null when either input is not
    /// available as a string. Invalid WKT aborts evaluation with an error.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let (wkt_this_vec, wkt_that_vec) = (&columns[0], &columns[1]);
        let row_count = wkt_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let wkt_this = wkt_this_vec.get(row).as_string();
            let wkt_that = wkt_that_vec.get(row).as_string();

            let distance = if let (Some(wkt_this), Some(wkt_that)) = (wkt_this, wkt_that) {
                let geom_this = parse_wkt(&wkt_this)?;
                let geom_that = parse_wkt(&wkt_that)?;
                Some(Euclidean::distance(&geom_this, &geom_that))
            } else {
                None
            };
            results.push(distance);
        }

        Ok(results.to_vector())
    }
}
/// Return great circle distance between two geometry object, in meters
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STDistanceSphere;
impl Function for STDistanceSphere {
fn name(&self) -> &str {
"st_distance_sphere_m"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::float64_datatype())
}
fn signature(&self) -> Signature {
Signature::new(
TypeSignature::Exact(vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::string_datatype(),
]),
Volatility::Stable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 2);
let wkt_this_vec = &columns[0];
let wkt_that_vec = &columns[1];
let size = wkt_this_vec.len();
let mut results = Float64VectorBuilder::with_capacity(size);
for i in 0..size {
let wkt_this = wkt_this_vec.get(i).as_string();
let wkt_that = wkt_that_vec.get(i).as_string();
let result = match (wkt_this, wkt_that) {
(Some(wkt_this), Some(wkt_that)) => {
let geom_this = parse_wkt(&wkt_this)?;
let geom_that = parse_wkt(&wkt_that)?;
match (geom_this, geom_that) {
(Geometry::Point(this), Geometry::Point(that)) => {
Some(Haversine::distance(this, that))
}
_ => {
Err(BoxedError::new(PlainError::new(
"Great circle distance between non-point objects are not supported for now.".to_string(),
StatusCode::Unsupported,
))).context(error::ExecuteSnafu)?
}
}
}
_ => None,
};
results.push(result);
}
Ok(results.to_vector())
}
}
/// Unsigned area of a geometry given as a WKT string, as computed by the
/// `geo` crate's `Area` trait.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STArea;

impl Function for STArea {
    fn name(&self) -> &str {
        "st_area"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    /// Takes exactly one string (WKT) argument.
    fn signature(&self) -> Signature {
        let single_wkt = vec![ConcreteDataType::string_datatype()];
        Signature::new(TypeSignature::Exact(single_wkt), Volatility::Stable)
    }

    /// Produces one area per row; a row is null when the input is not
    /// available as a string. Invalid WKT aborts evaluation with an error.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 1);

        let wkt_vec = &columns[0];
        let row_count = wkt_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let area = match wkt_vec.get(row).as_string() {
                Some(wkt) => Some(parse_wkt(&wkt)?.unsigned_area()),
                None => None,
            };
            results.push(area);
        }

        Ok(results.to_vector())
    }
}

View File

@@ -0,0 +1,190 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_query::error::Result;
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector, VectorRef};
use derive_more::Display;
use geo::algorithm::contains::Contains;
use geo::algorithm::intersects::Intersects;
use geo::algorithm::within::Within;
use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext};
/// Spatial relation test: does the first geometry contain the second?
///
/// Both geometries are given as WKT strings.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STContains;

impl Function for STContains {
    fn name(&self) -> &str {
        "st_contains"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    /// Takes exactly two string (WKT) arguments.
    fn signature(&self) -> Signature {
        let wkt_pair = vec![
            ConcreteDataType::string_datatype(),
            ConcreteDataType::string_datatype(),
        ];
        Signature::new(TypeSignature::Exact(wkt_pair), Volatility::Stable)
    }

    /// Produces one boolean per row; a row is null when either input is not
    /// available as a string. Invalid WKT aborts evaluation with an error.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let (wkt_this_vec, wkt_that_vec) = (&columns[0], &columns[1]);
        let row_count = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let wkt_this = wkt_this_vec.get(row).as_string();
            let wkt_that = wkt_that_vec.get(row).as_string();

            let contained = if let (Some(wkt_this), Some(wkt_that)) = (wkt_this, wkt_that) {
                let geom_this = parse_wkt(&wkt_this)?;
                let geom_that = parse_wkt(&wkt_that)?;
                Some(geom_this.contains(&geom_that))
            } else {
                None
            };
            results.push(contained);
        }

        Ok(results.to_vector())
    }
}
/// Spatial relation test: is the first geometry within the second?
///
/// Both geometries are given as WKT strings.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STWithin;

impl Function for STWithin {
    fn name(&self) -> &str {
        "st_within"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    /// Takes exactly two string (WKT) arguments.
    fn signature(&self) -> Signature {
        let wkt_pair = vec![
            ConcreteDataType::string_datatype(),
            ConcreteDataType::string_datatype(),
        ];
        Signature::new(TypeSignature::Exact(wkt_pair), Volatility::Stable)
    }

    /// Produces one boolean per row; a row is null when either input is not
    /// available as a string. Invalid WKT aborts evaluation with an error.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let (wkt_this_vec, wkt_that_vec) = (&columns[0], &columns[1]);
        let row_count = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let wkt_this = wkt_this_vec.get(row).as_string();
            let wkt_that = wkt_that_vec.get(row).as_string();

            let within = if let (Some(wkt_this), Some(wkt_that)) = (wkt_this, wkt_that) {
                let geom_this = parse_wkt(&wkt_this)?;
                let geom_that = parse_wkt(&wkt_that)?;
                Some(geom_this.is_within(&geom_that))
            } else {
                None
            };
            results.push(within);
        }

        Ok(results.to_vector())
    }
}
/// Spatial relation test: do the two geometries intersect?
///
/// Both geometries are given as WKT strings.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STIntersects;

impl Function for STIntersects {
    fn name(&self) -> &str {
        "st_intersects"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    /// Takes exactly two string (WKT) arguments.
    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    /// Produces one boolean per row; a row is null when either input is not
    /// available as a string. Invalid WKT aborts evaluation with an error.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.intersects(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

View File

@@ -0,0 +1,100 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
use derive_more::Display;
use geo_types::{Geometry, Point};
use once_cell::sync::Lazy;
use snafu::ResultExt;
use wkt::{ToWkt, TryFromWkt};
use super::helpers::{ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext};
/// Data types accepted for latitude/longitude arguments.
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
    Vec::from([
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
    ])
});
/// Builds a WKT `POINT` string from a latitude and a longitude.
///
/// Arguments are passed as (latitude, longitude); the point is constructed
/// with longitude as x and latitude as y.
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct LatLngToPointWkt;

impl Function for LatLngToPointWkt {
    fn name(&self) -> &str {
        "wkt_point_from_latlng"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    /// Accepts (latitude, longitude) where both arguments share one of the
    /// supported coordinate types.
    fn signature(&self) -> Signature {
        let signatures = COORDINATE_TYPES
            .iter()
            .map(|coord_type| {
                TypeSignature::Exact(vec![
                    // latitude
                    coord_type.clone(),
                    // longitude
                    coord_type.clone(),
                ])
            })
            .collect::<Vec<_>>();

        Signature::one_of(signatures, Volatility::Stable)
    }

    /// Produces one WKT string per row; a row is null when either coordinate
    /// cannot be read as a float.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let lat_vec = &columns[0];
        let lng_vec = &columns[1];

        let size = lat_vec.len();
        let mut results = StringVectorBuilder::with_capacity(size);

        for i in 0..size {
            let lat = lat_vec.get(i).as_f64_lossy();
            let lng = lng_vec.get(i).as_f64_lossy();

            let result = match (lat, lng) {
                // Note the argument swap: x is longitude, y is latitude.
                (Some(lat), Some(lng)) => Some(Point::new(lng, lat).wkt_string()),
                _ => None,
            };

            results.push(result.as_deref());
        }

        Ok(results.to_vector())
    }
}
pub(super) fn parse_wkt(s: &str) -> Result<Geometry> {
Geometry::try_from_wkt_str(s)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Fail to parse WKT: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)
}

View File

@@ -24,6 +24,7 @@ use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
use datafusion_common::ScalarValue;
use datafusion_expr::LogicalPlan;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::{ConcreteDataType, Value};
use datatypes::schema::Schema;
use datatypes::types::{IntervalType, TimestampType};
@@ -529,6 +530,21 @@ pub(super) fn type_pg_to_gt(origin: &Type) -> Result<ConcreteDataType> {
)),
&Type::DATE => Ok(ConcreteDataType::date_datatype()),
&Type::TIME => Ok(ConcreteDataType::datetime_datatype()),
&Type::CHAR_ARRAY => Ok(ConcreteDataType::list_datatype(
ConcreteDataType::int8_datatype(),
)),
&Type::INT2_ARRAY => Ok(ConcreteDataType::list_datatype(
ConcreteDataType::int16_datatype(),
)),
&Type::INT4_ARRAY => Ok(ConcreteDataType::list_datatype(
ConcreteDataType::int32_datatype(),
)),
&Type::INT8_ARRAY => Ok(ConcreteDataType::list_datatype(
ConcreteDataType::int64_datatype(),
)),
&Type::VARCHAR_ARRAY => Ok(ConcreteDataType::list_datatype(
ConcreteDataType::string_datatype(),
)),
_ => server_error::InternalSnafu {
err_msg: format!("unimplemented datatype {origin:?}"),
}
@@ -974,6 +990,42 @@ pub(super) fn parameters_to_scalar_values(
ScalarValue::Binary(data.map(|d| d.to_string().into_bytes()))
}
}
&Type::INT2_ARRAY => {
let data = portal.parameter::<Vec<i16>>(idx, &client_type)?;
if let Some(data) = data {
let values = data.into_iter().map(|i| i.into()).collect::<Vec<_>>();
ScalarValue::List(ScalarValue::new_list(&values, &ArrowDataType::Int16))
} else {
ScalarValue::Null
}
}
&Type::INT4_ARRAY => {
let data = portal.parameter::<Vec<i32>>(idx, &client_type)?;
if let Some(data) = data {
let values = data.into_iter().map(|i| i.into()).collect::<Vec<_>>();
ScalarValue::List(ScalarValue::new_list(&values, &ArrowDataType::Int32))
} else {
ScalarValue::Null
}
}
&Type::INT8_ARRAY => {
let data = portal.parameter::<Vec<i64>>(idx, &client_type)?;
if let Some(data) = data {
let values = data.into_iter().map(|i| i.into()).collect::<Vec<_>>();
ScalarValue::List(ScalarValue::new_list(&values, &ArrowDataType::Int64))
} else {
ScalarValue::Null
}
}
&Type::VARCHAR_ARRAY => {
let data = portal.parameter::<Vec<String>>(idx, &client_type)?;
if let Some(data) = data {
let values = data.into_iter().map(|i| i.into()).collect::<Vec<_>>();
ScalarValue::List(ScalarValue::new_list(&values, &ArrowDataType::Utf8))
} else {
ScalarValue::Null
}
}
_ => Err(invalid_parameter_error(
"unsupported_parameter_value",
Some(format!("Found type: {}", client_type)),

File diff suppressed because one or more lines are too long

View File

@@ -48,13 +48,31 @@ FROM (SELECT h3_latlng_to_cell(37.76938, -122.3889, 8::UInt64) AS cell);
SELECT
h3_grid_distance(cell1, cell2) AS distance,
h3_grid_path_cells(cell1, cell2) AS path_cells,
round(h3_distance_sphere_km(cell1, cell2), 5) AS sphere_distance,
h3_distance_degree(cell1, cell2) AS euclidean_distance,
FROM
(
SELECT
h3_latlng_to_cell(37.76938, -122.3889, 8::UInt64) AS cell1,
h3_latlng_to_cell(39.634, -104.999, 8::UInt64) AS cell2
h3_string_to_cell('86283082fffffff') AS cell1,
h3_string_to_cell('86283470fffffff') AS cell2
);
-- h3_cells_contains with the cell set given as one comma-separated string
-- (some items carry a leading space); the looked-up cell is passed as a
-- string literal and as integer literals.
SELECT
h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', '8b283470d112fff') AS R00,
h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', 604189641792290815) AS R01,
h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', 626707639343067135) AS R02;
-- h3_cells_contains with the cell set given as a list of strings.
SELECT
h3_cells_contains(['86283470fffffff', '862834777ffffff', '862834757ffffff', '86283471fffffff', '862834707ffffff'], '86283472fffffff') AS R10,
h3_cells_contains(['86283470fffffff', '862834777ffffff', '862834757ffffff', '86283471fffffff', '862834707ffffff'], '8b283470d112fff') AS R11,
h3_cells_contains(['86283470fffffff', '862834777ffffff', '862834757ffffff', '86283471fffffff', '862834707ffffff'], 626707639343067135) AS R12;
-- h3_cells_contains with the cell set given as a list of integers.
SELECT
h3_cells_contains([604189641255419903, 604189643000250367, 604189642463379455, 604189641523855359, 604189641121202175], '8b283470d112fff') AS R20,
h3_cells_contains([604189641255419903, 604189643000250367, 604189642463379455, 604189641523855359, 604189641121202175], 604189641792290815) AS R21,
h3_cells_contains([604189641255419903, 604189643000250367, 604189642463379455, 604189641523855359, 604189641121202175], 626707639343067135) AS R22;
SELECT geohash(37.76938, -122.3889, 9);
SELECT geohash(37.76938, -122.3889, 10);
@@ -104,3 +122,39 @@ FROM(
UNION ALL
SELECT 37.77001 AS lat, -122.3888 AS lon, 1728083372::TimestampSecond AS ts
);
-- Build a WKT point from (latitude, longitude).
SELECT wkt_point_from_latlng(37.76938, -122.3889) AS point;

-- Distance and area measures over two points and one polygon.
WITH shapes AS (
    SELECT
        wkt_point_from_latlng(37.76938, -122.3889) AS p1,
        wkt_point_from_latlng(38.5216, -121.4247) AS p2,
        'POLYGON ((-121.491698 38.653343, -121.582353 38.556757, -121.469721 38.449287, -121.315883 38.541721, -121.491698 38.653343))' AS polygon1
)
SELECT
    st_distance(p1, p2) AS euclidean_dist,
    st_distance_sphere_m(p1, p2) AS sphere_dist_m,
    st_distance(p1, polygon1) AS euclidean_dist_pp,
    st_area(p1) AS area_point,
    st_area(polygon1) AS area_polygon
FROM shapes;

-- Sphere distance between a point and a polygon.
SELECT st_distance_sphere_m(wkt_point_from_latlng(37.76938, -122.3889), 'POLYGON ((-121.491698 38.653343, -121.582353 38.556757, -121.469721 38.449287, -121.315883 38.541721, -121.491698 38.653343))');
-- Spatial relation checks (contains / within / intersects) between one point
-- and three polygons, all passed as WKT strings.
SELECT
st_contains(polygon1, p1),
st_contains(polygon2, p1),
st_within(p1, polygon1),
st_within(p1, polygon2),
st_intersects(polygon1, polygon2),
st_intersects(polygon1, polygon3),
FROM
(
-- Inputs: p1 is built from (latitude, longitude); the polygons are WKT literals.
SELECT
wkt_point_from_latlng(37.383287, -122.01325) AS p1,
'POLYGON ((-122.031661 37.428252, -122.139829 37.387072, -122.135365 37.361971, -122.057759 37.332222, -121.987707 37.328946, -121.943754 37.333041, -121.919373 37.349145, -121.945814 37.376705, -121.975689 37.417345, -121.998696 37.409164, -122.031661 37.428252))' AS polygon1,
'POLYGON ((-121.491698 38.653343, -121.582353 38.556757, -121.469721 38.449287, -121.315883 38.541721, -121.491698 38.653343))' AS polygon2,
'POLYGON ((-122.089628 37.450332, -122.20535 37.378342, -122.093062 37.36088, -122.044301 37.372886, -122.089628 37.450332))' AS polygon3,
);