mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-22 07:50:38 +00:00
feat: add some s2 geo functions (#4823)
* feat: add first batch of s2 functions * refactor: update reusable code from main * test: add sqlness tests for s2 * feat: add tostring function for s2 * Update src/common/function/src/scalars/geo/s2.rs Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com> * Apply suggestions from code review * one more change Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
This commit is contained in:
@@ -9,7 +9,7 @@ workspace = true
|
||||
|
||||
[features]
|
||||
default = ["geo"]
|
||||
geo = ["geohash", "h3o"]
|
||||
geo = ["geohash", "h3o", "s2"]
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
@@ -35,6 +35,7 @@ num = "0.4"
|
||||
num-traits = "0.2"
|
||||
once_cell.workspace = true
|
||||
paste = "1.0"
|
||||
s2 = { version = "0.0.12", optional = true }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
session.workspace = true
|
||||
|
||||
@@ -17,8 +17,7 @@ pub(crate) mod encoding;
|
||||
mod geohash;
|
||||
mod h3;
|
||||
mod helpers;
|
||||
|
||||
use geohash::{GeohashFunction, GeohashNeighboursFunction};
|
||||
mod s2;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -27,8 +26,8 @@ pub(crate) struct GeoFunctions;
|
||||
impl GeoFunctions {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
// geohash
|
||||
registry.register(Arc::new(GeohashFunction));
|
||||
registry.register(Arc::new(GeohashNeighboursFunction));
|
||||
registry.register(Arc::new(geohash::GeohashFunction));
|
||||
registry.register(Arc::new(geohash::GeohashNeighboursFunction));
|
||||
|
||||
// h3 index
|
||||
registry.register(Arc::new(h3::H3LatLngToCell));
|
||||
@@ -55,5 +54,11 @@ impl GeoFunctions {
|
||||
registry.register(Arc::new(h3::H3GridDiskDistances));
|
||||
registry.register(Arc::new(h3::H3GridDistance));
|
||||
registry.register(Arc::new(h3::H3GridPathCells));
|
||||
|
||||
// s2
|
||||
registry.register(Arc::new(s2::S2LatLngToCell));
|
||||
registry.register(Arc::new(s2::S2CellLevel));
|
||||
registry.register(Arc::new(s2::S2CellToToken));
|
||||
registry.register(Arc::new(s2::S2CellParent));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{self, InvalidFuncArgsSnafu, InvalidInputStateSnafu, Result};
|
||||
use common_query::error::{self, InvalidInputStateSnafu, Result};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::AccumulatorCreatorFunction;
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::str::FromStr;
|
||||
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::error::{self, InvalidFuncArgsSnafu, Result};
|
||||
use common_query::error::{self, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
@@ -29,9 +29,9 @@ use datatypes::vectors::{
|
||||
use derive_more::Display;
|
||||
use h3o::{CellIndex, LatLng, Resolution};
|
||||
use once_cell::sync::Lazy;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use super::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
use super::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
@@ -382,15 +382,7 @@ impl Function for H3CellResolution {
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect 1, provided : {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let size = cell_vec.len();
|
||||
@@ -982,18 +974,6 @@ fn value_to_resolution(v: Value) -> Result<Resolution> {
|
||||
.context(error::ExecuteSnafu)
|
||||
}
|
||||
|
||||
macro_rules! ensure_and_coerce {
|
||||
($compare:expr, $coerce:expr) => {{
|
||||
ensure!(
|
||||
$compare,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: "Argument was outside of acceptable range "
|
||||
}
|
||||
);
|
||||
Ok($coerce)
|
||||
}};
|
||||
}
|
||||
|
||||
fn value_to_position(v: Value) -> Result<u64> {
|
||||
match v {
|
||||
Value::Int8(v) => ensure_and_coerce!(v >= 0, v as u64),
|
||||
|
||||
@@ -14,15 +14,15 @@
|
||||
|
||||
macro_rules! ensure_columns_len {
|
||||
($columns:ident) => {
|
||||
ensure!(
|
||||
snafu::ensure!(
|
||||
$columns.windows(2).all(|c| c[0].len() == c[1].len()),
|
||||
InvalidFuncArgsSnafu {
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The length of input columns are in different size"
|
||||
}
|
||||
)
|
||||
};
|
||||
($column_a:ident, $column_b:ident, $($column_n:ident),*) => {
|
||||
ensure!(
|
||||
snafu::ensure!(
|
||||
{
|
||||
let mut result = $column_a.len() == $column_b.len();
|
||||
$(
|
||||
@@ -30,7 +30,7 @@ macro_rules! ensure_columns_len {
|
||||
)*
|
||||
result
|
||||
}
|
||||
InvalidFuncArgsSnafu {
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The length of input columns are in different size"
|
||||
}
|
||||
)
|
||||
@@ -41,9 +41,9 @@ pub(super) use ensure_columns_len;
|
||||
|
||||
macro_rules! ensure_columns_n {
|
||||
($columns:ident, $n:literal) => {
|
||||
ensure!(
|
||||
snafu::ensure!(
|
||||
$columns.len() == $n,
|
||||
InvalidFuncArgsSnafu {
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of arguments is not correct, expect {}, provided : {}",
|
||||
stringify!($n),
|
||||
@@ -59,3 +59,17 @@ macro_rules! ensure_columns_n {
|
||||
}
|
||||
|
||||
pub(super) use ensure_columns_n;
|
||||
|
||||
macro_rules! ensure_and_coerce {
|
||||
($compare:expr, $coerce:expr) => {{
|
||||
snafu::ensure!(
|
||||
$compare,
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: "Argument was outside of acceptable range "
|
||||
}
|
||||
);
|
||||
Ok($coerce)
|
||||
}};
|
||||
}
|
||||
|
||||
pub(super) use ensure_and_coerce;
|
||||
|
||||
275
src/common/function/src/scalars/geo/s2.rs
Normal file
275
src/common/function/src/scalars/geo/s2.rs
Normal file
@@ -0,0 +1,275 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, UInt64VectorBuilder, VectorRef};
|
||||
use derive_more::Display;
|
||||
use once_cell::sync::Lazy;
|
||||
use s2::cellid::{CellID, MAX_LEVEL};
|
||||
use s2::latlng::LatLng;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
|
||||
|
||||
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
static LEVEL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
/// Function that returns [s2] encoding cellid for a given geospatial coordinate.
|
||||
///
|
||||
/// [s2]: http://s2geometry.io
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2LatLngToCell;
|
||||
|
||||
impl Function for S2LatLngToCell {
|
||||
fn name(&self) -> &str {
|
||||
"s2_latlng_to_cell"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
let mut signatures = Vec::with_capacity(COORDINATE_TYPES.len());
|
||||
for coord_type in COORDINATE_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
// latitude
|
||||
coord_type.clone(),
|
||||
// longitude
|
||||
coord_type.clone(),
|
||||
]));
|
||||
}
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let lat_vec = &columns[0];
|
||||
let lon_vec = &columns[1];
|
||||
|
||||
let size = lat_vec.len();
|
||||
let mut results = UInt64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let lat = lat_vec.get(i).as_f64_lossy();
|
||||
let lon = lon_vec.get(i).as_f64_lossy();
|
||||
|
||||
let result = match (lat, lon) {
|
||||
(Some(lat), Some(lon)) => {
|
||||
let coord = LatLng::from_degrees(lat, lon);
|
||||
ensure!(
|
||||
coord.is_valid(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: "The input coordinates are invalid",
|
||||
}
|
||||
);
|
||||
let cellid = CellID::from(coord);
|
||||
let encoded: u64 = cellid.0;
|
||||
Some(encoded)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the level of current s2 cell
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2CellLevel;
|
||||
|
||||
impl Function for S2CellLevel {
|
||||
fn name(&self) -> &str {
|
||||
"s2_cell_level"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_cell()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let size = cell_vec.len();
|
||||
let mut results = UInt64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let cell = cell_from_value(cell_vec.get(i));
|
||||
let res = cell.map(|cell| cell.level());
|
||||
|
||||
results.push(res);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the string presentation of the cell
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2CellToToken;
|
||||
|
||||
impl Function for S2CellToToken {
|
||||
fn name(&self) -> &str {
|
||||
"s2_cell_to_token"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_cell()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let size = cell_vec.len();
|
||||
let mut results = StringVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let cell = cell_from_value(cell_vec.get(i));
|
||||
let res = cell.map(|cell| cell.to_token());
|
||||
|
||||
results.push(res.as_deref());
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return parent at given level of current s2 cell
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2CellParent;
|
||||
|
||||
impl Function for S2CellParent {
|
||||
fn name(&self) -> &str {
|
||||
"s2_cell_parent"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_cell_and_level()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let level_vec = &columns[1];
|
||||
let size = cell_vec.len();
|
||||
let mut results = UInt64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let cell = cell_from_value(cell_vec.get(i));
|
||||
let level = value_to_level(level_vec.get(i))?;
|
||||
let result = cell.map(|cell| cell.parent(level).0);
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
fn signature_of_cell() -> Signature {
|
||||
let mut signatures = Vec::with_capacity(CELL_TYPES.len());
|
||||
for cell_type in CELL_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![cell_type.clone()]));
|
||||
}
|
||||
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn signature_of_cell_and_level() -> Signature {
|
||||
let mut signatures = Vec::with_capacity(CELL_TYPES.len() * LEVEL_TYPES.len());
|
||||
for cell_type in CELL_TYPES.as_slice() {
|
||||
for level_type in LEVEL_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
cell_type.clone(),
|
||||
level_type.clone(),
|
||||
]));
|
||||
}
|
||||
}
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn cell_from_value(v: Value) -> Option<CellID> {
|
||||
match v {
|
||||
Value::Int64(v) => Some(CellID(v as u64)),
|
||||
Value::UInt64(v) => Some(CellID(v)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_level(v: Value) -> Result<u64> {
|
||||
match v {
|
||||
Value::Int8(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i8, v as u64),
|
||||
Value::Int16(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i16, v as u64),
|
||||
Value::Int32(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i32, v as u64),
|
||||
Value::Int64(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i64, v as u64),
|
||||
Value::UInt8(v) => ensure_and_coerce!(v <= MAX_LEVEL as u8, v as u64),
|
||||
Value::UInt16(v) => ensure_and_coerce!(v <= MAX_LEVEL as u16, v as u64),
|
||||
Value::UInt32(v) => ensure_and_coerce!(v <= MAX_LEVEL as u32, v as u64),
|
||||
Value::UInt64(v) => ensure_and_coerce!(v <= MAX_LEVEL, v),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user