mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-21 23:40:38 +00:00
test: adds sqlness test for vector index (#7634)
* test: adds sqlness test for vector index Signed-off-by: Dennis Zhuang <killme2008@gmail.com> * fix: CI Signed-off-by: Dennis Zhuang <killme2008@gmail.com> * test: redacted flat map and size Signed-off-by: Dennis Zhuang <killme2008@gmail.com> * test: simplify the replace rules Signed-off-by: Dennis Zhuang <killme2008@gmail.com> * chore: update comments and tests Signed-off-by: Dennis Zhuang <killme2008@gmail.com> --------- Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
2
.github/workflows/develop.yml
vendored
2
.github/workflows/develop.yml
vendored
@@ -119,7 +119,7 @@ jobs:
|
||||
- name: Build greptime binaries
|
||||
shell: bash
|
||||
# `cargo gc` will invoke `cargo build` with specified args
|
||||
run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend"
|
||||
run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend,vector_index"
|
||||
- name: Pack greptime binaries
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
@@ -1576,6 +1576,10 @@ impl StreamContext {
|
||||
let exprs: Vec<_> = predicate.exprs().iter().map(|e| e.to_string()).collect();
|
||||
write!(f, ", \"filters\": {:?}", exprs)?;
|
||||
}
|
||||
#[cfg(feature = "vector_index")]
|
||||
if let Some(vector_index_k) = self.input.vector_index_k {
|
||||
write!(f, ", \"vector_index_k\": {}", vector_index_k)?;
|
||||
}
|
||||
if !self.input.files.is_empty() {
|
||||
write!(f, ", \"files\": ")?;
|
||||
f.debug_list()
|
||||
|
||||
@@ -323,14 +323,18 @@ impl TreeNodeVisitor<'_> for ScanHintVisitor {
|
||||
}
|
||||
|
||||
// Avoid carrying vector hints across branching inputs (join/subquery) to prevent
|
||||
// pruning results before global ordering is applied.
|
||||
let is_branching = matches!(node, LogicalPlan::Subquery(_)) || node.inputs().len() > 1;
|
||||
if is_branching && self.ts_row_selector.is_some() {
|
||||
// pruning results before global ordering is applied. Only treat a subquery as a
|
||||
// barrier when it contains non-inlineable operators.
|
||||
let is_branching_for_ts = matches!(
|
||||
node,
|
||||
LogicalPlan::Subquery(_) | LogicalPlan::SubqueryAlias(_)
|
||||
) || node.inputs().len() > 1;
|
||||
if is_branching_for_ts && self.ts_row_selector.is_some() {
|
||||
// Clear the previous time series selector hint when encountering a subquery or a join.
|
||||
self.ts_row_selector = None;
|
||||
}
|
||||
#[cfg(feature = "vector_index")]
|
||||
if is_branching {
|
||||
if is_branching_for_vector(node) {
|
||||
self.vector_search.on_branching_enter();
|
||||
}
|
||||
|
||||
@@ -371,8 +375,10 @@ impl TreeNodeVisitor<'_> for ScanHintVisitor {
|
||||
LogicalPlan::Filter(_) => {
|
||||
self.vector_search.on_filter_exit();
|
||||
}
|
||||
LogicalPlan::Subquery(_) => {
|
||||
self.vector_search.on_branching_exit();
|
||||
LogicalPlan::Subquery(_) | LogicalPlan::SubqueryAlias(_) => {
|
||||
if is_branching_for_vector(_node) {
|
||||
self.vector_search.on_branching_exit();
|
||||
}
|
||||
}
|
||||
_ if _node.inputs().len() > 1 => {
|
||||
self.vector_search.on_branching_exit();
|
||||
@@ -398,6 +404,43 @@ impl ScanHintVisitor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `node` must be treated as a branching point for vector search hints.
///
/// A node is branching when it has more than one input, or when it is a
/// `Subquery` / `SubqueryAlias` whose wrapped plan contains an operator matched by
/// [`has_non_inlineable_ops`]. Every other node is not branching.
#[cfg(feature = "vector_index")]
fn is_branching_for_vector(node: &LogicalPlan) -> bool {
    match node {
        // Multi-input nodes are always branching, whatever their variant.
        _ if node.inputs().len() > 1 => true,
        LogicalPlan::Subquery(wrapper) => has_non_inlineable_ops(wrapper.subquery.as_ref()),
        LogicalPlan::SubqueryAlias(aliased) => has_non_inlineable_ops(aliased.input.as_ref()),
        _ => false,
    }
}
|
||||
|
||||
/// Recursively checks whether `plan`, or any plan reachable through its inputs, is a
/// `Limit`, `Sort`, `Distinct`, `Aggregate`, `Window`, `Union` or `Join` node.
///
/// Returns `true` as soon as one such node is found, `false` if none exists.
#[cfg(feature = "vector_index")]
fn has_non_inlineable_ops(plan: &LogicalPlan) -> bool {
    let is_barrier = matches!(
        plan,
        LogicalPlan::Limit(_)
            | LogicalPlan::Sort(_)
            | LogicalPlan::Distinct(_)
            | LogicalPlan::Aggregate(_)
            | LogicalPlan::Window(_)
            | LogicalPlan::Union(_)
            | LogicalPlan::Join(_)
    );

    // `||` and `any` both short-circuit, so inputs are only walked when the node
    // itself is not a barrier, and the walk stops at the first match.
    is_barrier || plan.inputs().into_iter().any(has_non_inlineable_ops)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -35,6 +35,16 @@ use crate::dummy_catalog::DummyTableProvider;
|
||||
/// - A LIMIT (or Sort.fetch) is present to derive k.
|
||||
/// - The hint stays within a single input chain (not across join/subquery branches).
|
||||
/// - The target column is non-nullable, or an explicit IS NOT NULL filter exists.
|
||||
///
|
||||
/// Known limitations:
|
||||
/// - Dynamic overfetching is not implemented yet. When filters exist or ORDER BY includes
|
||||
/// additional tie-breaker columns (e.g., ORDER BY distance, id), the current fixed k may
|
||||
/// return incorrect results. A future improvement should dynamically adjust k based on
|
||||
/// filter selectivity and secondary sort requirements.
|
||||
/// - Hints only block at subquery boundaries when the subquery contains non-inlineable
|
||||
/// operators (Limit/Sort/Distinct/Aggregate/Window/Union/Join). Simple subqueries without these
|
||||
/// operators allow hints to propagate through. In distributed mode, the dist analyzer
|
||||
/// may inline subqueries before this rule runs, further reducing isolation.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct VectorSearchState {
|
||||
current_distance: Option<VectorDistanceInfo>,
|
||||
@@ -241,23 +251,47 @@ impl VectorSearchState {
|
||||
fn extract_distance_from_sort(
|
||||
sort: &datafusion_expr::logical_plan::Sort,
|
||||
) -> Option<VectorDistanceInfo> {
|
||||
if sort.expr.len() != 1 {
|
||||
debug!(
|
||||
"Skip vector hint: Sort has {} expressions, expected 1",
|
||||
sort.expr.len()
|
||||
);
|
||||
if sort.expr.is_empty() {
|
||||
debug!("Skip vector hint: Sort has no expressions");
|
||||
return None;
|
||||
}
|
||||
let sort_expr: &SortExpr = &sort.expr[0];
|
||||
let info = Self::extract_distance_info(&sort_expr.expr)?;
|
||||
let expected_asc = info.metric != VectorDistanceMetric::InnerProduct;
|
||||
if sort_expr.asc == expected_asc {
|
||||
if sort_expr.asc != expected_asc {
|
||||
return None;
|
||||
}
|
||||
|
||||
if Self::tie_breakers_allowed(&sort.expr[1..], &info) {
|
||||
Some(info)
|
||||
} else {
|
||||
if sort.expr.len() > 1 {
|
||||
debug!(
|
||||
"Skip vector hint: Sort has unsupported tie-breakers ({} expressions)",
|
||||
sort.expr.len()
|
||||
);
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn tie_breakers_allowed(sort_exprs: &[SortExpr], distance_info: &VectorDistanceInfo) -> bool {
|
||||
if sort_exprs.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
sort_exprs.iter().all(|sort_expr| {
|
||||
let Expr::Column(col) = &sort_expr.expr else {
|
||||
return false;
|
||||
};
|
||||
|
||||
match &distance_info.table_reference {
|
||||
Some(table) => col.relation.as_ref() == Some(table),
|
||||
None => col.relation.is_none(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_limit_info(limit: &datafusion_expr::logical_plan::Limit) -> Option<VectorLimitInfo> {
|
||||
let fetch = match limit.get_fetch_type().ok()? {
|
||||
FetchType::Literal(fetch) => fetch?,
|
||||
@@ -767,8 +801,10 @@ mod tests {
|
||||
assert!(t2_provider.get_vector_search_hint().is_none());
|
||||
}
|
||||
|
||||
// Simple subqueries (without non-inlineable ops like Limit/Sort/Distinct/Aggregate/Window)
|
||||
// allow hints to propagate through. See known limitations in VectorSearchState docs.
|
||||
#[test]
|
||||
fn test_no_vector_hint_above_subquery() {
|
||||
fn test_simple_subquery_allows_hint_propagation() {
|
||||
let provider = build_dummy_provider(10);
|
||||
let table_source = Arc::new(DefaultTableSource::new(provider.clone()));
|
||||
let scan_plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
@@ -794,6 +830,42 @@ mod tests {
|
||||
let context = OptimizerContext::default();
|
||||
let _ = ScanHintRule.rewrite(plan, &context).unwrap();
|
||||
|
||||
// Hint propagates through simple subquery
|
||||
let hint = provider.get_vector_search_hint().unwrap();
|
||||
assert_eq!(hint.k, 5);
|
||||
}
|
||||
|
||||
// Subqueries with non-inlineable ops (Limit/Sort/Distinct/Aggregate/Window/Union/Join) block hint propagation.
|
||||
#[test]
fn test_subquery_with_limit_blocks_hint() {
    let provider = build_dummy_provider(10);
    let source = Arc::new(DefaultTableSource::new(provider.clone()));

    // Inner plan: a scan of "t" capped by a Limit, which is a non-inlineable op.
    let inner_plan = LogicalPlanBuilder::scan_with_filters("t", source, None, vec![])
        .unwrap()
        .limit(0, Some(100))
        .unwrap()
        .build()
        .unwrap();
    let subquery = LogicalPlan::Subquery(Subquery {
        subquery: Arc::new(inner_plan),
        outer_ref_columns: vec![],
        spans: Default::default(),
    });

    // Outer plan: sort on the expression built by `vec_distance_expr`, then LIMIT 5.
    let sort_key = vec_distance_expr(VEC_L2SQ_DISTANCE);
    let outer_plan = LogicalPlanBuilder::from(subquery)
        .sort(vec![sort_key.sort(true, false)])
        .unwrap()
        .limit(0, Some(5))
        .unwrap()
        .build()
        .unwrap();

    let _ = ScanHintRule
        .rewrite(outer_plan, &OptimizerContext::default())
        .unwrap();

    // The hint must not reach the provider through a subquery with non-inlineable ops.
    assert!(provider.get_vector_search_hint().is_none());
}
|
||||
|
||||
|
||||
@@ -0,0 +1,602 @@
|
||||
-- Test vector index creation and KNN search
|
||||
-- ============================================
|
||||
-- Part 1: Basic L2 squared distance tests
|
||||
-- ============================================
|
||||
-- Create a table with vector column and L2sq vector index
|
||||
CREATE TABLE vectors_l2sq (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert test vectors
|
||||
INSERT INTO vectors_l2sq VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 0.0, 1.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.0, 0.0, 1.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[1.0, 1.0, 0.0, 0.0]'),
|
||||
(6, '2024-01-01 00:00:05', '[0.0, 1.0, 1.0, 0.0]'),
|
||||
(7, '2024-01-01 00:00:06', '[0.0, 0.0, 1.0, 1.0]'),
|
||||
(8, '2024-01-01 00:00:07', '[1.0, 0.0, 0.0, 1.0]');
|
||||
|
||||
Affected Rows: 8
|
||||
|
||||
-- Query BEFORE flush (memtable search)
|
||||
-- Expected: vec_id=1 (distance=0), vec_id=5 (distance=1), vec_id=8 (distance=1)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_l2sq
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+----------+
|
||||
| vec_id | distance |
|
||||
+--------+----------+
|
||||
| 1 | 0.0 |
|
||||
| 5 | 1.0 |
|
||||
| 8 | 1.0 |
|
||||
+--------+----------+
|
||||
|
||||
-- Flush to create SST files with vector index
|
||||
ADMIN FLUSH_TABLE('vectors_l2sq');
|
||||
|
||||
+-----------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_l2sq') |
|
||||
+-----------------------------------+
|
||||
| 0 |
|
||||
+-----------------------------------+
|
||||
|
||||
-- Query AFTER flush (SST index search)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_l2sq
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+----------+
|
||||
| vec_id | distance |
|
||||
+--------+----------+
|
||||
| 1 | 0.0 |
|
||||
| 5 | 1.0 |
|
||||
| 8 | 1.0 |
|
||||
+--------+----------+
|
||||
|
||||
-- Query with different target vector
|
||||
-- Expected: vec_id=6 (distance=0), vec_id=2 (distance=1), vec_id=3 (distance=1)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[0.0, 1.0, 1.0, 0.0]') as distance
|
||||
FROM vectors_l2sq
|
||||
ORDER BY vec_l2sq_distance(embedding, '[0.0, 1.0, 1.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+----------+
|
||||
| vec_id | distance |
|
||||
+--------+----------+
|
||||
| 6 | 0.0 |
|
||||
| 2 | 1.0 |
|
||||
| 3 | 1.0 |
|
||||
+--------+----------+
|
||||
|
||||
DROP TABLE vectors_l2sq;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 2: Cosine distance tests
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_cosine (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'cosine'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert vectors with different magnitudes but same/different directions
|
||||
INSERT INTO vectors_cosine VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[2.0, 0.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[1.0, 1.0, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[-1.0, 0.0, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Memtable search with cosine distance
|
||||
-- vec_id=1 and vec_id=2 should have same cosine distance (0) since they point same direction
|
||||
SELECT vec_id, vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_cosine
|
||||
ORDER BY vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+------------+
|
||||
| vec_id | distance |
|
||||
+--------+------------+
|
||||
| 1 | 0.0 |
|
||||
| 2 | 0.0 |
|
||||
| 4 | 0.29289323 |
|
||||
+--------+------------+
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_cosine');
|
||||
|
||||
+-------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_cosine') |
|
||||
+-------------------------------------+
|
||||
| 0 |
|
||||
+-------------------------------------+
|
||||
|
||||
-- SST index search with cosine distance
|
||||
SELECT vec_id, vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_cosine
|
||||
ORDER BY vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+------------+
|
||||
| vec_id | distance |
|
||||
+--------+------------+
|
||||
| 1 | 0.0 |
|
||||
| 2 | 0.0 |
|
||||
| 4 | 0.29289323 |
|
||||
+--------+------------+
|
||||
|
||||
DROP TABLE vectors_cosine;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 3: Dot product (inner product) tests
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_dot (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'dot'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_dot VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[2.0, 0.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[1.0, 1.0, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[-1.0, 0.0, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Memtable search with dot product
|
||||
-- Larger dot product means more similar, so we use negative for ordering
|
||||
-- vec_id=2 should be best (dot=2), vec_id=1 and vec_id=4 have dot=1
|
||||
SELECT vec_id, vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') as dot_product
|
||||
FROM vectors_dot
|
||||
ORDER BY vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') DESC, vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+-------------+
|
||||
| vec_id | dot_product |
|
||||
+--------+-------------+
|
||||
| 2 | 2.0 |
|
||||
| 1 | 1.0 |
|
||||
| 4 | 1.0 |
|
||||
+--------+-------------+
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_dot');
|
||||
|
||||
+----------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_dot') |
|
||||
+----------------------------------+
|
||||
| 0 |
|
||||
+----------------------------------+
|
||||
|
||||
-- SST index search with dot product
|
||||
SELECT vec_id, vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') as dot_product
|
||||
FROM vectors_dot
|
||||
ORDER BY vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') DESC, vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+-------------+
|
||||
| vec_id | dot_product |
|
||||
+--------+-------------+
|
||||
| 2 | 2.0 |
|
||||
| 1 | 1.0 |
|
||||
| 4 | 1.0 |
|
||||
+--------+-------------+
|
||||
|
||||
DROP TABLE vectors_dot;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 4: NULL vector handling tests
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_null (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert vectors with some NULLs
|
||||
INSERT INTO vectors_null VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', NULL),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', NULL),
|
||||
(5, '2024-01-01 00:00:04', '[0.0, 0.0, 1.0, 0.0]');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Memtable search should skip NULL vectors
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_null
|
||||
WHERE embedding IS NOT NULL
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+----------+
|
||||
| vec_id | distance |
|
||||
+--------+----------+
|
||||
| 1 | 0.0 |
|
||||
| 3 | 2.0 |
|
||||
| 5 | 2.0 |
|
||||
+--------+----------+
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_null');
|
||||
|
||||
+-----------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_null') |
|
||||
+-----------------------------------+
|
||||
| 0 |
|
||||
+-----------------------------------+
|
||||
|
||||
-- SST index search should also skip NULL vectors
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_null
|
||||
WHERE embedding IS NOT NULL
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+----------+
|
||||
| vec_id | distance |
|
||||
+--------+----------+
|
||||
| 1 | 0.0 |
|
||||
| 3 | 2.0 |
|
||||
| 5 | 2.0 |
|
||||
+--------+----------+
|
||||
|
||||
DROP TABLE vectors_null;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 5: Mixed memtable + SST search tests
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_mixed (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert first batch and flush to SST
|
||||
INSERT INTO vectors_mixed VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 0.0, 1.0, 0.0]');
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_mixed');
|
||||
|
||||
+------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_mixed') |
|
||||
+------------------------------------+
|
||||
| 0 |
|
||||
+------------------------------------+
|
||||
|
||||
-- Insert second batch (stays in memtable)
|
||||
INSERT INTO vectors_mixed VALUES
|
||||
(4, '2024-01-01 00:00:03', '[0.5, 0.5, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[0.9, 0.1, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
-- Query should search both SST (vec_id 1,2,3) and memtable (vec_id 4,5)
|
||||
-- Target: [1.0, 0.0, 0.0, 0.0]
|
||||
-- Expected: vec_id=1 (dist=0), vec_id=5 (dist=0.02), vec_id=4 (dist=0.5)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_mixed
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
+--------+-------------+
|
||||
| vec_id | distance |
|
||||
+--------+-------------+
|
||||
| 1 | 0.0 |
|
||||
| 5 | 0.020000005 |
|
||||
| 4 | 0.5 |
|
||||
+--------+-------------+
|
||||
|
||||
DROP TABLE vectors_mixed;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 6: KNN with WHERE clause tests
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_filter (
|
||||
vec_id INT,
|
||||
category STRING,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_filter VALUES
|
||||
(1, 'A', '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, 'B', '2024-01-01 00:00:01', '[0.9, 0.1, 0.0, 0.0]'),
|
||||
(3, 'A', '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, 'B', '2024-01-01 00:00:03', '[0.1, 0.9, 0.0, 0.0]'),
|
||||
(5, 'A', '2024-01-01 00:00:04', '[0.5, 0.5, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Memtable search with filter
|
||||
SELECT vec_id, category, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_filter
|
||||
WHERE category = 'A'
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+----------+----------+
|
||||
| vec_id | category | distance |
|
||||
+--------+----------+----------+
|
||||
| 1 | A | 0.0 |
|
||||
| 5 | A | 0.5 |
|
||||
+--------+----------+----------+
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_filter');
|
||||
|
||||
+-------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_filter') |
|
||||
+-------------------------------------+
|
||||
| 0 |
|
||||
+-------------------------------------+
|
||||
|
||||
-- SST index search with filter
|
||||
SELECT vec_id, category, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_filter
|
||||
WHERE category = 'A'
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+----------+----------+
|
||||
| vec_id | category | distance |
|
||||
+--------+----------+----------+
|
||||
| 1 | A | 0.0 |
|
||||
| 5 | A | 0.5 |
|
||||
+--------+----------+----------+
|
||||
|
||||
-- Filter with time range
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_filter
|
||||
WHERE ts >= '2024-01-01 00:00:02' AND ts <= '2024-01-01 00:00:04'
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+-----------+
|
||||
| vec_id | distance |
|
||||
+--------+-----------+
|
||||
| 5 | 0.5 |
|
||||
| 4 | 1.6199999 |
|
||||
+--------+-----------+
|
||||
|
||||
DROP TABLE vectors_filter;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 7: Higher dimension vectors
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_high_dim (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(128) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert high-dimensional vectors (simplified: first few elements differ)
|
||||
INSERT INTO vectors_high_dim VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
-- Memtable search
|
||||
SELECT vec_id FROM vectors_high_dim
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
| 3 |
|
||||
+--------+
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_high_dim');
|
||||
|
||||
+---------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_high_dim') |
|
||||
+---------------------------------------+
|
||||
| 0 |
|
||||
+---------------------------------------+
|
||||
|
||||
-- SST index search
|
||||
SELECT vec_id FROM vectors_high_dim
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
| 3 |
|
||||
+--------+
|
||||
|
||||
DROP TABLE vectors_high_dim;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 8: Different k values (LIMIT)
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_k (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_k VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.1, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.8, 0.2, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.7, 0.3, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[0.6, 0.4, 0.0, 0.0]'),
|
||||
(6, '2024-01-01 00:00:05', '[0.5, 0.5, 0.0, 0.0]'),
|
||||
(7, '2024-01-01 00:00:06', '[0.4, 0.6, 0.0, 0.0]'),
|
||||
(8, '2024-01-01 00:00:07', '[0.3, 0.7, 0.0, 0.0]'),
|
||||
(9, '2024-01-01 00:00:08', '[0.2, 0.8, 0.0, 0.0]'),
|
||||
(10, '2024-01-01 00:00:09', '[0.1, 0.9, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 10
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_k');
|
||||
|
||||
+--------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_k') |
|
||||
+--------------------------------+
|
||||
| 0 |
|
||||
+--------------------------------+
|
||||
|
||||
-- k=1
|
||||
SELECT vec_id FROM vectors_k
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 1;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
+--------+
|
||||
|
||||
-- k=5
|
||||
SELECT vec_id FROM vectors_k
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 5;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
| 2 |
|
||||
| 3 |
|
||||
| 4 |
|
||||
| 5 |
|
||||
+--------+
|
||||
|
||||
-- k=10 (all vectors)
|
||||
SELECT vec_id FROM vectors_k
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 10;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
| 2 |
|
||||
| 3 |
|
||||
| 4 |
|
||||
| 5 |
|
||||
| 6 |
|
||||
| 7 |
|
||||
| 8 |
|
||||
| 9 |
|
||||
| 10 |
|
||||
+--------+
|
||||
|
||||
DROP TABLE vectors_k;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 9: Engine parameter tests
|
||||
-- ============================================
|
||||
-- Create table with explicit engine parameter (usearch is default)
|
||||
CREATE TABLE vectors_engine (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (engine = 'usearch', metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert test vectors
|
||||
INSERT INTO vectors_engine VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.5, 0.5, 0.0, 0.0]');
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
-- Memtable search
|
||||
SELECT vec_id FROM vectors_engine
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
| 3 |
|
||||
+--------+
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_engine');
|
||||
|
||||
+-------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_engine') |
|
||||
+-------------------------------------+
|
||||
| 0 |
|
||||
+-------------------------------------+
|
||||
|
||||
-- SST index search with usearch engine
|
||||
SELECT vec_id FROM vectors_engine
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+
|
||||
| vec_id |
|
||||
+--------+
|
||||
| 1 |
|
||||
| 3 |
|
||||
+--------+
|
||||
|
||||
DROP TABLE vectors_engine;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
341
tests/cases/standalone/common/function/vector/vector_index.sql
Normal file
341
tests/cases/standalone/common/function/vector/vector_index.sql
Normal file
@@ -0,0 +1,341 @@
|
||||
-- Test vector index creation and KNN search
|
||||
|
||||
-- ============================================
|
||||
-- Part 1: Basic L2 squared distance tests
|
||||
-- ============================================
|
||||
|
||||
-- Create a table with vector column and L2sq vector index
|
||||
CREATE TABLE vectors_l2sq (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
-- Insert test vectors
|
||||
INSERT INTO vectors_l2sq VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 0.0, 1.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.0, 0.0, 1.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[1.0, 1.0, 0.0, 0.0]'),
|
||||
(6, '2024-01-01 00:00:05', '[0.0, 1.0, 1.0, 0.0]'),
|
||||
(7, '2024-01-01 00:00:06', '[0.0, 0.0, 1.0, 1.0]'),
|
||||
(8, '2024-01-01 00:00:07', '[1.0, 0.0, 0.0, 1.0]');
|
||||
|
||||
-- Query BEFORE flush (memtable search)
|
||||
-- Expected: vec_id=1 (distance=0), vec_id=5 (distance=1), vec_id=8 (distance=1)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_l2sq
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
-- Flush to create SST files with vector index
|
||||
ADMIN FLUSH_TABLE('vectors_l2sq');
|
||||
|
||||
-- Query AFTER flush (SST index search)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_l2sq
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
-- Query with different target vector
|
||||
-- Expected: vec_id=6 (distance=0), vec_id=2 (distance=1), vec_id=3 (distance=1)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[0.0, 1.0, 1.0, 0.0]') as distance
|
||||
FROM vectors_l2sq
|
||||
ORDER BY vec_l2sq_distance(embedding, '[0.0, 1.0, 1.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
DROP TABLE vectors_l2sq;
|
||||
|
||||
-- ============================================
|
||||
-- Part 2: Cosine distance tests
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_cosine (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'cosine'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
-- Insert vectors with different magnitudes but same/different directions
|
||||
INSERT INTO vectors_cosine VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[2.0, 0.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[1.0, 1.0, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[-1.0, 0.0, 0.0, 0.0]');
|
||||
|
||||
-- Memtable search with cosine distance
|
||||
-- vec_id=1 and vec_id=2 should have the same cosine distance (0) since they point in the same direction
|
||||
SELECT vec_id, vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_cosine
|
||||
ORDER BY vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_cosine');
|
||||
|
||||
-- SST index search with cosine distance
|
||||
SELECT vec_id, vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_cosine
|
||||
ORDER BY vec_cos_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
DROP TABLE vectors_cosine;
|
||||
|
||||
-- ============================================
|
||||
-- Part 3: Dot product (inner product) tests
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_dot (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'dot'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_dot VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[2.0, 0.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[1.0, 1.0, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[-1.0, 0.0, 0.0, 0.0]');
|
||||
|
||||
-- Memtable search with dot product
|
||||
-- Larger dot product means more similar, so we order by the dot product in descending order (DESC)
|
||||
-- vec_id=2 should be best (dot=2), vec_id=1 and vec_id=4 have dot=1
|
||||
SELECT vec_id, vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') as dot_product
|
||||
FROM vectors_dot
|
||||
ORDER BY vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') DESC, vec_id
|
||||
LIMIT 3;
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_dot');
|
||||
|
||||
-- SST index search with dot product
|
||||
SELECT vec_id, vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') as dot_product
|
||||
FROM vectors_dot
|
||||
ORDER BY vec_dot_product(embedding, '[1.0, 0.0, 0.0, 0.0]') DESC, vec_id
|
||||
LIMIT 3;
|
||||
|
||||
DROP TABLE vectors_dot;
|
||||
|
||||
-- ============================================
|
||||
-- Part 4: NULL vector handling tests
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_null (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
-- Insert vectors with some NULLs
|
||||
INSERT INTO vectors_null VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', NULL),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', NULL),
|
||||
(5, '2024-01-01 00:00:04', '[0.0, 0.0, 1.0, 0.0]');
|
||||
|
||||
-- Memtable search should skip NULL vectors
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_null
|
||||
WHERE embedding IS NOT NULL
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_null');
|
||||
|
||||
-- SST index search should also skip NULL vectors
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_null
|
||||
WHERE embedding IS NOT NULL
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
DROP TABLE vectors_null;
|
||||
|
||||
-- ============================================
|
||||
-- Part 5: Mixed memtable + SST search tests
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_mixed (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
-- Insert first batch and flush to SST
|
||||
INSERT INTO vectors_mixed VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 0.0, 1.0, 0.0]');
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_mixed');
|
||||
|
||||
-- Insert second batch (stays in memtable)
|
||||
INSERT INTO vectors_mixed VALUES
|
||||
(4, '2024-01-01 00:00:03', '[0.5, 0.5, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[0.9, 0.1, 0.0, 0.0]');
|
||||
|
||||
-- Query should search both SST (vec_id 1,2,3) and memtable (vec_id 4,5)
|
||||
-- Target: [1.0, 0.0, 0.0, 0.0]
|
||||
-- Expected: vec_id=1 (dist=0), vec_id=5 (dist=0.02), vec_id=4 (dist=0.5)
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_mixed
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 3;
|
||||
|
||||
DROP TABLE vectors_mixed;
|
||||
|
||||
-- ============================================
|
||||
-- Part 6: KNN with WHERE clause tests
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_filter (
|
||||
vec_id INT,
|
||||
category STRING,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_filter VALUES
|
||||
(1, 'A', '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, 'B', '2024-01-01 00:00:01', '[0.9, 0.1, 0.0, 0.0]'),
|
||||
(3, 'A', '2024-01-01 00:00:02', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(4, 'B', '2024-01-01 00:00:03', '[0.1, 0.9, 0.0, 0.0]'),
|
||||
(5, 'A', '2024-01-01 00:00:04', '[0.5, 0.5, 0.0, 0.0]');
|
||||
|
||||
-- Memtable search with filter
|
||||
SELECT vec_id, category, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_filter
|
||||
WHERE category = 'A'
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_filter');
|
||||
|
||||
-- SST index search with filter
|
||||
SELECT vec_id, category, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_filter
|
||||
WHERE category = 'A'
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
-- Filter with time range
|
||||
SELECT vec_id, vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]') as distance
|
||||
FROM vectors_filter
|
||||
WHERE ts >= '2024-01-01 00:00:02' AND ts <= '2024-01-01 00:00:04'
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
DROP TABLE vectors_filter;
|
||||
|
||||
-- ============================================
|
||||
-- Part 7: Higher dimension vectors
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_high_dim (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(128) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
-- Insert high-dimensional vectors (simplified: first few elements differ)
|
||||
INSERT INTO vectors_high_dim VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]');
|
||||
|
||||
-- Memtable search
|
||||
SELECT vec_id FROM vectors_high_dim
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_high_dim');
|
||||
|
||||
-- SST index search
|
||||
SELECT vec_id FROM vectors_high_dim
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
DROP TABLE vectors_high_dim;
|
||||
|
||||
-- ============================================
|
||||
-- Part 8: Different k values (LIMIT)
|
||||
-- ============================================
|
||||
|
||||
CREATE TABLE vectors_k (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_k VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.1, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.8, 0.2, 0.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.7, 0.3, 0.0, 0.0]'),
|
||||
(5, '2024-01-01 00:00:04', '[0.6, 0.4, 0.0, 0.0]'),
|
||||
(6, '2024-01-01 00:00:05', '[0.5, 0.5, 0.0, 0.0]'),
|
||||
(7, '2024-01-01 00:00:06', '[0.4, 0.6, 0.0, 0.0]'),
|
||||
(8, '2024-01-01 00:00:07', '[0.3, 0.7, 0.0, 0.0]'),
|
||||
(9, '2024-01-01 00:00:08', '[0.2, 0.8, 0.0, 0.0]'),
|
||||
(10, '2024-01-01 00:00:09', '[0.1, 0.9, 0.0, 0.0]');
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_k');
|
||||
|
||||
-- k=1
|
||||
SELECT vec_id FROM vectors_k
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 1;
|
||||
|
||||
-- k=5
|
||||
SELECT vec_id FROM vectors_k
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 5;
|
||||
|
||||
-- k=10 (all vectors)
|
||||
SELECT vec_id FROM vectors_k
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 10;
|
||||
|
||||
DROP TABLE vectors_k;
|
||||
|
||||
-- ============================================
|
||||
-- Part 9: Engine parameter tests
|
||||
-- ============================================
|
||||
|
||||
-- Create table with explicit engine parameter (usearch is default)
|
||||
CREATE TABLE vectors_engine (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(4) NOT NULL VECTOR INDEX WITH (engine = 'usearch', metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
-- Insert test vectors
|
||||
INSERT INTO vectors_engine VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0, 0.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0, 0.0, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.5, 0.5, 0.0, 0.0]');
|
||||
|
||||
-- Memtable search
|
||||
SELECT vec_id FROM vectors_engine
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_engine');
|
||||
|
||||
-- SST index search with usearch engine
|
||||
SELECT vec_id FROM vectors_engine
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0, 0.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
DROP TABLE vectors_engine;
|
||||
@@ -0,0 +1,270 @@
|
||||
-- Vector index explain analyze coverage
|
||||
-- ============================================
|
||||
-- Part 1: Single table KNN explain
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_explain (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_explain VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_explain');
|
||||
|
||||
+--------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_explain') |
|
||||
+--------------------------------------+
|
||||
| 0 |
|
||||
+--------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT vec_id
|
||||
FROM vectors_explain
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[vec_id@0 as vec_id] metrics=REDACTED_|
|
||||
|_|_|_SortPreservingMergeExec: [vec_l2sq_distance(embedding@1, [1.0, 0.0]) ASC NULLS LAST, vec_id@0 ASC NULLS LAST], fetch=2 metrics=REDACTED_|
|
||||
|_|_|_SortExec: TopK(fetch=2), expr=[vec_l2sq_distance(embedding@1, [1.0, 0.0]) ASC NULLS LAST, vec_id@0 ASC NULLS LAST], preserve_partitioning=[true] metrics=REDACTED_|
|
||||
|_|_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_SeqScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1}, "projection": ["vec_id", "embedding"], "vector_index_k": 2, "files": [{"file_id":"REDACTED","time_range_start":"REDACTED","time_range_end":"REDACTED","rows":4,"size":REDACTED,"index_size":902}], "flat_format":REDACTED, "metrics_per_partition": REDACTED metrics=REDACTED |
|
||||
|_|_|_|
|
||||
|_|_| Total rows: REDACTED_|
|
||||
+-+-+-+
|
||||
|
||||
DROP TABLE vectors_explain;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 2: Join with vector order/limit
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_explain_left (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE vectors_explain_right (
|
||||
vec_id INT,
|
||||
note STRING,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_explain_left VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
INSERT INTO vectors_explain_right VALUES
|
||||
(3, 'keep', '2024-01-01 00:00:02'),
|
||||
(4, 'keep', '2024-01-01 00:00:03');
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_explain_left');
|
||||
|
||||
+-------------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_explain_left') |
|
||||
+-------------------------------------------+
|
||||
| 0 |
|
||||
+-------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT l.vec_id
|
||||
FROM vectors_explain_left l
|
||||
JOIN vectors_explain_right r ON l.vec_id = r.vec_id
|
||||
ORDER BY vec_l2sq_distance(l.embedding, '[1.0, 0.0]'), l.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_ProjectionExec: expr=[vec_id@0 as vec_id] metrics=REDACTED_|
|
||||
|_|_|_SortPreservingMergeExec: [vec_l2sq_distance(embedding@1, [1.0, 0.0]) ASC NULLS LAST, vec_id@0 ASC NULLS LAST], fetch=1 metrics=REDACTED_|
|
||||
|_|_|_SortExec: TopK(fetch=1), expr=[vec_l2sq_distance(embedding@1, [1.0, 0.0]) ASC NULLS LAST, vec_id@0 ASC NULLS LAST], preserve_partitioning=[true] metrics=REDACTED_|
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 metrics=REDACTED_|
|
||||
|_|_|_HashJoinExec: mode=Partitioned, join_type=Inner, on=[(vec_id@0, vec_id@0)], projection=[vec_id@0, embedding@1] metrics=REDACTED_|
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 metrics=REDACTED_|
|
||||
|_|_|_RepartitionExec: partitioning=Hash([vec_id@0],REDACTED
|
||||
|_|_|_ProjectionExec: expr=[vec_id@0 as vec_id, embedding@2 as embedding] metrics=REDACTED_|
|
||||
|_|_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 metrics=REDACTED_|
|
||||
|_|_|_RepartitionExec: partitioning=Hash([vec_id@0],REDACTED
|
||||
|_|_|_ProjectionExec: expr=[vec_id@0 as vec_id] metrics=REDACTED_|
|
||||
|_|_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_SeqScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1}, "projection": ["vec_id", "ts", "embedding"], "files": [{"file_id":"REDACTED","time_range_start":"REDACTED","time_range_end":"REDACTED","rows":4,"size":REDACTED,"index_size":902}], "flat_format":REDACTED, "metrics_per_partition": REDACTED metrics=REDACTED |
|
||||
|_|_|_|
|
||||
| 1_| 0_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_SeqScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "projection": ["vec_id", "note", "ts"], "flat_format":REDACTED, "metrics_per_partition": REDACTED metrics=REDACTED_|
|
||||
|_|_|_|
|
||||
|_|_| Total rows: REDACTED_|
|
||||
+-+-+-+
|
||||
|
||||
DROP TABLE vectors_explain_left;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE vectors_explain_right;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 3: Cosine and dot explain coverage
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_explain_metric (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'cosine'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_explain_metric VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[-1.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, -1.0]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_explain_metric');
|
||||
|
||||
+---------------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_explain_metric') |
|
||||
+---------------------------------------------+
|
||||
| 0 |
|
||||
+---------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT vec_id
|
||||
FROM vectors_explain_metric
|
||||
ORDER BY vec_cos_distance(embedding, '[1.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[vec_id@0 as vec_id] metrics=REDACTED_|
|
||||
|_|_|_SortPreservingMergeExec: [vec_cos_distance(embedding@1, [1.0, 0.0]) ASC NULLS LAST, vec_id@0 ASC NULLS LAST], fetch=2 metrics=REDACTED_|
|
||||
|_|_|_SortExec: TopK(fetch=2), expr=[vec_cos_distance(embedding@1, [1.0, 0.0]) ASC NULLS LAST, vec_id@0 ASC NULLS LAST], preserve_partitioning=[true] metrics=REDACTED_|
|
||||
|_|_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_SeqScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1}, "projection": ["vec_id", "embedding"], "vector_index_k": 2, "files": [{"file_id":"REDACTED","time_range_start":"REDACTED","time_range_end":"REDACTED","rows":4,"size":REDACTED,"index_size":902}], "flat_format":REDACTED, "metrics_per_partition": REDACTED metrics=REDACTED |
|
||||
|_|_|_|
|
||||
|_|_| Total rows: REDACTED_|
|
||||
+-+-+-+
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT vec_id
|
||||
FROM vectors_explain_metric
|
||||
ORDER BY vec_dot_product(embedding, '[1.0, 0.0]') DESC, vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[vec_id@0 as vec_id] metrics=REDACTED_|
|
||||
|_|_|_SortPreservingMergeExec: [vec_dot_product(embedding@1, [1.0, 0.0]) DESC, vec_id@0 ASC NULLS LAST], fetch=2 metrics=REDACTED_|
|
||||
|_|_|_SortExec: TopK(fetch=2), expr=[vec_dot_product(embedding@1, [1.0, 0.0]) DESC, vec_id@0 ASC NULLS LAST], preserve_partitioning=[true] metrics=REDACTED_|
|
||||
|_|_|_CooperativeExec metrics=REDACTED_|
|
||||
|_|_|_SeqScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1}, "projection": ["vec_id", "embedding"], "vector_index_k": 2, "files": [{"file_id":"REDACTED","time_range_start":"REDACTED","time_range_end":"REDACTED","rows":4,"size":REDACTED,"index_size":902}], "flat_format":REDACTED, "metrics_per_partition": REDACTED metrics=REDACTED |
|
||||
|_|_|_|
|
||||
|_|_| Total rows: REDACTED_|
|
||||
+-+-+-+
|
||||
|
||||
DROP TABLE vectors_explain_metric;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
-- Vector index explain analyze coverage
|
||||
|
||||
|
||||
-- ============================================
|
||||
-- Part 1: Single table KNN explain
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_explain (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_explain VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_explain');
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT vec_id
|
||||
FROM vectors_explain
|
||||
ORDER BY vec_l2sq_distance(embedding, '[1.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
DROP TABLE vectors_explain;
|
||||
|
||||
-- ============================================
|
||||
-- Part 2: Join with vector order/limit
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_explain_left (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
CREATE TABLE vectors_explain_right (
|
||||
vec_id INT,
|
||||
note STRING,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_explain_left VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
INSERT INTO vectors_explain_right VALUES
|
||||
(3, 'keep', '2024-01-01 00:00:02'),
|
||||
(4, 'keep', '2024-01-01 00:00:03');
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_explain_left');
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT l.vec_id
|
||||
FROM vectors_explain_left l
|
||||
JOIN vectors_explain_right r ON l.vec_id = r.vec_id
|
||||
ORDER BY vec_l2sq_distance(l.embedding, '[1.0, 0.0]'), l.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE vectors_explain_left;
|
||||
DROP TABLE vectors_explain_right;
|
||||
|
||||
-- ============================================
|
||||
-- Part 3: Cosine and dot explain coverage
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_explain_metric (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'cosine'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_explain_metric VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.0, 1.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[-1.0, 0.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, -1.0]');
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_explain_metric');
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT vec_id
|
||||
FROM vectors_explain_metric
|
||||
ORDER BY vec_cos_distance(embedding, '[1.0, 0.0]'), vec_id
|
||||
LIMIT 2;
|
||||
|
||||
-- SQLNESS REPLACE ("metrics_per_partition":\s*.*metrics=) "metrics_per_partition": REDACTED metrics=
|
||||
-- SQLNESS REPLACE (metrics=\{.*\}) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (metrics=\[[^\]]*\]) metrics=REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE Hash\(\[vec_id@0\],.* Hash([vec_id@0],REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE "(file_id|time_range_start|time_range_end)":"[^"]+" "$1":"REDACTED"
|
||||
-- SQLNESS REPLACE ("[a-z_]+":"[0-9\.]+(ns|us|µs|ms|s)") "DURATION": REDACTED
|
||||
-- SQLNESS REPLACE "(size|flat_format)":\s*(\d+|true|false) "$1":REDACTED
|
||||
-- SQLNESS REPLACE ,\s*filter=.*?metrics= metrics=
|
||||
-- SQLNESS REPLACE Total\s+rows:\s+\d+ Total rows: REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN ANALYZE VERBOSE
|
||||
SELECT vec_id
|
||||
FROM vectors_explain_metric
|
||||
ORDER BY vec_dot_product(embedding, '[1.0, 0.0]') DESC, vec_id
|
||||
LIMIT 2;
|
||||
|
||||
DROP TABLE vectors_explain_metric;
|
||||
@@ -0,0 +1,240 @@
|
||||
-- Vector index join/subquery coverage
|
||||
-- ============================================
|
||||
-- Part 1: Join should not pre-limit left table
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_join_left (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE vectors_join_right (
|
||||
vec_id INT,
|
||||
note STRING,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_join_left VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
INSERT INTO vectors_join_right VALUES
|
||||
(3, 'keep', '2024-01-01 00:00:02'),
|
||||
(4, 'keep', '2024-01-01 00:00:03');
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
SELECT l.vec_id, round(vec_l2sq_distance(l.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM vectors_join_left l
|
||||
JOIN vectors_join_right r ON l.vec_id = r.vec_id
|
||||
ORDER BY dist, l.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
+--------+------+
|
||||
| vec_id | dist |
|
||||
+--------+------+
|
||||
| 4 | 1.81 |
|
||||
+--------+------+
|
||||
|
||||
DROP TABLE vectors_join_left;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE vectors_join_right;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 2: Subquery should be a barrier
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_subquery (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_subquery VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
SELECT s.vec_id, round(vec_l2sq_distance(s.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM (
|
||||
SELECT * FROM vectors_subquery WHERE vec_id >= 3
|
||||
) s
|
||||
ORDER BY dist, s.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
+--------+------+
|
||||
| vec_id | dist |
|
||||
+--------+------+
|
||||
| 4 | 1.81 |
|
||||
+--------+------+
|
||||
|
||||
DROP TABLE vectors_subquery;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 3: LEFT JOIN should not pre-limit
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_left_join (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE vectors_left_join_filter (
|
||||
vec_id INT,
|
||||
keep BOOLEAN,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_left_join VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
-- Only vec_id 3,4 have matching rows in filter table
|
||||
INSERT INTO vectors_left_join_filter VALUES
|
||||
(3, true, '2024-01-01 00:00:02'),
|
||||
(4, true, '2024-01-01 00:00:03');
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
-- LEFT JOIN then filter by IS NOT NULL
|
||||
-- Should return vec_id=4 (dist=1.81), not vec_id=1 or 2
|
||||
SELECT l.vec_id, round(vec_l2sq_distance(l.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM vectors_left_join l
|
||||
LEFT JOIN vectors_left_join_filter r ON l.vec_id = r.vec_id
|
||||
WHERE r.vec_id IS NOT NULL
|
||||
ORDER BY dist, l.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
+--------+------+
|
||||
| vec_id | dist |
|
||||
+--------+------+
|
||||
| 4 | 1.81 |
|
||||
+--------+------+
|
||||
|
||||
DROP TABLE vectors_left_join;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE vectors_left_join_filter;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 4: Inlineable subquery should allow hint
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_inline_subquery (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_inline_subquery VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_inline_subquery');
|
||||
|
||||
+----------------------------------------------+
|
||||
| ADMIN FLUSH_TABLE('vectors_inline_subquery') |
|
||||
+----------------------------------------------+
|
||||
| 0 |
|
||||
+----------------------------------------------+
|
||||
|
||||
-- Subquery without LIMIT/DISTINCT/aggregation can be inlined
|
||||
-- Vector hint should be able to push down
|
||||
SELECT s.vec_id, round(vec_l2sq_distance(s.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM (
|
||||
SELECT * FROM vectors_inline_subquery WHERE vec_id >= 1
|
||||
) s
|
||||
ORDER BY dist, s.vec_id
|
||||
LIMIT 2;
|
||||
|
||||
+--------+------+
|
||||
| vec_id | dist |
|
||||
+--------+------+
|
||||
| 1 | 0.0 |
|
||||
| 2 | 0.01 |
|
||||
+--------+------+
|
||||
|
||||
DROP TABLE vectors_inline_subquery;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- ============================================
|
||||
-- Part 5: CTE should be a barrier
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_cte (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO vectors_cte VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
-- CTE acts as optimization barrier
|
||||
-- Filter in CTE limits to vec_id >= 3, so result should be vec_id=4 (dist=1.81)
|
||||
WITH filtered AS (
|
||||
SELECT * FROM vectors_cte WHERE vec_id >= 3
|
||||
)
|
||||
SELECT vec_id, round(vec_l2sq_distance(embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM filtered
|
||||
ORDER BY dist, vec_id
|
||||
LIMIT 1;
|
||||
|
||||
+--------+------+
|
||||
| vec_id | dist |
|
||||
+--------+------+
|
||||
| 4 | 1.81 |
|
||||
+--------+------+
|
||||
|
||||
DROP TABLE vectors_cte;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
-- Vector index join/subquery coverage
|
||||
|
||||
-- ============================================
|
||||
-- Part 1: Join should not pre-limit left table
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_join_left (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
CREATE TABLE vectors_join_right (
|
||||
vec_id INT,
|
||||
note STRING,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_join_left VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
INSERT INTO vectors_join_right VALUES
|
||||
(3, 'keep', '2024-01-01 00:00:02'),
|
||||
(4, 'keep', '2024-01-01 00:00:03');
|
||||
|
||||
SELECT l.vec_id, round(vec_l2sq_distance(l.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM vectors_join_left l
|
||||
JOIN vectors_join_right r ON l.vec_id = r.vec_id
|
||||
ORDER BY dist, l.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE vectors_join_left;
|
||||
DROP TABLE vectors_join_right;
|
||||
|
||||
-- ============================================
|
||||
-- Part 2: Subquery should be a barrier
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_subquery (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_subquery VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
SELECT s.vec_id, round(vec_l2sq_distance(s.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM (
|
||||
SELECT * FROM vectors_subquery WHERE vec_id >= 3
|
||||
) s
|
||||
ORDER BY dist, s.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE vectors_subquery;
|
||||
|
||||
-- ============================================
|
||||
-- Part 3: LEFT JOIN should not pre-limit
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_left_join (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
CREATE TABLE vectors_left_join_filter (
|
||||
vec_id INT,
|
||||
keep BOOLEAN,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_left_join VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
-- Only vec_id 3,4 have matching rows in filter table
|
||||
INSERT INTO vectors_left_join_filter VALUES
|
||||
(3, true, '2024-01-01 00:00:02'),
|
||||
(4, true, '2024-01-01 00:00:03');
|
||||
|
||||
-- LEFT JOIN then filter by IS NOT NULL
|
||||
-- Should return vec_id=4 (dist=1.81), not vec_id=1 or 2
|
||||
SELECT l.vec_id, round(vec_l2sq_distance(l.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM vectors_left_join l
|
||||
LEFT JOIN vectors_left_join_filter r ON l.vec_id = r.vec_id
|
||||
WHERE r.vec_id IS NOT NULL
|
||||
ORDER BY dist, l.vec_id
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE vectors_left_join;
|
||||
DROP TABLE vectors_left_join_filter;
|
||||
|
||||
-- ============================================
|
||||
-- Part 4: Inlineable subquery should allow hint
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_inline_subquery (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_inline_subquery VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
ADMIN FLUSH_TABLE('vectors_inline_subquery');
|
||||
|
||||
-- Subquery without LIMIT/DISTINCT/aggregation can be inlined
|
||||
-- Vector hint should be able to push down
|
||||
SELECT s.vec_id, round(vec_l2sq_distance(s.embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM (
|
||||
SELECT * FROM vectors_inline_subquery WHERE vec_id >= 1
|
||||
) s
|
||||
ORDER BY dist, s.vec_id
|
||||
LIMIT 2;
|
||||
|
||||
DROP TABLE vectors_inline_subquery;
|
||||
|
||||
-- ============================================
|
||||
-- Part 5: CTE should be a barrier
|
||||
-- ============================================
|
||||
CREATE TABLE vectors_cte (
|
||||
vec_id INT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
embedding VECTOR(2) NOT NULL VECTOR INDEX WITH (metric = 'l2sq'),
|
||||
PRIMARY KEY (vec_id)
|
||||
);
|
||||
|
||||
INSERT INTO vectors_cte VALUES
|
||||
(1, '2024-01-01 00:00:00', '[1.0, 0.0]'),
|
||||
(2, '2024-01-01 00:00:01', '[0.9, 0.0]'),
|
||||
(3, '2024-01-01 00:00:02', '[0.0, 1.0]'),
|
||||
(4, '2024-01-01 00:00:03', '[0.0, 0.9]');
|
||||
|
||||
-- CTE acts as optimization barrier
|
||||
-- Filter in CTE limits to vec_id >= 3, so result should be vec_id=4 (dist=1.81)
|
||||
WITH filtered AS (
|
||||
SELECT * FROM vectors_cte WHERE vec_id >= 3
|
||||
)
|
||||
SELECT vec_id, round(vec_l2sq_distance(embedding, '[1.0, 0.0]'), 2) AS dist
|
||||
FROM filtered
|
||||
ORDER BY dist, vec_id
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE vectors_cte;
|
||||
2
tests/runner/src/env/bare.rs
vendored
2
tests/runner/src/env/bare.rs
vendored
@@ -528,7 +528,7 @@ impl Env {
|
||||
"--bin",
|
||||
"greptime",
|
||||
"--features",
|
||||
"pg_kvbackend,mysql_kvbackend",
|
||||
"pg_kvbackend,mysql_kvbackend,vector_index",
|
||||
])
|
||||
.output()
|
||||
.expect("Failed to start GreptimeDB");
|
||||
|
||||
Reference in New Issue
Block a user