Compare commits

..

12 Commits

Author SHA1 Message Date
Lei Xu
09585390f7 flexible vector column 2023-08-29 22:09:58 -07:00
Will Jones
8391ffee84 chore: make crate more discoverable (#443)
A few small changes to make the Rust crate more discoverable.
2023-08-25 08:59:14 -07:00
Lance Release
fe8848efb9 [python] Bump version: 0.2.1 → 0.2.2 2023-08-24 23:18:10 +00:00
Chang She
213c313b99 Revert "Updating package-lock.json" (#455)
This reverts commit ab97e5d632.

Co-authored-by: Chang She <chang@lancedb.com>
2023-08-24 15:54:57 -07:00
Chang She
157e995a43 Revert "Bump version: 0.2.4 → 0.2.5" (#454)
This reverts commit 87e9a0250f.

I triggered the nodejs release commit GHA by mistake. Reverting it.
The tag will be removed manually.

Co-authored-by: Chang She <chang@lancedb.com>
2023-08-24 15:44:37 -07:00
Lance Release
ab97e5d632 Updating package-lock.json 2023-08-24 21:54:35 +00:00
Lance Release
87e9a0250f Bump version: 0.2.4 → 0.2.5 2023-08-24 21:54:18 +00:00
Chang She
e587a17a64 [python] Support schema evolution in local LanceDB (#452)
Previously if you needed to add a column to a table you'd have to
rewrite the whole table. Instead,
we use the merge functionality from Lance format
to incrementally add columns from another table
or dataframe.

---------

Co-authored-by: Chang She <chang@lancedb.com>
Co-authored-by: Weston Pace <weston.pace@gmail.com>
2023-08-24 14:40:49 -07:00
Chang She
2f1f9f6338 [python] improve restore functionality (#451)
Previously the temporary restore feature required copying data. The new
feature in pylance does not.

---------

Co-authored-by: Chang She <chang@lancedb.com>
Co-authored-by: Weston Pace <weston.pace@gmail.com>
2023-08-24 11:00:34 -07:00
Lance Release
a34fa4df26 Updating package-lock.json 2023-08-24 05:23:19 +00:00
Lance Release
e20979b335 Updating package-lock.json 2023-08-24 04:48:11 +00:00
Lance Release
08689c345d Bump version: 0.2.3 → 0.2.4 2023-08-24 04:47:57 +00:00
12 changed files with 294 additions and 66 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.3
current_version = 0.2.4
commit = True
message = Bump version: {current_version} → {new_version}
tag = True

74
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.2.3",
"version": "0.2.4",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.2.3",
"version": "0.2.4",
"cpu": [
"x64",
"arm64"
@@ -51,11 +51,11 @@
"typescript": "*"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.2.3",
"@lancedb/vectordb-darwin-x64": "0.2.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.2.3",
"@lancedb/vectordb-linux-x64-gnu": "0.2.3",
"@lancedb/vectordb-win32-x64-msvc": "0.2.3"
"@lancedb/vectordb-darwin-arm64": "0.2.4",
"@lancedb/vectordb-darwin-x64": "0.2.4",
"@lancedb/vectordb-linux-arm64-gnu": "0.2.4",
"@lancedb/vectordb-linux-x64-gnu": "0.2.4",
"@lancedb/vectordb-win32-x64-msvc": "0.2.4"
}
},
"node_modules/@apache-arrow/ts": {
@@ -315,9 +315,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.3.tgz",
"integrity": "sha512-/9dRCXrV/UsZv3fqAC/Q+D2FPKXMRprcb+a77tt4I0Iy5iGT55UDRfpaXvmJeKquhTJkZ0AuyoK5BmOh7cY41w==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.4.tgz",
"integrity": "sha512-MqiZXamHYEOfguPsHWLBQ56IabIN6Az8u2Hx8LCyXcxW9gcyJZMSAfJc+CcA4KYHKotv0KsVBhgxZ3kaZQQyiw==",
"cpu": [
"arm64"
],
@@ -327,9 +327,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.3.tgz",
"integrity": "sha512-p06WkjmdVwDxkH8ghIWh59SCgUhjXBpy1gQISgktouymqfoFbBHz7vmeI6VO1oBA5ji6vSgGZxqjmeLRKM6blA==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.4.tgz",
"integrity": "sha512-DzL+mw5WhKDwXdEFlPh8M9zSDhGnfks7NvEh6ZqKbU6znH206YB7g3OA4WfFyV579IIEQ8jd4v/XDthNzQKuSA==",
"cpu": [
"x64"
],
@@ -339,9 +339,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.3.tgz",
"integrity": "sha512-cSDcJgfbnRmCXZ3AoRWpCAa07PMdB/k8m1LjmxnhpOnP1ohg1eUl99jwPCgd+5GK+iZmezRqbyO+YXlgsCp7GQ==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.4.tgz",
"integrity": "sha512-LP1nNfIpFxCgcCMlIQdseDX9dZU27TNhCL41xar8euqcetY5uKvi0YqhiVlpNO85Ss1FRQBgQ/GtnOM6Bo7oBQ==",
"cpu": [
"arm64"
],
@@ -351,9 +351,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.3.tgz",
"integrity": "sha512-AFA3J4hBYapGC37iXheiN6tGruitx5bmoWXkUcDv/qAaE4tizVZHB9cgx9ThTB0RDsvZEOZ5zCy7BOzPH+oCOg==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.4.tgz",
"integrity": "sha512-m4RhOI5JJWPU9Ip2LlRIzXu4mwIv9M//OyAuTLiLKRm8726jQHhYi5VFUEtNzqY0o0p6pS0b3XbifYQ+cyJn3Q==",
"cpu": [
"x64"
],
@@ -363,9 +363,9 @@
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.3.tgz",
"integrity": "sha512-LI1mz1HdcpNXTM7HbcLdXz0qvUU4LxSqRC7/kMU918VlOeWy/PnryRrjHnCjcgciGzu1rVlvCqRPh7fVwaG6Kg==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.4.tgz",
"integrity": "sha512-lMF/2e3YkKWnTYv0R7cUCfjMkAqepNaHSc/dvJzCNsFVEhfDsFdScQFLToARs5GGxnq4fOf+MKpaHg/W6QTxiA==",
"cpu": [
"x64"
],
@@ -4852,33 +4852,33 @@
}
},
"@lancedb/vectordb-darwin-arm64": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.3.tgz",
"integrity": "sha512-/9dRCXrV/UsZv3fqAC/Q+D2FPKXMRprcb+a77tt4I0Iy5iGT55UDRfpaXvmJeKquhTJkZ0AuyoK5BmOh7cY41w==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.4.tgz",
"integrity": "sha512-MqiZXamHYEOfguPsHWLBQ56IabIN6Az8u2Hx8LCyXcxW9gcyJZMSAfJc+CcA4KYHKotv0KsVBhgxZ3kaZQQyiw==",
"optional": true
},
"@lancedb/vectordb-darwin-x64": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.3.tgz",
"integrity": "sha512-p06WkjmdVwDxkH8ghIWh59SCgUhjXBpy1gQISgktouymqfoFbBHz7vmeI6VO1oBA5ji6vSgGZxqjmeLRKM6blA==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.4.tgz",
"integrity": "sha512-DzL+mw5WhKDwXdEFlPh8M9zSDhGnfks7NvEh6ZqKbU6znH206YB7g3OA4WfFyV579IIEQ8jd4v/XDthNzQKuSA==",
"optional": true
},
"@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.3.tgz",
"integrity": "sha512-cSDcJgfbnRmCXZ3AoRWpCAa07PMdB/k8m1LjmxnhpOnP1ohg1eUl99jwPCgd+5GK+iZmezRqbyO+YXlgsCp7GQ==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.4.tgz",
"integrity": "sha512-LP1nNfIpFxCgcCMlIQdseDX9dZU27TNhCL41xar8euqcetY5uKvi0YqhiVlpNO85Ss1FRQBgQ/GtnOM6Bo7oBQ==",
"optional": true
},
"@lancedb/vectordb-linux-x64-gnu": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.3.tgz",
"integrity": "sha512-AFA3J4hBYapGC37iXheiN6tGruitx5bmoWXkUcDv/qAaE4tizVZHB9cgx9ThTB0RDsvZEOZ5zCy7BOzPH+oCOg==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.4.tgz",
"integrity": "sha512-m4RhOI5JJWPU9Ip2LlRIzXu4mwIv9M//OyAuTLiLKRm8726jQHhYi5VFUEtNzqY0o0p6pS0b3XbifYQ+cyJn3Q==",
"optional": true
},
"@lancedb/vectordb-win32-x64-msvc": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.3.tgz",
"integrity": "sha512-LI1mz1HdcpNXTM7HbcLdXz0qvUU4LxSqRC7/kMU918VlOeWy/PnryRrjHnCjcgciGzu1rVlvCqRPh7fVwaG6Kg==",
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.4.tgz",
"integrity": "sha512-lMF/2e3YkKWnTYv0R7cUCfjMkAqepNaHSc/dvJzCNsFVEhfDsFdScQFLToARs5GGxnq4fOf+MKpaHg/W6QTxiA==",
"optional": true
},
"@neon-rs/cli": {

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.2.3",
"version": "0.2.4",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -78,10 +78,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.2.3",
"@lancedb/vectordb-darwin-x64": "0.2.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.2.3",
"@lancedb/vectordb-linux-x64-gnu": "0.2.3",
"@lancedb/vectordb-win32-x64-msvc": "0.2.3"
"@lancedb/vectordb-darwin-arm64": "0.2.4",
"@lancedb/vectordb-darwin-x64": "0.2.4",
"@lancedb/vectordb-linux-arm64-gnu": "0.2.4",
"@lancedb/vectordb-linux-x64-gnu": "0.2.4",
"@lancedb/vectordb-win32-x64-msvc": "0.2.4"
}
}

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.1
current_version = 0.2.2
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -17,13 +17,14 @@ import inspect
import os
from abc import ABC, abstractmethod
from functools import cached_property
from typing import Iterable, List, Union
from typing import Iterable, List, Optional, Union
import lance
import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
from lance import LanceDataset
from lance.dataset import ReaderLike
from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME
@@ -311,7 +312,7 @@ class LanceTable(Table):
This allows viewing previous versions of the table. If you wish to
keep writing to the dataset starting from an old version, then use
the `restore` function instead.
the `restore` function.
Parameters
----------
@@ -341,16 +342,18 @@ class LanceTable(Table):
raise ValueError(f"Invalid version {version}")
self._reset_dataset(version=version)
def restore(self, version: int):
def restore(self, version: int = None):
"""Restore a version of the table. This is an in-place operation.
This creates a new version where the data is equivalent to the
specified previous version. Note that this creates a new snapshot.
specified previous version. Data is not copied (as of python-v0.2.1).
Parameters
----------
version : int
The version to restore.
version : int, default None
The version to restore. If unspecified then restores the currently
checked out version. If the currently checked out version is the
latest version then this is a no-op.
Examples
--------
@@ -373,15 +376,18 @@ class LanceTable(Table):
3
"""
max_ver = max([v["version"] for v in self._dataset.versions()])
if version < 1 or version >= max_ver:
if version is None:
version = self.version
elif version < 1 or version > max_ver:
raise ValueError(f"Invalid version {version}")
else:
self.checkout(version)
if version == max_ver:
self._reset_dataset()
# no-op if restoring the latest version
return
self.checkout(version)
data = self.to_arrow()
self.checkout(max_ver)
self.add(data, mode="overwrite")
self._dataset.restore()
self._reset_dataset()
def __len__(self):
@@ -500,6 +506,69 @@ class LanceTable(Table):
lance.write_dataset(data, self._dataset_uri, schema=self.schema, mode=mode)
self._reset_dataset()
def merge(
self,
other_table: Union[LanceTable, ReaderLike],
left_on: str,
right_on: Optional[str] = None,
schema: Optional[pa.Schema, LanceModel] = None,
):
"""Merge another table into this table.
Performs a left join, where the dataset is the left side and other_table
is the right side. Rows existing in the dataset but not on the left will
be filled with null values, unless Lance doesn't support null values for
some types, in which case an error will be raised. The only overlapping
column allowed is the join column. If other overlapping columns exist,
an error will be raised.
Parameters
----------
other_table: LanceTable or Reader-like
The data to be merged. Acceptable types are:
- Pandas DataFrame, Pyarrow Table, Dataset, Scanner,
Iterator[RecordBatch], or RecordBatchReader
- LanceTable
left_on: str
The name of the column in the dataset to join on.
right_on: str or None
The name of the column in other_table to join on. If None, defaults to
left_on.
schema: pa.Schema or LanceModel, optional
The schema of the other_table.
If not provided, the schema is inferred from the data.
Examples
--------
>>> import lancedb
>>> import pyarrow as pa
>>> df = pa.table({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
>>> db = lancedb.connect("./.lancedb")
>>> table = db.create_table("dataset", df)
>>> table.to_pandas()
x y
0 1 a
1 2 b
2 3 c
>>> new_df = pa.table({'x': [1, 2, 3], 'z': ['d', 'e', 'f']})
>>> table.merge(new_df, 'x')
>>> table.to_pandas()
x y z
0 1 a d
1 2 b e
2 3 c f
"""
if isinstance(schema, LanceModel):
schema = schema.to_arrow_schema()
if isinstance(other_table, LanceTable):
other_table = other_table.to_lance()
if isinstance(other_table, LanceDataset):
other_table = other_table.to_table()
self._dataset.merge(
other_table, left_on=left_on, right_on=right_on, schema=schema
)
self._reset_dataset()
def search(
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
) -> LanceQueryBuilder:

View File

@@ -1,6 +1,6 @@
[project]
name = "lancedb"
version = "0.2.1"
version = "0.2.2"
dependencies = [
"pylance==0.6.5",
"ratelimiter",

View File

@@ -16,6 +16,7 @@ from pathlib import Path
from typing import List
from unittest.mock import PropertyMock, patch
import lance
import numpy as np
import pandas as pd
import pyarrow as pa
@@ -280,3 +281,38 @@ def test_restore(db):
table.restore(1)
assert len(table.list_versions()) == 3
assert len(table) == 1
expected = table.to_arrow()
table.checkout(1)
table.restore()
assert len(table.list_versions()) == 4
assert table.to_arrow() == expected
table.restore(4) # latest version should be no-op
assert len(table.list_versions()) == 4
with pytest.raises(ValueError):
table.restore(5)
with pytest.raises(ValueError):
table.restore(0)
def test_merge(db, tmp_path):
table = LanceTable.create(
db,
"my_table",
data=[{"vector": [1.1, 0.9], "id": 0}, {"vector": [1.2, 1.9], "id": 1}],
)
other_table = pa.table({"document": ["foo", "bar"], "id": [0, 1]})
table.merge(other_table, left_on="id")
assert len(table.list_versions()) == 2
expected = pa.table(
{"vector": [[1.1, 0.9], [1.2, 1.9]], "id": [0, 1], "document": ["foo", "bar"]},
schema=table.schema,
)
assert table.to_arrow() == expected
other_dataset = lance.write_dataset(other_table, tmp_path / "other_table.lance")
table.restore(1)
table.merge(other_dataset, left_on="id")

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb-node"
version = "0.2.3"
version = "0.2.4"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
edition = "2018"

View File

@@ -1,10 +1,12 @@
[package]
name = "vectordb"
version = "0.2.3"
version = "0.2.4"
edition = "2021"
description = "Serverless, low-latency vector database for AI applications"
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
repository = "https://github.com/lancedb/lancedb"
keywords = ["lancedb", "lance", "database", "search"]
categories = ["database-implementations"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

3
rust/vectordb/README.md Normal file
View File

@@ -0,0 +1,3 @@
# LanceDB Rust
Rust client for LanceDB, a serverless vector database. Read more at: https://lancedb.com/

View File

@@ -32,8 +32,42 @@ pub enum Error {
Store { message: String },
#[snafu(display("LanceDBError: {message}"))]
Lance { message: String },
#[snafu(display("Bad query: {message}"))]
InvalidQuery { message: String },
}
impl Error {
pub fn invalid_table_name(name: &str) -> Self {
Self::InvalidTableName {
name: name.to_string(),
}
}
pub fn table_not_found(name: &str) -> Self {
Self::TableNotFound {
name: name.to_string(),
}
}
pub fn table_already_exists(name: &str) -> Self {
Self::TableAlreadyExists {
name: name.to_string(),
}
}
pub fn invalid_query(message: &str) -> Self {
Self::InvalidQuery {
message: message.to_string(),
}
}
pub fn create_dir(path: &str, source: std::io::Error) -> Self {
Self::CreateDir {
path: path.to_string(),
source,
}
}
}
pub type Result<T> = std::result::Result<T, Error>;
impl From<lance::Error> for Error {

View File

@@ -14,12 +14,73 @@
use std::sync::Arc;
use arrow_array::Float32Array;
use lance::dataset::scanner::{DatasetRecordBatchStream, Scanner};
use lance::dataset::Dataset;
use arrow_array::{Array, Float32Array};
use arrow_schema::DataType;
use lance::dataset::{
scanner::{DatasetRecordBatchStream, Scanner},
Dataset,
};
use lance::datatypes::Schema;
use lance::index::vector::MetricType;
use crate::error::Result;
use crate::error::{Error, Result};
struct VectorQuery<T: Array> {
query: T,
column: String,
nprobs: usize,
refine_factor: Option<u32>,
metric_type: Option<MetricType>,
use_index: bool,
}
/// Best effort to find potential vector columns in a [Schema], which is a fixed size column with
/// float number, where the list size is equal to the vector dimension.
///
fn find_vector_columns(schema: &Schema, dim: i32) -> Vec<String> {
schema
.fields
.iter()
.filter(|f| match &f.data_type() {
DataType::FixedSizeList(field, list_size) => {
*list_size == dim && field.data_type().is_floating()
}
_ => false,
})
.map(|f| f.name.to_string())
.collect()
}
impl<T: Array> VectorQuery<T> {
fn try_new(dataset: &Dataset, query: T) -> Result<Self> {
let schema = dataset.schema();
let dim: i32 = query.len() as i32;
let vector_columns = find_vector_columns(schema, dim);
if vector_columns.is_empty() {
return Err(Error::InvalidQuery {
message: format!("Unable to find a vector column with dimension {}", dim),
});
};
if vector_columns.len() != 1 {
return Err(Error::invalid_query(
"Vector query can be applied to more than one vector columns, please specify the column to use"));
}
Ok(Self::with_column(query, &vector_columns[0]))
}
fn with_column(query: T, column: &str) -> Self {
VectorQuery {
query,
column: column.to_string(),
nprobs: 20,
refine_factor: None,
metric_type: None,
use_index: true,
}
}
}
/// A builder for nearest neighbor queries for LanceDB.
pub struct Query {
@@ -32,6 +93,7 @@ pub struct Query {
pub refine_factor: Option<u32>,
pub metric_type: Option<MetricType>,
pub use_index: bool,
vector_query: Option<VectorQuery<Float32Array>>,
}
impl Query {
@@ -48,6 +110,7 @@ impl Query {
pub(crate) fn new(dataset: Arc<Dataset>, vector: Float32Array) -> Self {
Query {
dataset,
vector_query: None,
query_vector: vector,
limit: 10,
nprobes: 20,
@@ -101,6 +164,25 @@ impl Query {
self
}
pub fn vector_search(mut self, query: Float32Array) -> Result<Query> {
let dim = query.len();
self.query_vector = query;
Ok(self)
}
/// Vector search on a given column.
pub fn vector_search_on(mut self, query: Float32Array, column: &str) -> Result<Query> {
if self.vector_query.is_some() {
return Err(Error::invalid_query("Vector search is already set"));
};
let dim = query.len();
self.query_vector = query;
Ok(self)
}
/// Set the number of probes to use.
///
/// # Arguments
@@ -162,9 +244,11 @@ impl Query {
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, RecordBatchReader};
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
use lance::dataset::Dataset;
use lance::index::vector::MetricType;