mirror of https://github.com/lancedb/lancedb.git (synced 2026-01-04 19:02:58 +00:00)

Compare commits: v0.22.1-be...python-v0. (7 commits)

| SHA1 |
|---|
| 5e1e9add07 |
| 97e9938dfe |
| 1d4b92e01e |
| 4c9fc3044b |
| 0ebc8d45a8 |
| f7d78c3420 |
| 6ea6884260 |
```diff
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.0"
+current_version = "0.22.1-beta.1"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
```
.github/workflows/docs_test.yml (vendored, 4 changes)

```diff
@@ -24,7 +24,8 @@ env:
 jobs:
   test-python:
     name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
+    timeout-minutes: 60
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -48,7 +49,6 @@ jobs:
         uses: swatinem/rust-cache@v2
       - name: Build Python
         working-directory: docs/test
-        timeout-minutes: 60
         run:
           python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
       - name: Create test files
```
Cargo.lock (generated, 16 changes)

```diff
@@ -713,9 +713,9 @@ dependencies = [
 
 [[package]]
 name = "aws-sdk-s3"
-version = "1.104.0"
+version = "1.105.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38c488cd6abb0ec9811c401894191932e941c5f91dc226043edacd0afa1634bc"
+checksum = "c99789e929b5e1d9a5aa3fa1d81317f3a789afc796141d11b0eaafd9d9f47e38"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -963,9 +963,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-runtime"
-version = "1.9.1"
+version = "1.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3946acbe1ead1301ba6862e712c7903ca9bb230bdf1fbd1b5ac54158ef2ab1f"
+checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-http",
@@ -1153,7 +1153,7 @@ dependencies = [
  "bitflags 2.9.4",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools 0.11.0",
  "lazy_static",
  "lazycell",
  "log",
@@ -4628,7 +4628,7 @@ dependencies = [
 
 [[package]]
 name = "lancedb"
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4715,7 +4715,7 @@ dependencies = [
 
 [[package]]
 name = "lancedb-nodejs"
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 dependencies = [
  "arrow-array",
  "arrow-ipc",
@@ -4735,7 +4735,7 @@ dependencies = [
 
 [[package]]
 name = "lancedb-python"
-version = "0.25.1-beta.0"
+version = "0.25.1-beta.1"
 dependencies = [
  "arrow",
  "async-trait",
```
```diff
@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
     fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
 
     /// Get strings from Java List<String> object.
+    #[allow(dead_code)]
     fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
 
     /// Get strings from Java String[] object.
```
```diff
@@ -6,6 +6,7 @@ use jni::JNIEnv;
 
 use crate::Result;
 
+#[allow(dead_code)]
 pub trait FromJObject<T> {
     fn extract(&self) -> Result<T>;
 }
@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
     }
 }
 
+#[allow(dead_code)]
 pub trait FromJString {
     fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -66,6 +68,7 @@ pub trait JMapExt {
     fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }
 
+#[allow(dead_code)]
 fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
 where
     for<'a> JObject<'a>: FromJObject<T>,
```
```diff
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.22.1-beta.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
```
```diff
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.22.1-beta.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
```
```diff
@@ -6,7 +6,7 @@
 
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.22.1-beta.0</version>
+  <version>0.22.1-beta.1</version>
   <packaging>pom</packaging>
   <name>${project.artifactId}</name>
   <description>LanceDB Java SDK Parent POM</description>
```
```diff
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
```
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": [
     "win32"
   ],
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",
```
nodejs/package-lock.json (generated, 4 changes)

```diff
@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.22.1-beta.0",
+      "version": "0.22.1-beta.1",
       "cpu": [
         "x64",
         "arm64"
```
```diff
@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",
```
```diff
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1-beta.1"
+current_version = "0.25.1-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
```
```diff
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1-beta.1"
+version = "0.25.1-beta.2"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
```
```diff
@@ -122,7 +122,7 @@ class EmbeddingFunctionRegistry:
                 obj["vector_column"]: EmbeddingFunctionConfig(
                     vector_column=obj["vector_column"],
                     source_column=obj["source_column"],
-                    function=self.get(obj["name"])(**obj["model"]),
+                    function=self.get(obj["name"]).create(**obj["model"]),
                 )
                 for obj in raw_list
             }
```
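The registry fix above routes deserialized configs through the registered class's `create()` factory instead of calling the constructor directly. A minimal sketch of the difference, grounded in the test added further down; the function name and model dict here are illustrative:

```python
from typing import Optional

import numpy as np
from lancedb.embeddings import (
    EmbeddingFunctionRegistry,
    TextEmbeddingFunction,
    register,
)


@register("my-embedding")  # illustrative registration name
class MyEmbedding(TextEmbeddingFunction):
    api_key: str

    @staticmethod
    def sensitive_keys():
        return ["api_key"]

    def ndims(self):
        return 4

    def generate_embeddings(self, texts):
        # Stand-in: a real function would call an API with api_key.
        return [np.random.rand(self.ndims()).tolist() for _ in texts]


registry = EmbeddingFunctionRegistry.get_instance()
registry.set_var("secret", "sk-real-key")

# Shape of one entry in the raw_list that parse_functions iterates over.
obj = {"name": "my-embedding", "model": {"api_key": "$var:secret"}}

# Before the fix: direct construction, which bypassed the factory hook.
# func = registry.get(obj["name"])(**obj["model"])

# After the fix: the create() factory, so "$var:" references and
# sensitive keys get the same handling as user-created functions.
func = registry.get(obj["name"]).create(**obj["model"])
```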
```diff
@@ -251,6 +251,13 @@ class HnswPq:
         results. In most cases, there is no benefit to setting this higher than 500.
         This value should be set to a value that is not less than `ef` in the
         search phase.
+
+    target_partition_size, default is 1,048,576
+        The target size of each partition.
+
+        This value controls the tradeoff between search performance and
+        accuracy: higher values give faster search but less accurate results.
     """
 
     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -261,6 +268,7 @@ class HnswPq:
     sample_rate: int = 256
     m: int = 20
     ef_construction: int = 300
+    target_partition_size: Optional[int] = None
 
 
 @dataclass
@@ -351,6 +359,12 @@ class HnswSq:
         This value should be set to a value that is not less than `ef` in the search
         phase.
+
+    target_partition_size, default is 1,048,576
+        The target size of each partition.
+
+        This value controls the tradeoff between search performance and
+        accuracy: higher values give faster search but less accurate results.
     """
 
     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -359,6 +373,7 @@ class HnswSq:
     sample_rate: int = 256
     m: int = 20
     ef_construction: int = 300
+    target_partition_size: Optional[int] = None
 
 
 @dataclass
@@ -444,12 +459,20 @@ class IvfFlat:
         cases the default should be sufficient.
 
         The default value is 256.
+
+    target_partition_size, default is 8192
+        The target size of each partition.
+
+        This value controls the tradeoff between search performance and
+        accuracy: higher values give faster search but less accurate results.
     """
 
     distance_type: Literal["l2", "cosine", "dot", "hamming"] = "l2"
     num_partitions: Optional[int] = None
     max_iterations: int = 50
     sample_rate: int = 256
+    target_partition_size: Optional[int] = None
 
 
 @dataclass
@@ -564,6 +587,13 @@ class IvfPq:
         cases the default should be sufficient.
 
         The default value is 256.
+
+    target_partition_size, default is 8192
+        The target size of each partition.
+
+        This value controls the tradeoff between search performance and
+        accuracy: higher values give faster search but less accurate results.
     """
 
     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -572,6 +602,7 @@ class IvfPq:
     num_bits: int = 8
     max_iterations: int = 50
     sample_rate: int = 256
+    target_partition_size: Optional[int] = None
 
 
 __all__ = [
```
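The new `target_partition_size` field lets an index be sized by rows per partition instead of an explicit partition count. A minimal sketch using the config dataclasses from this diff, assuming the async `create_index(column, config=...)` path that the mocked calls in the tests below exercise; the database path and table name are illustrative:

```python
import asyncio

import lancedb
from lancedb.index import IvfPq


async def main():
    db = await lancedb.connect_async("./lancedb-data")  # path is illustrative
    table = await db.open_table("my_table")             # name is illustrative

    # Size partitions by row count rather than pinning num_partitions;
    # the index derives a partition count from the target size.
    await table.create_index(
        "vector",
        config=IvfPq(
            distance_type="l2",
            num_sub_vectors=96,
            num_bits=4,
            target_partition_size=8192,
        ),
        replace=True,
    )


asyncio.run(main())
```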
```diff
@@ -691,6 +691,7 @@ class Table(ABC):
         ef_construction: int = 300,
         name: Optional[str] = None,
         train: bool = True,
+        target_partition_size: Optional[int] = None,
     ):
         """Create an index on the table.
 
@@ -2002,6 +2003,7 @@ class LanceTable(Table):
         *,
         name: Optional[str] = None,
         train: bool = True,
+        target_partition_size: Optional[int] = None,
     ):
         """Create an index on the table."""
         if accelerator is not None:
@@ -2018,6 +2020,7 @@ class LanceTable(Table):
                 num_bits=num_bits,
                 m=m,
                 ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
             )
             self.checkout_latest()
             return
@@ -2027,6 +2030,7 @@ class LanceTable(Table):
                 num_partitions=num_partitions,
                 max_iterations=max_iterations,
                 sample_rate=sample_rate,
+                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_PQ":
             config = IvfPq(
@@ -2036,6 +2040,7 @@ class LanceTable(Table):
                 num_bits=num_bits,
                 max_iterations=max_iterations,
                 sample_rate=sample_rate,
+                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_HNSW_PQ":
             config = HnswPq(
@@ -2047,6 +2052,7 @@ class LanceTable(Table):
                 sample_rate=sample_rate,
                 m=m,
                 ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_HNSW_SQ":
             config = HnswSq(
@@ -2056,6 +2062,7 @@ class LanceTable(Table):
                 sample_rate=sample_rate,
                 m=m,
                 ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
             )
         else:
             raise ValueError(f"Unknown index type {index_type}")
```
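The same keyword also flows through the keyword-argument form of `create_index` shown above, which builds the matching config internally. A minimal sketch mirroring the call exercised in the test file further down; the connection details are illustrative:

```python
import lancedb

db = lancedb.connect("./lancedb-data")  # path is illustrative
table = db.open_table("my_table")       # name is illustrative

# Keyword path: create_index assembles an IvfPq config internally
# and forwards target_partition_size to it.
table.create_index(
    metric="l2",
    vector_column_name="vector",
    num_sub_vectors=96,
    num_bits=4,
    target_partition_size=8192,
    replace=True,
)
```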
```diff
@@ -114,6 +114,63 @@ def test_embedding_function_variables():
     assert func.safe_model_dump()["secret_key"] == "$var:secret"
 
 
+def test_parse_functions_with_variables():
+    @register("variable-parsing-test")
+    class VariableParsingFunction(TextEmbeddingFunction):
+        api_key: str
+        base_url: Optional[str] = None
+
+        @staticmethod
+        def sensitive_keys():
+            return ["api_key"]
+
+        def ndims(self):
+            return 10
+
+        def generate_embeddings(self, texts):
+            # Mock implementation that just returns random embeddings
+            # In real usage, this would use the api_key to call an API
+            return [np.random.rand(self.ndims()).tolist() for _ in texts]
+
+    registry = EmbeddingFunctionRegistry.get_instance()
+
+    registry.set_var("test_api_key", "sk-test-key-12345")
+    registry.set_var("test_base_url", "https://api.example.com")
+
+    conf = EmbeddingFunctionConfig(
+        source_column="text",
+        vector_column="vector",
+        function=registry.get("variable-parsing-test").create(
+            api_key="$var:test_api_key", base_url="$var:test_base_url"
+        ),
+    )
+
+    metadata = registry.get_table_metadata([conf])
+
+    # Create a mock arrow table with the metadata
+    schema = pa.schema(
+        [pa.field("text", pa.string()), pa.field("vector", pa.list_(pa.float32(), 10))]
+    )
+    table = pa.table({"text": [], "vector": []}, schema=schema)
+    table = table.replace_schema_metadata(metadata)
+
+    ds = lance.write_dataset(table, "memory://")
+
+    configs = registry.parse_functions(ds.schema.metadata)
+
+    assert "vector" in configs
+    parsed_func = configs["vector"].function
+
+    assert parsed_func.api_key == "sk-test-key-12345"
+    assert parsed_func.base_url == "https://api.example.com"
+
+    embeddings = parsed_func.generate_embeddings(["test text"])
+    assert len(embeddings) == 1
+    assert len(embeddings[0]) == 10
+
+    assert parsed_func.safe_model_dump()["api_key"] == "$var:test_api_key"
+
+
 def test_embedding_with_bad_results(tmp_path):
     @register("null-embedding")
     class NullEmbeddingFunction(TextEmbeddingFunction):
```
```diff
@@ -674,6 +674,45 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
         "vector", replace=True, config=expected_config, name=None, train=True
     )
 
+    # Test with target_partition_size
+    table.create_index(
+        metric="l2",
+        num_sub_vectors=96,
+        vector_column_name="vector",
+        replace=True,
+        index_cache_size=256,
+        num_bits=4,
+        target_partition_size=8192,
+    )
+    expected_config = IvfPq(
+        distance_type="l2",
+        num_sub_vectors=96,
+        num_bits=4,
+        target_partition_size=8192,
+    )
+    mock_create_index.assert_called_with(
+        "vector", replace=True, config=expected_config, name=None, train=True
+    )
+
+    # target_partition_size has a default value,
+    # so `num_partitions` and `target_partition_size` are not required
+    table.create_index(
+        metric="l2",
+        num_sub_vectors=96,
+        vector_column_name="vector",
+        replace=True,
+        index_cache_size=256,
+        num_bits=4,
+    )
+    expected_config = IvfPq(
+        distance_type="l2",
+        num_sub_vectors=96,
+        num_bits=4,
+    )
+    mock_create_index.assert_called_with(
+        "vector", replace=True, config=expected_config, name=None, train=True
+    )
+
     table.create_index(
         vector_column_name="my_vector",
         metric="dot",
```
```diff
@@ -255,7 +255,7 @@ impl Connection {
     #[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
     #[allow(clippy::too_many_arguments)]
     pub fn connect(
-        py: Python,
+        py: Python<'_>,
         uri: String,
         api_key: Option<String>,
         region: Option<String>,
```
```diff
@@ -63,6 +63,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
+            }
             Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
         },
         "IvfPq" => {
@@ -76,6 +79,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 ivf_pq_builder = ivf_pq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_pq_builder = ivf_pq_builder.target_partition_size(target_partition_size);
+            }
             if let Some(num_sub_vectors) = params.num_sub_vectors {
                 ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
             }
@@ -94,6 +100,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 hnsw_pq_builder = hnsw_pq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                hnsw_pq_builder = hnsw_pq_builder.target_partition_size(target_partition_size);
+            }
             if let Some(num_sub_vectors) = params.num_sub_vectors {
                 hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
             }
@@ -111,6 +120,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 hnsw_sq_builder = hnsw_sq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
+            }
             Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
         },
         not_supported => Err(PyValueError::new_err(format!(
@@ -144,6 +156,7 @@ struct IvfFlatParams {
     num_partitions: Option<u32>,
     max_iterations: u32,
     sample_rate: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[derive(FromPyObject)]
@@ -154,6 +167,7 @@ struct IvfPqParams {
     num_bits: u32,
     max_iterations: u32,
     sample_rate: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[derive(FromPyObject)]
@@ -166,6 +180,7 @@ struct IvfHnswPqParams {
     sample_rate: u32,
     m: u32,
     ef_construction: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[derive(FromPyObject)]
@@ -176,6 +191,7 @@ struct IvfHnswSqParams {
     sample_rate: u32,
     m: u32,
     ef_construction: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[pyclass(get_all)]
```
```diff
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -86,11 +86,11 @@ rand = { version = "0.9", features = ["small_rng"] }
 random_word = { version = "0.4.3", features = ["en"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
-aws-sdk-dynamodb = { version = "1.38.0" }
-aws-sdk-s3 = { version = "1.38.0" }
-aws-sdk-kms = { version = "1.37" }
-aws-config = { version = "1.0" }
-aws-smithy-runtime = { version = "1.3" }
+aws-sdk-dynamodb = { version = "1.55.0" }
+aws-sdk-s3 = { version = "1.55.0" }
+aws-sdk-kms = { version = "1.48.0" }
+aws-config = { version = "1.5.10" }
+aws-smithy-runtime = { version = "1.9.1" }
 datafusion.workspace = true
 http-body = "1" # Matching reqwest
 rstest = "0.23.0"
```
```diff
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-//! Catalog implementation for managing databases
-
-pub mod listing;
-
-use std::collections::HashMap;
-use std::sync::Arc;
-
-use crate::database::Database;
-use crate::error::Result;
-use async_trait::async_trait;
-
-pub trait CatalogOptions {
-    fn serialize_into_map(&self, map: &mut HashMap<String, String>);
-}
-
-/// Request parameters for listing databases
-#[derive(Clone, Debug, Default)]
-pub struct DatabaseNamesRequest {
-    /// Start listing after this name (exclusive)
-    pub start_after: Option<String>,
-    /// Maximum number of names to return
-    pub limit: Option<u32>,
-}
-
-/// Request to open an existing database
-#[derive(Clone, Debug)]
-pub struct OpenDatabaseRequest {
-    /// The name of the database to open
-    pub name: String,
-    /// A map of database-specific options
-    ///
-    /// Consult the catalog / database implementation to determine which options are available
-    pub database_options: HashMap<String, String>,
-}
-
-/// Database creation mode
-///
-/// The default behavior is Create
-pub enum CreateDatabaseMode {
-    /// Create new database, error if exists
-    Create,
-    /// Open existing database if present
-    ExistOk,
-    /// Overwrite existing database
-    Overwrite,
-}
-
-impl Default for CreateDatabaseMode {
-    fn default() -> Self {
-        Self::Create
-    }
-}
-
-/// Request to create a new database
-pub struct CreateDatabaseRequest {
-    /// The name of the database to create
-    pub name: String,
-    /// The creation mode
-    pub mode: CreateDatabaseMode,
-    /// A map of catalog-specific options, consult your catalog implementation to determine what's available
-    pub options: HashMap<String, String>,
-}
-
-#[async_trait]
-pub trait Catalog: Send + Sync + std::fmt::Debug + 'static {
-    /// List database names with pagination
-    async fn database_names(&self, request: DatabaseNamesRequest) -> Result<Vec<String>>;
-
-    /// Create a new database
-    async fn create_database(&self, request: CreateDatabaseRequest) -> Result<Arc<dyn Database>>;
-
-    /// Open existing database
-    async fn open_database(&self, request: OpenDatabaseRequest) -> Result<Arc<dyn Database>>;
-
-    /// Rename database
-    async fn rename_database(&self, old_name: &str, new_name: &str) -> Result<()>;
-
-    /// Delete database
-    async fn drop_database(&self, name: &str) -> Result<()>;
-
-    /// Delete all databases
-    async fn drop_all_databases(&self) -> Result<()>;
-}
```
```diff
@@ -1,624 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-//! Catalog implementation based on a local file system.
-
-use std::collections::HashMap;
-use std::fs::create_dir_all;
-use std::path::Path;
-use std::sync::Arc;
-
-use super::{
-    Catalog, CatalogOptions, CreateDatabaseMode, CreateDatabaseRequest, DatabaseNamesRequest,
-    OpenDatabaseRequest,
-};
-use crate::connection::ConnectRequest;
-use crate::database::listing::{ListingDatabase, ListingDatabaseOptions};
-use crate::database::{Database, DatabaseOptions};
-use crate::error::{CreateDirSnafu, Error, Result};
-use async_trait::async_trait;
-use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
-use lance_io::local::to_local_path;
-use object_store::path::Path as ObjectStorePath;
-use snafu::ResultExt;
-
-/// Options for the listing catalog
-///
-/// Note: the catalog will use the `storage_options` configured on
-/// db_options to configure storage for listing / creating / deleting
-/// databases.
-#[derive(Clone, Debug, Default)]
-pub struct ListingCatalogOptions {
-    /// The options to use for databases opened by this catalog
-    ///
-    /// This also contains the storage options used by the catalog
-    pub db_options: ListingDatabaseOptions,
-}
-
-impl CatalogOptions for ListingCatalogOptions {
-    fn serialize_into_map(&self, map: &mut HashMap<String, String>) {
-        self.db_options.serialize_into_map(map);
-    }
-}
-
-impl ListingCatalogOptions {
-    pub fn builder() -> ListingCatalogOptionsBuilder {
-        ListingCatalogOptionsBuilder::new()
-    }
-
-    pub(crate) fn parse_from_map(map: &HashMap<String, String>) -> Result<Self> {
-        let db_options = ListingDatabaseOptions::parse_from_map(map)?;
-        Ok(Self { db_options })
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-pub struct ListingCatalogOptionsBuilder {
-    options: ListingCatalogOptions,
-}
-
-impl ListingCatalogOptionsBuilder {
-    pub fn new() -> Self {
-        Self {
-            options: ListingCatalogOptions::default(),
-        }
-    }
-
-    pub fn db_options(mut self, db_options: ListingDatabaseOptions) -> Self {
-        self.options.db_options = db_options;
-        self
-    }
-
-    pub fn build(self) -> ListingCatalogOptions {
-        self.options
-    }
-}
-
-/// A catalog implementation that works by listing subfolders in a directory
-///
-/// The listing catalog will be created with a base folder specified by the URI. Every subfolder
-/// in this base folder will be considered a database. These will be opened as a
-/// [`crate::database::listing::ListingDatabase`]
-#[derive(Debug)]
-pub struct ListingCatalog {
-    object_store: Arc<ObjectStore>,
-
-    uri: String,
-
-    base_path: ObjectStorePath,
-
-    options: ListingCatalogOptions,
-}
-
-impl ListingCatalog {
-    /// Try to create a local directory to store the lancedb dataset
-    pub fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
-        let path = Path::new(path);
-        if !path.try_exists()? {
-            create_dir_all(path)?;
-        }
-        Ok(())
-    }
-
-    pub fn uri(&self) -> &str {
-        &self.uri
-    }
-
-    async fn open_path(path: &str) -> Result<Self> {
-        let (object_store, base_path) = ObjectStore::from_uri(path).await?;
-        if object_store.is_local() {
-            Self::try_create_dir(path).context(CreateDirSnafu { path })?;
-        }
-
-        Ok(Self {
-            uri: path.to_string(),
-            base_path,
-            object_store,
-            options: ListingCatalogOptions::default(),
-        })
-    }
-
-    pub async fn connect(request: &ConnectRequest) -> Result<Self> {
-        let uri = &request.uri;
-        let parse_res = url::Url::parse(uri);
-
-        let options = ListingCatalogOptions::parse_from_map(&request.options)?;
-
-        match parse_res {
-            Ok(url) if url.scheme().len() == 1 && cfg!(windows) => Self::open_path(uri).await,
-            Ok(url) => {
-                let plain_uri = url.to_string();
-
-                let registry = Arc::new(ObjectStoreRegistry::default());
-                let storage_options = options.db_options.storage_options.clone();
-                let os_params = ObjectStoreParams {
-                    storage_options: Some(storage_options.clone()),
-                    ..Default::default()
-                };
-                let (object_store, base_path) =
-                    ObjectStore::from_uri_and_params(registry, &plain_uri, &os_params).await?;
-                if object_store.is_local() {
-                    Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
-                }
-
-                Ok(Self {
-                    uri: String::from(url.clone()),
-                    base_path,
-                    object_store,
-                    options,
-                })
-            }
-            Err(_) => Self::open_path(uri).await,
-        }
-    }
-
-    fn database_path(&self, name: &str) -> ObjectStorePath {
-        self.base_path.child(name.replace('\\', "/"))
-    }
-}
-
-#[async_trait]
-impl Catalog for ListingCatalog {
-    async fn database_names(&self, request: DatabaseNamesRequest) -> Result<Vec<String>> {
-        let mut f = self
-            .object_store
-            .read_dir(self.base_path.clone())
-            .await?
-            .iter()
-            .map(Path::new)
-            .filter_map(|p| p.file_name().and_then(|s| s.to_str().map(String::from)))
-            .collect::<Vec<String>>();
-        f.sort();
-
-        if let Some(start_after) = request.start_after {
-            let index = f
-                .iter()
-                .position(|name| name.as_str() > start_after.as_str())
-                .unwrap_or(f.len());
-            f.drain(0..index);
-        }
-        if let Some(limit) = request.limit {
-            f.truncate(limit as usize);
-        }
-        Ok(f)
-    }
-
-    async fn create_database(&self, request: CreateDatabaseRequest) -> Result<Arc<dyn Database>> {
-        let db_path = self.database_path(&request.name);
-        let db_path_str = to_local_path(&db_path);
-        let exists = Path::new(&db_path_str).exists();
-
-        match request.mode {
-            CreateDatabaseMode::Create if exists => {
-                return Err(Error::DatabaseAlreadyExists { name: request.name })
-            }
-            CreateDatabaseMode::Create => {
-                create_dir_all(db_path.to_string()).unwrap();
-            }
-            CreateDatabaseMode::ExistOk => {
-                if !exists {
-                    create_dir_all(db_path.to_string()).unwrap();
-                }
-            }
-            CreateDatabaseMode::Overwrite => {
-                if exists {
-                    self.drop_database(&request.name).await?;
-                }
-                create_dir_all(db_path.to_string()).unwrap();
-            }
-        }
-
-        let db_uri = format!("/{}/{}", self.base_path, request.name);
-
-        let mut connect_request = ConnectRequest {
-            uri: db_uri,
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            read_consistency_interval: None,
-            options: Default::default(),
-            session: None,
-        };
-
-        // Add the db options to the connect request
-        self.options
-            .db_options
-            .serialize_into_map(&mut connect_request.options);
-
-        Ok(Arc::new(
-            ListingDatabase::connect_with_options(&connect_request).await?,
-        ))
-    }
-
-    async fn open_database(&self, request: OpenDatabaseRequest) -> Result<Arc<dyn Database>> {
-        let db_path = self.database_path(&request.name);
-
-        let db_path_str = to_local_path(&db_path);
-        let exists = Path::new(&db_path_str).exists();
-        if !exists {
-            return Err(Error::DatabaseNotFound { name: request.name });
-        }
-
-        let mut connect_request = ConnectRequest {
-            uri: db_path.to_string(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            read_consistency_interval: None,
-            options: Default::default(),
-            session: None,
-        };
-
-        // Add the db options to the connect request
-        self.options
-            .db_options
-            .serialize_into_map(&mut connect_request.options);
-
-        Ok(Arc::new(
-            ListingDatabase::connect_with_options(&connect_request).await?,
-        ))
-    }
-
-    async fn rename_database(&self, _old_name: &str, _new_name: &str) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "rename_database is not supported in LanceDB OSS yet".to_string(),
-        })
-    }
-
-    async fn drop_database(&self, name: &str) -> Result<()> {
-        let db_path = self.database_path(name);
-        self.object_store
-            .remove_dir_all(db_path.clone())
-            .await
-            .map_err(|err| match err {
-                lance::Error::NotFound { .. } => Error::DatabaseNotFound {
-                    name: name.to_owned(),
-                },
-                _ => Error::from(err),
-            })?;
-
-        Ok(())
-    }
-
-    async fn drop_all_databases(&self) -> Result<()> {
-        self.object_store
-            .remove_dir_all(self.base_path.clone())
-            .await?;
-        Ok(())
-    }
-}
-
-#[cfg(all(test, not(windows)))]
-mod tests {
-    use super::*;
-
-    /// file:/// URIs with drive letters do not work correctly on Windows
-    #[cfg(windows)]
-    fn path_to_uri(path: PathBuf) -> String {
-        path.to_str().unwrap().to_string()
-    }
-
-    #[cfg(not(windows))]
-    fn path_to_uri(path: PathBuf) -> String {
-        Url::from_file_path(path).unwrap().to_string()
-    }
-
-    async fn setup_catalog() -> (TempDir, ListingCatalog) {
-        let tempdir = tempfile::tempdir().unwrap();
-        let catalog_path = tempdir.path().join("catalog");
-        std::fs::create_dir_all(&catalog_path).unwrap();
-
-        let uri = path_to_uri(catalog_path);
-
-        let request = ConnectRequest {
-            uri: uri.clone(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: Default::default(),
-            read_consistency_interval: None,
-            session: None,
-        };
-
-        let catalog = ListingCatalog::connect(&request).await.unwrap();
-
-        (tempdir, catalog)
-    }
-
-    use crate::database::{CreateTableData, CreateTableRequest, TableNamesRequest};
-    use crate::table::TableDefinition;
-    use arrow_schema::Field;
-    use std::path::PathBuf;
-    use std::sync::Arc;
-    use tempfile::{tempdir, TempDir};
-    use url::Url;
-
-    #[tokio::test]
-    async fn test_database_names() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        let names = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert!(names.is_empty());
-    }
-
-    #[tokio::test]
-    async fn test_create_database() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db1".into(),
-                mode: CreateDatabaseMode::Create,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let names = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(names, vec!["db1"]);
-    }
-
-    #[tokio::test]
-    async fn test_create_database_exist_ok() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        let db1 = catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db_exist_ok".into(),
-                mode: CreateDatabaseMode::ExistOk,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-        let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
-        db1.create_table(CreateTableRequest {
-            name: "test_table".parse().unwrap(),
-            data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
-            mode: Default::default(),
-            write_options: Default::default(),
-            namespace: vec![],
-        })
-        .await
-        .unwrap();
-
-        let db2 = catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db_exist_ok".into(),
-                mode: CreateDatabaseMode::ExistOk,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let tables = db2.table_names(TableNamesRequest::default()).await.unwrap();
-        assert_eq!(tables, vec!["test_table".to_string()]);
-    }
-
-    #[tokio::test]
-    async fn test_create_database_overwrite() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        let db = catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db_overwrite".into(),
-                mode: CreateDatabaseMode::Create,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-        let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
-        db.create_table(CreateTableRequest {
-            name: "old_table".parse().unwrap(),
-            data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
-            mode: Default::default(),
-            write_options: Default::default(),
-            namespace: vec![],
-        })
-        .await
-        .unwrap();
-        let tables = db.table_names(TableNamesRequest::default()).await.unwrap();
-        assert!(!tables.is_empty());
-
-        let new_db = catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db_overwrite".into(),
-                mode: CreateDatabaseMode::Overwrite,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let tables = new_db
-            .table_names(TableNamesRequest::default())
-            .await
-            .unwrap();
-        assert!(tables.is_empty());
-    }
-
-    #[tokio::test]
-    async fn test_create_database_overwrite_non_existing() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        catalog
-            .create_database(CreateDatabaseRequest {
-                name: "new_db".into(),
-                mode: CreateDatabaseMode::Overwrite,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let names = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert!(names.contains(&"new_db".to_string()));
-    }
-
-    #[tokio::test]
-    async fn test_open_database() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        // Test open non-existent
-        let result = catalog
-            .open_database(OpenDatabaseRequest {
-                name: "missing".into(),
-                database_options: HashMap::new(),
-            })
-            .await;
-        assert!(matches!(
-            result.unwrap_err(),
-            Error::DatabaseNotFound { name } if name == "missing"
-        ));
-
-        // Create and open
-        catalog
-            .create_database(CreateDatabaseRequest {
-                name: "valid_db".into(),
-                mode: CreateDatabaseMode::Create,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let db = catalog
-            .open_database(OpenDatabaseRequest {
-                name: "valid_db".into(),
-                database_options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-        assert_eq!(
-            db.table_names(TableNamesRequest::default()).await.unwrap(),
-            Vec::<String>::new()
-        );
-    }
-
-    #[tokio::test]
-    async fn test_drop_database() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        // Create test database
-        catalog
-            .create_database(CreateDatabaseRequest {
-                name: "to_drop".into(),
-                mode: CreateDatabaseMode::Create,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let names = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert!(!names.is_empty());
-
-        // Drop database
-        catalog.drop_database("to_drop").await.unwrap();
-
-        let names = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert!(names.is_empty());
-    }
-
-    #[tokio::test]
-    async fn test_drop_all_databases() {
-        let (_tempdir, catalog) = setup_catalog().await;
-
-        catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db1".into(),
-                mode: CreateDatabaseMode::Create,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-        catalog
-            .create_database(CreateDatabaseRequest {
-                name: "db2".into(),
-                mode: CreateDatabaseMode::Create,
-                options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        catalog.drop_all_databases().await.unwrap();
-
-        let names = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert!(names.is_empty());
-    }
-
-    #[tokio::test]
-    async fn test_rename_database_unsupported() {
-        let (_tempdir, catalog) = setup_catalog().await;
-        let result = catalog.rename_database("old", "new").await;
-        assert!(matches!(
-            result.unwrap_err(),
-            Error::NotSupported { message } if message.contains("rename_database")
-        ));
-    }
-
-    #[tokio::test]
-    async fn test_connect_local_path() {
-        let tmp_dir = tempdir().unwrap();
-        let path = tmp_dir.path().to_str().unwrap();
-
-        let request = ConnectRequest {
-            uri: path.to_string(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: Default::default(),
-            read_consistency_interval: None,
-            session: None,
-        };
-
-        let catalog = ListingCatalog::connect(&request).await.unwrap();
-        assert!(catalog.object_store.is_local());
-        assert_eq!(catalog.uri, path);
-    }
-
-    #[tokio::test]
-    async fn test_connect_file_scheme() {
-        let tmp_dir = tempdir().unwrap();
-        let path = tmp_dir.path();
-        let uri = path_to_uri(path.to_path_buf());
-
-        let request = ConnectRequest {
-            uri: uri.clone(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: Default::default(),
-            read_consistency_interval: None,
-            session: None,
-        };
-
-        let catalog = ListingCatalog::connect(&request).await.unwrap();
-        assert!(catalog.object_store.is_local());
-        assert_eq!(catalog.uri, uri);
-    }
-
-    #[tokio::test]
-    async fn test_connect_invalid_uri_fallback() {
-        let invalid_uri = "invalid:///path";
-        let request = ConnectRequest {
-            uri: invalid_uri.to_string(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: Default::default(),
-            read_consistency_interval: None,
-            session: None,
-        };
-
-        let result = ListingCatalog::connect(&request).await;
-        assert!(result.is_err());
-    }
-}
```
@@ -13,8 +13,6 @@ use lance::dataset::ReadParams;
 use object_store::aws::AwsCredential;
 
 use crate::arrow::{IntoArrow, IntoArrowStream, SendableRecordBatchStream};
-use crate::catalog::listing::ListingCatalog;
-use crate::catalog::CatalogOptions;
 use crate::database::listing::{
     ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
 };
@@ -660,7 +658,7 @@ pub struct ConnectRequest {
     #[cfg(feature = "remote")]
     pub client_config: ClientConfig,
 
-    /// Database/Catalog specific options
+    /// Database specific options
    pub options: HashMap<String, String>,
 
     /// The interval at which to check for updates from other processes.
@@ -937,50 +935,6 @@ pub fn connect(uri: &str) -> ConnectBuilder {
     ConnectBuilder::new(uri)
 }
 
-/// A builder for configuring a connection to a LanceDB catalog
-#[derive(Debug)]
-pub struct CatalogConnectBuilder {
-    request: ConnectRequest,
-}
-
-impl CatalogConnectBuilder {
-    /// Create a new [`CatalogConnectBuilder`] with the given catalog URI.
-    pub fn new(uri: &str) -> Self {
-        Self {
-            request: ConnectRequest {
-                uri: uri.to_string(),
-                #[cfg(feature = "remote")]
-                client_config: Default::default(),
-                read_consistency_interval: None,
-                options: HashMap::new(),
-                session: None,
-            },
-        }
-    }
-
-    pub fn catalog_options(mut self, catalog_options: &dyn CatalogOptions) -> Self {
-        catalog_options.serialize_into_map(&mut self.request.options);
-        self
-    }
-
-    /// Establishes a connection to the catalog
-    pub async fn execute(self) -> Result<Arc<ListingCatalog>> {
-        let catalog = ListingCatalog::connect(&self.request).await?;
-        Ok(Arc::new(catalog))
-    }
-}
-
-/// Connect to a LanceDB catalog.
-///
-/// A catalog is a container for databases, which in turn are containers for tables.
-///
-/// # Arguments
-///
-/// * `uri` - URI where the catalog is located, can be a local directory or supported remote cloud storage.
-pub fn connect_catalog(uri: &str) -> CatalogConnectBuilder {
-    CatalogConnectBuilder::new(uri)
-}
-
 #[cfg(all(test, feature = "remote"))]
 mod test_utils {
     use super::*;
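For reference, this is roughly how the deleted `connect_catalog` entry point was driven, reconstructed from the removed tests above and below (a sketch against the pre-change API, names as in the deleted code, not current lancedb):

    // Open (or create) a catalog directory, then manage databases inside it.
    let catalog = connect_catalog(tmp_path).execute().await?;
    catalog
        .create_database(CreateDatabaseRequest {
            name: "db1".to_string(),
            mode: Default::default(),
            options: Default::default(),
        })
        .await?;
    let names = catalog.database_names(DatabaseNamesRequest::default()).await?;
    assert_eq!(names, vec!["db1".to_string()]);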
@@ -1022,7 +976,6 @@ mod test_utils {
 mod tests {
     use std::fs::create_dir_all;
 
-    use crate::catalog::{Catalog, DatabaseNamesRequest, OpenDatabaseRequest};
     use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
     use crate::query::QueryBase;
     use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1328,91 +1281,4 @@ mod tests {
             .unwrap();
         assert_eq!(other_schema, overwritten.schema().await.unwrap());
     }
-
-    #[tokio::test]
-    async fn test_connect_catalog() {
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let catalog = connect_catalog(uri).execute().await.unwrap();
-
-        // Verify that we can get the uri from the catalog
-        let catalog_uri = catalog.uri();
-        assert_eq!(catalog_uri, uri);
-
-        // Check that the catalog is initially empty
-        let dbs = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs.len(), 0);
-    }
-
-    #[tokio::test]
-    #[cfg(not(windows))]
-    async fn test_catalog_create_database() {
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let catalog = connect_catalog(uri).execute().await.unwrap();
-
-        let db_name = "test_db";
-        catalog
-            .create_database(crate::catalog::CreateDatabaseRequest {
-                name: db_name.to_string(),
-                mode: Default::default(),
-                options: Default::default(),
-            })
-            .await
-            .unwrap();
-
-        let dbs = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs.len(), 1);
-        assert_eq!(dbs[0], db_name);
-
-        let db = catalog
-            .open_database(OpenDatabaseRequest {
-                name: db_name.to_string(),
-                database_options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-
-        let tables = db.table_names(Default::default()).await.unwrap();
-        assert_eq!(tables.len(), 0);
-    }
-
-    #[tokio::test]
-    #[cfg(not(windows))]
-    async fn test_catalog_drop_database() {
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let catalog = connect_catalog(uri).execute().await.unwrap();
-
-        // Create and then drop a database
-        let db_name = "test_db_to_drop";
-        catalog
-            .create_database(crate::catalog::CreateDatabaseRequest {
-                name: db_name.to_string(),
-                mode: Default::default(),
-                options: Default::default(),
-            })
-            .await
-            .unwrap();
-
-        let dbs = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs.len(), 1);
-
-        catalog.drop_database(db_name).await.unwrap();
-
-        let dbs_after = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs_after.len(), 0);
-    }
 }
@@ -587,7 +587,13 @@ impl ListingDatabase {
 
 #[async_trait::async_trait]
 impl Database for ListingDatabase {
-    async fn list_namespaces(&self, _request: ListNamespacesRequest) -> Result<Vec<String>> {
+    async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
+        if !request.namespace.is_empty() {
+            return Err(Error::NotSupported {
+                message: "Namespace operations are not supported for listing database".into(),
+            });
+        }
+
         Ok(Vec::new())
     }
 
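With this change a `ListingDatabase` still reports an empty namespace list at the root, but explicitly rejects any nested namespace instead of silently returning nothing. A sketch of the resulting behavior (assuming `ListNamespacesRequest` exposes the `namespace: Vec<String>` field used above and implements `Default`):

    // Root listing succeeds and is empty; the listing layout is flat.
    let root = db.list_namespaces(ListNamespacesRequest::default()).await?;
    assert!(root.is_empty());

    // Any nested namespace is now an explicit NotSupported error.
    let nested = db
        .list_namespaces(ListNamespacesRequest {
            namespace: vec!["child".to_string()],
            ..Default::default()
        })
        .await;
    assert!(matches!(nested, Err(Error::NotSupported { .. })));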
@@ -45,10 +45,10 @@ use crate::{
 pub trait EmbeddingFunction: std::fmt::Debug + Send + Sync {
     fn name(&self) -> &str;
     /// The type of the input data
-    fn source_type(&self) -> Result<Cow<DataType>>;
+    fn source_type(&self) -> Result<Cow<'_, DataType>>;
     /// The type of the output data
     /// This should **always** match the output of the `embed` function
-    fn dest_type(&self) -> Result<Cow<DataType>>;
+    fn dest_type(&self) -> Result<Cow<'_, DataType>>;
     /// Compute the embeddings for the source column in the database
     fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>>;
     /// Compute the embeddings for a given user query
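The signature change here (and in the impls below) is lifetime syntax only: `Cow<DataType>` in return position already borrowed from `&self` through elision, and `'_` spells that borrow out, as newer rustc elided-lifetime lints expect. A self-contained sketch of the two return styles (the `Embedder` type is hypothetical, not from this crate):

    use std::borrow::Cow;
    use arrow_schema::DataType;

    struct Embedder {
        source: DataType, // stored on self, so it can be borrowed
    }

    impl Embedder {
        // Borrowed: no clone; '_ ties the Cow's lifetime to &self.
        fn source_type(&self) -> Cow<'_, DataType> {
            Cow::Borrowed(&self.source)
        }

        // Owned: constructed on the fly, nothing borrowed from self.
        fn dest_type(&self) -> Cow<'_, DataType> {
            Cow::Owned(DataType::new_fixed_size_list(DataType::Float32, 384, true))
        }
    }

    fn main() {
        let e = Embedder { source: DataType::Utf8 };
        assert_eq!(*e.source_type(), DataType::Utf8);
        assert!(matches!(e.dest_type().as_ref(), DataType::FixedSizeList(_, 384)));
    }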
@@ -75,11 +75,11 @@ impl EmbeddingFunction for BedrockEmbeddingFunction {
         "bedrock"
     }
 
-    fn source_type(&self) -> Result<Cow<DataType>> {
+    fn source_type(&self) -> Result<Cow<'_, DataType>> {
         Ok(Cow::Owned(DataType::Utf8))
     }
 
-    fn dest_type(&self) -> Result<Cow<DataType>> {
+    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
         let n_dims = self.model.ndims();
         Ok(Cow::Owned(DataType::new_fixed_size_list(
             DataType::Float32,
@@ -144,11 +144,11 @@ impl EmbeddingFunction for OpenAIEmbeddingFunction {
         "openai"
     }
 
-    fn source_type(&self) -> Result<Cow<DataType>> {
+    fn source_type(&self) -> Result<Cow<'_, DataType>> {
         Ok(Cow::Owned(DataType::Utf8))
     }
 
-    fn dest_type(&self) -> Result<Cow<DataType>> {
+    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
         let n_dims = self.model.ndims();
         Ok(Cow::Owned(DataType::new_fixed_size_list(
             DataType::Float32,
@@ -407,11 +407,11 @@ impl EmbeddingFunction for SentenceTransformersEmbeddings {
         "sentence-transformers"
     }
 
-    fn source_type(&self) -> crate::Result<std::borrow::Cow<arrow_schema::DataType>> {
+    fn source_type(&self) -> crate::Result<std::borrow::Cow<'_, arrow_schema::DataType>> {
         Ok(Cow::Owned(DataType::Utf8))
     }
 
-    fn dest_type(&self) -> crate::Result<std::borrow::Cow<arrow_schema::DataType>> {
+    fn dest_type(&self) -> crate::Result<std::borrow::Cow<'_, arrow_schema::DataType>> {
         let (n_dims, dtype) = self.compute_ndims_and_dtype()?;
         Ok(Cow::Owned(DataType::new_fixed_size_list(
             dtype,
@@ -112,6 +112,15 @@ macro_rules! impl_ivf_params_setter {
             self.max_iterations = max_iterations;
             self
         }
 
+        /// The target size of each partition.
+        ///
+        /// This value controls the tradeoff between search performance and accuracy.
+        /// The higher the value the faster the search but the less accurate the results will be.
+        pub fn target_partition_size(mut self, target_partition_size: u32) -> Self {
+            self.target_partition_size = Some(target_partition_size);
+            self
+        }
+
     };
 }
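Since the macro stamps this setter onto every IVF builder, `target_partition_size` becomes an alternative to fixing `num_partitions` by hand: partitions are sized by row count instead. A hedged usage sketch (`table` is an open lancedb `Table`; the column name and the 8192 value are illustrative):

    use lancedb::index::{vector::IvfPqIndexBuilder, Index};

    // Ask for roughly 8192 rows per partition rather than choosing
    // num_partitions directly.
    table
        .create_index(
            &["vector"],
            Index::IvfPq(IvfPqIndexBuilder::default().target_partition_size(8192)),
        )
        .execute()
        .await?;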
@@ -182,6 +191,7 @@ pub struct IvfFlatIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,
 }
 
 impl Default for IvfFlatIndexBuilder {
@@ -191,6 +201,7 @@ impl Default for IvfFlatIndexBuilder {
             num_partitions: None,
             sample_rate: 256,
             max_iterations: 50,
+            target_partition_size: None,
         }
     }
 }
@@ -228,6 +239,7 @@ pub struct IvfPqIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,
 
     // PQ
     pub(crate) num_sub_vectors: Option<u32>,
@@ -243,6 +255,7 @@ impl Default for IvfPqIndexBuilder {
             num_bits: None,
             sample_rate: 256,
             max_iterations: 50,
+            target_partition_size: None,
         }
     }
 }
@@ -293,6 +306,7 @@ pub struct IvfHnswPqIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,
 
     // HNSW
     pub(crate) m: u32,
@@ -314,6 +328,7 @@ impl Default for IvfHnswPqIndexBuilder {
             max_iterations: 50,
             m: 20,
             ef_construction: 300,
+            target_partition_size: None,
         }
     }
 }
@@ -341,6 +356,7 @@ pub struct IvfHnswSqIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,
 
     // HNSW
     pub(crate) m: u32,
@@ -358,6 +374,7 @@ impl Default for IvfHnswSqIndexBuilder {
             max_iterations: 50,
             m: 20,
             ef_construction: 300,
+            target_partition_size: None,
         }
     }
 }
@@ -191,7 +191,6 @@
 //! ```
 
 pub mod arrow;
-pub mod catalog;
 pub mod connection;
 pub mod data;
 pub mod database;
@@ -242,17 +242,15 @@ pub struct OptimizeStats {
 /// Describes what happens when a vector either contains NaN or
 /// does not have enough values
 #[derive(Clone, Debug, Default)]
+#[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
 enum BadVectorHandling {
     /// An error is returned
     #[default]
     Error,
-    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The offending row is droppped
     Drop,
-    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The invalid/missing items are replaced by fill_value
     Fill(f32),
-    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The invalid items are replaced by NULL
     None,
 }
@@ -20,6 +20,8 @@ use datafusion_physical_plan::SendableRecordBatchStream;
 
 lazy_static! {
     static ref TABLE_NAME_REGEX: regex::Regex = regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
+    static ref NAMESPACE_NAME_REGEX: regex::Regex =
+        regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
 }
 
 pub trait PatchStoreParam {
@@ -98,6 +100,53 @@ pub fn validate_table_name(name: &str) -> Result<()> {
     Ok(())
 }
 
+/// Validate a namespace name component
+///
+/// Namespace names must:
+/// - Not be empty
+/// - Only contain alphanumeric characters, underscores, hyphens, and periods
+///
+/// # Arguments
+/// * `name` - A single namespace component (not the full path)
+///
+/// # Returns
+/// * `Ok(())` if the namespace name is valid
+/// * `Err(Error)` if the namespace name is invalid
+pub fn validate_namespace_name(name: &str) -> Result<()> {
+    if name.is_empty() {
+        return Err(Error::InvalidInput {
+            message: "Namespace names cannot be empty strings".to_string(),
+        });
+    }
+    if !NAMESPACE_NAME_REGEX.is_match(name) {
+        return Err(Error::InvalidInput {
+            message: format!(
+                "Invalid namespace name '{}': Namespace names can only contain alphanumeric characters, underscores, hyphens, and periods",
+                name
+            ),
+        });
+    }
+    Ok(())
+}
+
+/// Validate all components of a namespace
+///
+/// Iterates through all namespace components and validates each one.
+/// Returns an error if any component is invalid.
+///
+/// # Arguments
+/// * `namespace` - The namespace components to validate
+///
+/// # Returns
+/// * `Ok(())` if all namespace components are valid
+/// * `Err(Error)` if any component is invalid
+pub fn validate_namespace(namespace: &[String]) -> Result<()> {
+    for component in namespace {
+        validate_namespace_name(component)?;
+    }
+    Ok(())
+}
+
 /// Find one default column to create index or perform vector query.
 pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
     // Try to find a vector column.
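The new helpers compose: `validate_namespace` simply folds `validate_namespace_name` over each component, so the empty (root) namespace is vacuously valid. A quick sketch, mirroring the unit tests added in the next hunk:

    assert!(validate_namespace_name("prod_ns-1.2").is_ok());
    assert!(validate_namespace_name("bad/name").is_err());

    assert!(validate_namespace(&[]).is_ok()); // root namespace
    assert!(validate_namespace(&["a".to_string(), "b.c".to_string()]).is_ok());
    assert!(validate_namespace(&["a".to_string(), "".to_string()]).is_err());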
@@ -345,6 +394,61 @@ mod tests {
         assert!(validate_table_name("name with space").is_err());
     }
 
+    #[test]
+    fn test_validate_namespace_name() {
+        // Valid namespace names
+        assert!(validate_namespace_name("ns1").is_ok());
+        assert!(validate_namespace_name("namespace_123").is_ok());
+        assert!(validate_namespace_name("my-namespace").is_ok());
+        assert!(validate_namespace_name("my.namespace").is_ok());
+        assert!(validate_namespace_name("NS_1.2.3").is_ok());
+        assert!(validate_namespace_name("a").is_ok());
+        assert!(validate_namespace_name("123").is_ok());
+        assert!(validate_namespace_name("_underscore").is_ok());
+        assert!(validate_namespace_name("-hyphen").is_ok());
+        assert!(validate_namespace_name(".period").is_ok());
+
+        // Invalid namespace names
+        assert!(validate_namespace_name("").is_err());
+        assert!(validate_namespace_name("namespace with spaces").is_err());
+        assert!(validate_namespace_name("namespace/with/slashes").is_err());
+        assert!(validate_namespace_name("namespace\\with\\backslashes").is_err());
+        assert!(validate_namespace_name("namespace$with$delimiter").is_err());
+        assert!(validate_namespace_name("namespace@special").is_err());
+        assert!(validate_namespace_name("namespace#hash").is_err());
+    }
+
+    #[test]
+    fn test_validate_namespace() {
+        // Valid namespace with single component
+        assert!(validate_namespace(&["ns1".to_string()]).is_ok());
+
+        // Valid namespace with multiple components
+        assert!(
+            validate_namespace(&["ns1".to_string(), "ns2".to_string(), "ns3".to_string()]).is_ok()
+        );
+
+        // Empty namespace (root) is valid
+        assert!(validate_namespace(&[]).is_ok());
+
+        // Invalid: contains empty component
+        assert!(validate_namespace(&["ns1".to_string(), "".to_string()]).is_err());
+
+        // Invalid: contains component with spaces
+        assert!(validate_namespace(&["ns1".to_string(), "ns 2".to_string()]).is_err());
+
+        // Invalid: contains component with special characters
+        assert!(validate_namespace(&["ns1".to_string(), "ns@2".to_string()]).is_err());
+        assert!(validate_namespace(&["ns1".to_string(), "ns/2".to_string()]).is_err());
+        assert!(validate_namespace(&["ns1".to_string(), "ns$2".to_string()]).is_err());
+
+        // Valid: underscores, hyphens, and periods are allowed
+        assert!(
+            validate_namespace(&["ns_1".to_string(), "ns-2".to_string(), "ns.3".to_string()])
+                .is_ok()
+        );
+    }
+
     #[test]
     fn test_string_to_datatype() {
         let string = "int32";
@@ -341,10 +341,10 @@ impl EmbeddingFunction for MockEmbed {
     fn name(&self) -> &str {
         &self.name
     }
-    fn source_type(&self) -> Result<Cow<DataType>> {
+    fn source_type(&self) -> Result<Cow<'_, DataType>> {
         Ok(Cow::Borrowed(&self.source_type))
     }
-    fn dest_type(&self) -> Result<Cow<DataType>> {
+    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
         Ok(Cow::Borrowed(&self.dest_type))
     }
     fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {