Compare commits


1 commit

Author: lancedb automation
SHA1: 2ae26b4ad7
Message: chore: update lance dependency to v1.0.0-beta.9
Date: 2025-11-25 05:13:12 +00:00
51 changed files with 567 additions and 3119 deletions

View File

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.4-beta.3"
+current_version = "0.22.4-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -19,7 +19,7 @@ rustflags = [
     "-Wclippy::string_add_assign",
     "-Wclippy::string_add",
     "-Wclippy::string_lit_as_bytes",
-    "-Wclippy::implicit_clone",
+    "-Wclippy::string_to_string",
     "-Wclippy::use_self",
     "-Dclippy::cargo",
     "-Dclippy::dbg_macro",

View File

@@ -97,6 +97,12 @@ jobs:
       fail-fast: false
       matrix:
         settings:
+          - target: x86_64-apple-darwin
+            host: macos-latest
+            features: ","
+            pre_build: |-
+              brew install protobuf
+              rustup target add x86_64-apple-darwin
           - target: aarch64-apple-darwin
             host: macos-latest
             features: fp16kernels

View File

@@ -64,6 +64,8 @@ jobs:
     strategy:
       matrix:
         config:
+          - target: x86_64-apple-darwin
+            runner: macos-13
           - target: aarch64-apple-darwin
            runner: warp-macos-14-arm64-6x
     env:

View File

@@ -143,9 +143,16 @@ jobs:
       - name: Delete wheels
         run: rm -rf target/wheels
   platform:
-    name: "Mac"
+    name: "Mac: ${{ matrix.config.name }}"
     timeout-minutes: 30
-    runs-on: macos-14
+    strategy:
+      matrix:
+        config:
+          - name: x86
+            runner: macos-13
+          - name: Arm
+            runner: macos-14
+    runs-on: "${{ matrix.config.runner }}"
     defaults:
       run:
         shell: bash

View File

@@ -122,7 +122,7 @@ jobs:
     timeout-minutes: 30
     strategy:
       matrix:
-        mac-runner: ["macos-14", "macos-15"]
+        mac-runner: ["macos-13", "macos-14"]
     runs-on: "${{ matrix.mac-runner }}"
     defaults:
       run:

Cargo.lock (generated): 772 changed lines. File diff suppressed because it is too large.

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"
 [workspace.dependencies]
-lance = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=1.0.0-beta.9", default-features = false, "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=1.0.0-beta.9", default-features = false, "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=1.0.0-beta.9", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=1.0.0-beta.9", "tag" = "v1.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "56.2", optional = false }

View File

@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.4-beta.3</version>
+    <version>0.22.4-beta.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

View File

@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.4-beta.3</version>
+    <version>0.22.4-beta.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

View File

@@ -6,7 +6,7 @@
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.22.4-beta.3</version>
+  <version>0.22.4-beta.2</version>
   <packaging>pom</packaging>
   <name>${project.artifactId}</name>
   <description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.4-beta.3"
+version = "0.22.4-beta.2"
 license.workspace = true
 description.workspace = true
 repository.workspace = true

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": [
     "win32"
   ],

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.22.4-beta.3",
+      "version": "0.22.4-beta.2",
       "cpu": [
         "x64",
         "arm64"

View File

@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.22.4-beta.3",
+  "version": "0.22.4-beta.2",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.26.0-beta.0"
+current_version = "0.25.4-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.26.0-beta.0"
+version = "0.25.4-beta.2"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -18,7 +18,6 @@ arrow = { version = "56.2", features = ["pyarrow"] }
 async-trait = "0.1"
 lancedb = { path = "../rust/lancedb", default-features = false }
 lance-core.workspace = true
-lance-namespace.workspace = true
 lance-io.workspace = true
 env_logger.workspace = true
 pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }

View File

@@ -10,7 +10,7 @@ dependencies = [
     "pyarrow>=16",
     "pydantic>=1.10",
     "tqdm>=4.27.0",
-    "lance-namespace>=0.2.1"
+    "lance-namespace>=0.0.21"
 ]
 description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
 [project.optional-dependencies]
 pylance = [
-    "pylance>=1.0.0b14",
+    "pylance>=0.25",
 ]
 tests = [
     "aiohttp",
@@ -59,7 +59,7 @@ tests = [
     "polars>=0.19, <=1.3.0",
     "tantivy",
     "pyarrow-stubs",
-    "pylance>=1.0.0b14",
+    "pylance>=1.0.0b4",
     "requests",
     "datafusion",
 ]

View File

@@ -3,30 +3,10 @@ from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
 import pyarrow as pa
-from .index import (
-    BTree,
-    IvfFlat,
-    IvfPq,
-    IvfSq,
-    Bitmap,
-    LabelList,
-    HnswPq,
-    HnswSq,
-    FTS,
-)
+from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
 from .io import StorageOptionsProvider
-from lance_namespace import (
-    ListNamespacesResponse,
-    CreateNamespaceResponse,
-    DropNamespaceResponse,
-    DescribeNamespaceResponse,
-    ListTablesResponse,
-)
 from .remote import ClientConfig
-IvfHnswPq: type[HnswPq] = HnswPq
-IvfHnswSq: type[HnswSq] = HnswSq
 class Session:
     def __init__(
         self,
@@ -46,38 +26,18 @@ class Connection(object):
     async def close(self): ...
     async def list_namespaces(
         self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListNamespacesResponse: ...
-    async def create_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        properties: Optional[Dict[str, str]] = None,
-    ) -> CreateNamespaceResponse: ...
-    async def drop_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        behavior: Optional[str] = None,
-    ) -> DropNamespaceResponse: ...
-    async def describe_namespace(
-        self,
-        namespace: List[str],
-    ) -> DescribeNamespaceResponse: ...
-    async def list_tables(
-        self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListTablesResponse: ...
+        namespace: Optional[List[str]],
+        page_token: Optional[str],
+        limit: Optional[int],
+    ) -> List[str]: ...
+    async def create_namespace(self, namespace: List[str]) -> None: ...
+    async def drop_namespace(self, namespace: List[str]) -> None: ...
     async def table_names(
         self,
         namespace: Optional[List[str]],
         start_after: Optional[str],
         limit: Optional[int],
-    ) -> list[str]: ...  # Deprecated: Use list_tables instead
+    ) -> list[str]: ...
     async def create_table(
         self,
         name: str,
@@ -144,17 +104,7 @@ class Table:
     async def create_index(
         self,
         column: str,
-        index: Union[
-            IvfFlat,
-            IvfSq,
-            IvfPq,
-            HnswPq,
-            HnswSq,
-            BTree,
-            Bitmap,
-            LabelList,
-            FTS,
-        ],
+        index: Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS],
         replace: Optional[bool],
         wait_timeout: Optional[object],
         *,
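
As a hedged illustration of the signature change above (not part of the diff itself), the same namespace calls look roughly like this on each side; `conn` is a hypothetical, already-opened Connection and only the signatures shown in the stub are assumed:

    # Sketch only: contrasts the two Connection stubs above; `conn` is hypothetical.
    from typing import List

    async def namespaces_new_side(conn) -> List[str]:
        # "+" side: create_namespace/drop_namespace return None and
        # list_namespaces returns a plain list of names.
        await conn.create_namespace(["prod"])
        return await conn.list_namespaces(namespace=[], page_token=None, limit=10)

    async def namespaces_old_side(conn) -> List[str]:
        # "-" side: the same calls accept mode/properties and return
        # lance_namespace response objects.
        await conn.create_namespace(["prod"], mode="exist_ok", properties=None)
        resp = await conn.list_namespaces(namespace=[], page_token=None, limit=10)
        return resp.namespaces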

View File

@@ -22,13 +22,6 @@ from lancedb.embeddings.registry import EmbeddingFunctionRegistry
 from lancedb.common import data_to_reader, sanitize_uri, validate_schema
 from lancedb.background_loop import LOOP
-from lance_namespace import (
-    ListNamespacesResponse,
-    CreateNamespaceResponse,
-    DropNamespaceResponse,
-    DescribeNamespaceResponse,
-    ListTablesResponse,
-)
 from . import __version__
 from ._lancedb import connect as lancedb_connect # type: ignore
@@ -55,12 +48,6 @@ if TYPE_CHECKING:
     from .io import StorageOptionsProvider
     from ._lancedb import Session
-from .namespace_utils import (
-    _normalize_create_namespace_mode,
-    _normalize_drop_namespace_mode,
-    _normalize_drop_namespace_behavior,
-)
 class DBConnection(EnforceOverrides):
     """An active LanceDB connection interface."""
@@ -69,8 +56,8 @@ class DBConnection(EnforceOverrides):
         self,
         namespace: Optional[List[str]] = None,
         page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListNamespacesResponse:
+        limit: int = 10,
+    ) -> Iterable[str]:
         """List immediate child namespace names in the given namespace.
         Parameters
@@ -79,119 +66,43 @@ class DBConnection(EnforceOverrides):
             The parent namespace to list namespaces in.
             Empty list represents root namespace.
         page_token: str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit: int, optional
-            The maximum number of results to return.
+            The token to use for pagination. If not present, start from the beginning.
+        limit: int, default 10
+            The size of the page to return.
         Returns
         -------
-        ListNamespacesResponse
-            Response containing namespace names and optional page_token for pagination.
+        Iterable of str
+            List of immediate child namespace names
         """
         if namespace is None:
             namespace = []
-        return ListNamespacesResponse(namespaces=[], page_token=None)
+        return []
-    def create_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        properties: Optional[Dict[str, str]] = None,
-    ) -> CreateNamespaceResponse:
+    def create_namespace(self, namespace: List[str]) -> None:
         """Create a new namespace.
         Parameters
         ----------
         namespace: List[str]
             The namespace identifier to create.
-        mode: str, optional
-            Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
-            or "overwrite" (replace if exists). Case insensitive.
-        properties: Dict[str, str], optional
-            Properties to set on the namespace.
-        Returns
-        -------
-        CreateNamespaceResponse
-            Response containing the properties of the created namespace.
         """
         raise NotImplementedError(
             "Namespace operations are not supported for this connection type"
         )
-    def drop_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        behavior: Optional[str] = None,
-    ) -> DropNamespaceResponse:
+    def drop_namespace(self, namespace: List[str]) -> None:
         """Drop a namespace.
         Parameters
         ----------
         namespace: List[str]
             The namespace identifier to drop.
-        mode: str, optional
-            Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
-        behavior: str, optional
-            Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
-            Case insensitive.
-        Returns
-        -------
-        DropNamespaceResponse
-            Response containing properties and transaction_id if applicable.
         """
         raise NotImplementedError(
             "Namespace operations are not supported for this connection type"
         )
-    def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
-        """Describe a namespace.
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to describe.
-        Returns
-        -------
-        DescribeNamespaceResponse
-            Response containing the namespace properties.
-        """
-        raise NotImplementedError(
-            "Namespace operations are not supported for this connection type"
-        )
-    def list_tables(
-        self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListTablesResponse:
-        """List all tables in this database with pagination support.
-        Parameters
-        ----------
-        namespace: List[str], optional
-            The namespace to list tables in.
-            None or empty list represents root namespace.
-        page_token: str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit: int, optional
-            The maximum number of results to return.
-        Returns
-        -------
-        ListTablesResponse
-            Response containing table names and optional page_token for pagination.
-        """
-        raise NotImplementedError(
-            "list_tables is not supported for this connection type"
-        )
     @abstractmethod
     def table_names(
         self,
@@ -283,10 +194,6 @@ class DBConnection(EnforceOverrides):
             connection will be inherited by the table, but can be overridden here.
             See available options at
             <https://lancedb.com/docs/storage/>
-            To enable stable row IDs (row IDs remain stable after compaction,
-            update, delete, and merges), set `new_table_enable_stable_row_ids`
-            to `"true"` in storage_options when connecting to the database.
         data_storage_version: optional, str, default "stable"
             Deprecated. Set `storage_options` when connecting to the database and set
             `new_table_data_storage_version` in the options.
@@ -646,8 +553,8 @@ class LanceDBConnection(DBConnection):
         self,
         namespace: Optional[List[str]] = None,
         page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListNamespacesResponse:
+        limit: int = 10,
+    ) -> Iterable[str]:
         """List immediate child namespace names in the given namespace.
         Parameters
@@ -656,15 +563,14 @@ class LanceDBConnection(DBConnection):
             The parent namespace to list namespaces in.
             None or empty list represents root namespace.
         page_token: str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit: int, optional
-            The maximum number of results to return.
+            The token to use for pagination. If not present, start from the beginning.
+        limit: int, default 10
+            The size of the page to return.
         Returns
         -------
-        ListNamespacesResponse
-            Response containing namespace names and optional page_token for pagination.
+        Iterable of str
+            List of immediate child namespace names
         """
         if namespace is None:
             namespace = []
@@ -675,111 +581,26 @@
         )
     @override
-    def create_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        properties: Optional[Dict[str, str]] = None,
-    ) -> CreateNamespaceResponse:
+    def create_namespace(self, namespace: List[str]) -> None:
         """Create a new namespace.
         Parameters
         ----------
         namespace: List[str]
             The namespace identifier to create.
-        mode: str, optional
-            Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
-            or "overwrite" (replace if exists). Case insensitive.
-        properties: Dict[str, str], optional
-            Properties to set on the namespace.
-        Returns
-        -------
-        CreateNamespaceResponse
-            Response containing the properties of the created namespace.
         """
-        return LOOP.run(
-            self._conn.create_namespace(
-                namespace=namespace, mode=mode, properties=properties
-            )
-        )
+        LOOP.run(self._conn.create_namespace(namespace=namespace))
     @override
-    def drop_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        behavior: Optional[str] = None,
-    ) -> DropNamespaceResponse:
+    def drop_namespace(self, namespace: List[str]) -> None:
         """Drop a namespace.
         Parameters
         ----------
         namespace: List[str]
             The namespace identifier to drop.
-        mode: str, optional
-            Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
-        behavior: str, optional
-            Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
-            Case insensitive.
-        Returns
-        -------
-        DropNamespaceResponse
-            Response containing properties and transaction_id if applicable.
        """
-        return LOOP.run(
-            self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
-        )
+        return LOOP.run(self._conn.drop_namespace(namespace=namespace))
-    @override
-    def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
-        """Describe a namespace.
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to describe.
-        Returns
-        -------
-        DescribeNamespaceResponse
-            Response containing the namespace properties.
-        """
-        return LOOP.run(self._conn.describe_namespace(namespace=namespace))
-    @override
-    def list_tables(
-        self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListTablesResponse:
-        """List all tables in this database with pagination support.
-        Parameters
-        ----------
-        namespace: List[str], optional
-            The namespace to list tables in.
-            None or empty list represents root namespace.
-        page_token: str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit: int, optional
-            The maximum number of results to return.
-        Returns
-        -------
-        ListTablesResponse
-            Response containing table names and optional page_token for pagination.
-        """
-        if namespace is None:
-            namespace = []
-        return LOOP.run(
-            self._conn.list_tables(
-                namespace=namespace, page_token=page_token, limit=limit
-            )
-        )
     @override
     def table_names(
@@ -791,9 +612,6 @@ class LanceDBConnection(DBConnection):
     ) -> Iterable[str]:
         """Get the names of all tables in the database. The names are sorted.
-        .. deprecated::
-            Use :meth:`list_tables` instead, which provides proper pagination support.
         Parameters
         ----------
         namespace: List[str], optional
@@ -808,13 +626,6 @@ class LanceDBConnection(DBConnection):
         Iterator of str.
             A list of table names.
         """
-        import warnings
-        warnings.warn(
-            "table_names() is deprecated, use list_tables() instead",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if namespace is None:
             namespace = []
         return LOOP.run(
@@ -1129,8 +940,8 @@ class AsyncConnection(object):
         self,
         namespace: Optional[List[str]] = None,
         page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListNamespacesResponse:
+        limit: int = 10,
+    ) -> Iterable[str]:
         """List immediate child namespace names in the given namespace.
         Parameters
@@ -1140,128 +951,39 @@ class AsyncConnection(object):
             None or empty list represents root namespace.
         page_token: str, optional
             The token to use for pagination. If not present, start from the beginning.
-        limit: int, optional
-            The maximum number of results to return.
+        limit: int, default 10
+            The size of the page to return.
         Returns
         -------
-        ListNamespacesResponse
-            Response containing namespace names and optional pagination token
+        Iterable of str
+            List of immediate child namespace names (not full paths)
         """
         if namespace is None:
             namespace = []
-        result = await self._inner.list_namespaces(
+        return await self._inner.list_namespaces(
             namespace=namespace, page_token=page_token, limit=limit
         )
-        return ListNamespacesResponse(**result)
-    async def create_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        properties: Optional[Dict[str, str]] = None,
-    ) -> CreateNamespaceResponse:
+    async def create_namespace(self, namespace: List[str]) -> None:
         """Create a new namespace.
         Parameters
         ----------
         namespace: List[str]
             The namespace identifier to create.
-        mode: str, optional
-            Creation mode - "create", "exist_ok", or "overwrite". Case insensitive.
-        properties: Dict[str, str], optional
-            Properties to associate with the namespace
-        Returns
-        -------
-        CreateNamespaceResponse
-            Response containing namespace properties
         """
-        result = await self._inner.create_namespace(
-            namespace,
-            mode=_normalize_create_namespace_mode(mode),
-            properties=properties,
-        )
-        return CreateNamespaceResponse(**result)
+        await self._inner.create_namespace(namespace)
-    async def drop_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        behavior: Optional[str] = None,
-    ) -> DropNamespaceResponse:
+    async def drop_namespace(self, namespace: List[str]) -> None:
         """Drop a namespace.
         Parameters
         ----------
         namespace: List[str]
             The namespace identifier to drop.
-        mode: str, optional
-            Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
-        behavior: str, optional
-            Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
-            Case insensitive.
-        Returns
-        -------
-        DropNamespaceResponse
-            Response containing properties and transaction_id if applicable.
        """
-        result = await self._inner.drop_namespace(
-            namespace,
-            mode=_normalize_drop_namespace_mode(mode),
-            behavior=_normalize_drop_namespace_behavior(behavior),
-        )
-        return DropNamespaceResponse(**result)
+        await self._inner.drop_namespace(namespace)
-    async def describe_namespace(
-        self, namespace: List[str]
-    ) -> DescribeNamespaceResponse:
-        """Describe a namespace.
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to describe.
-        Returns
-        -------
-        DescribeNamespaceResponse
-            Response containing the namespace properties.
-        """
-        result = await self._inner.describe_namespace(namespace)
-        return DescribeNamespaceResponse(**result)
-    async def list_tables(
-        self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListTablesResponse:
-        """List all tables in this database with pagination support.
-        Parameters
-        ----------
-        namespace: List[str], optional
-            The namespace to list tables in.
-            None or empty list represents root namespace.
-        page_token: str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit: int, optional
-            The maximum number of results to return.
-        Returns
-        -------
-        ListTablesResponse
-            Response containing table names and optional page_token for pagination.
-        """
-        if namespace is None:
-            namespace = []
-        result = await self._inner.list_tables(
-            namespace=namespace, page_token=page_token, limit=limit
-        )
-        return ListTablesResponse(**result)
     async def table_names(
         self,
@@ -1272,9 +994,6 @@ class AsyncConnection(object):
     ) -> Iterable[str]:
         """List all tables in this database, in sorted order
-        .. deprecated::
-            Use :meth:`list_tables` instead, which provides proper pagination support.
         Parameters
         ----------
         namespace: List[str], optional
@@ -1293,13 +1012,6 @@ class AsyncConnection(object):
         -------
         Iterable of str
        """
-        import warnings
-        warnings.warn(
-            "table_names() is deprecated, use list_tables() instead",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if namespace is None:
             namespace = []
         return await self._inner.table_names(
@@ -1367,10 +1079,6 @@ class AsyncConnection(object):
             See available options at
             <https://lancedb.com/docs/storage/>
-            To enable stable row IDs (row IDs remain stable after compaction,
-            update, delete, and merges), set `new_table_enable_stable_row_ids`
-            to `"true"` in storage_options when connecting to the database.
         Returns
         -------
         AsyncTable
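
The page_token/limit pair documented in the hunks above is meant to be used as a cursor. A minimal, hedged sketch of that loop, assuming the richer ("-"-side) API where list_tables returns a ListTablesResponse with `tables` and `page_token` fields; `db` is a hypothetical open connection:

    # Sketch only: pagination against the "-"-side list_tables API described above.
    def all_table_names(db, namespace=None, page_size=100):
        names, token = [], None
        while True:
            resp = db.list_tables(namespace=namespace, page_token=token, limit=page_size)
            names.extend(resp.tables or [])
            token = resp.page_token
            if not token:  # no page_token means there are no further pages
                return names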

View File

@@ -376,11 +376,6 @@ class HnswSq:
     target_partition_size: Optional[int] = None
-# Backwards-compatible aliases
-IvfHnswPq = HnswPq
-IvfHnswSq = HnswSq
 @dataclass
 class IvfFlat:
     """Describes an IVF Flat Index
@@ -480,36 +475,6 @@ class IvfFlat:
     target_partition_size: Optional[int] = None
-@dataclass
-class IvfSq:
-    """Describes an IVF Scalar Quantization (SQ) index.
-    This index applies scalar quantization to compress vectors and organizes the
-    quantized vectors into IVF partitions. It offers a balance between search
-    speed and storage efficiency while keeping good recall.
-    Attributes
-    ----------
-    distance_type: str, default "l2"
-        The distance metric used to train and search the index. Supported values
-        are "l2", "cosine", and "dot".
-    num_partitions: int, default sqrt(num_rows)
-        Number of IVF partitions to create.
-    max_iterations: int, default 50
-        Maximum iterations for kmeans during partition training.
-    sample_rate: int, default 256
-        Controls the number of training vectors: sample_rate * num_partitions.
-    target_partition_size: int, optional
-        Target size for each partition; adjusts the balance between speed and accuracy.
-    """
-    distance_type: Literal["l2", "cosine", "dot"] = "l2"
-    num_partitions: Optional[int] = None
-    max_iterations: int = 50
-    sample_rate: int = 256
-    target_partition_size: Optional[int] = None
 @dataclass
 class IvfPq:
     """Describes an IVF PQ Index
@@ -696,9 +661,6 @@ class IvfRq:
 __all__ = [
     "BTree",
     "IvfPq",
-    "IvfHnswPq",
-    "IvfHnswSq",
-    "IvfSq",
     "IvfRq",
     "IvfFlat",
     "HnswPq",

View File

@@ -23,29 +23,7 @@ from datetime import timedelta
 import pyarrow as pa
 from lancedb.db import DBConnection, LanceDBConnection
-from lancedb.namespace_utils import (
-    _normalize_create_namespace_mode,
-    _normalize_drop_namespace_mode,
-    _normalize_drop_namespace_behavior,
-)
 from lancedb.io import StorageOptionsProvider
-from lance_namespace import (
-    LanceNamespace,
-    connect as namespace_connect,
-    CreateNamespaceResponse,
-    DescribeNamespaceResponse,
-    DropNamespaceResponse,
-    ListNamespacesResponse,
-    ListTablesResponse,
-    ListTablesRequest,
-    DescribeTableRequest,
-    DescribeNamespaceRequest,
-    DropTableRequest,
-    ListNamespacesRequest,
-    CreateNamespaceRequest,
-    DropNamespaceRequest,
-    CreateEmptyTableRequest,
-)
 from lancedb.table import AsyncTable, LanceTable, Table
 from lancedb.util import validate_table_name
 from lancedb.common import DATA
@@ -53,9 +31,19 @@ from lancedb.pydantic import LanceModel
 from lancedb.embeddings import EmbeddingFunctionConfig
 from ._lancedb import Session
-from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
-from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
-from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
+from lance_namespace import LanceNamespace, connect as namespace_connect
+from lance_namespace_urllib3_client.models import (
+    ListTablesRequest,
+    DescribeTableRequest,
+    DropTableRequest,
+    ListNamespacesRequest,
+    CreateNamespaceRequest,
+    DropNamespaceRequest,
+    CreateEmptyTableRequest,
+    JsonArrowSchema,
+    JsonArrowField,
+    JsonArrowDataType,
+)
 def _convert_pyarrow_type_to_json(arrow_type: pa.DataType) -> JsonArrowDataType:
@@ -253,19 +241,6 @@ class LanceNamespaceDBConnection(DBConnection):
         *,
         namespace: Optional[List[str]] = None,
     ) -> Iterable[str]:
-        """
-        List table names in the database.
-        .. deprecated::
-            Use :meth:`list_tables` instead, which provides proper pagination support.
-        """
-        import warnings
-        warnings.warn(
-            "table_names() is deprecated, use list_tables() instead",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if namespace is None:
             namespace = []
         request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
@@ -458,8 +433,8 @@ class LanceNamespaceDBConnection(DBConnection):
         self,
         namespace: Optional[List[str]] = None,
         page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListNamespacesResponse:
+        limit: int = 10,
+    ) -> Iterable[str]:
         """
         List child namespaces under the given namespace.
@@ -469,15 +444,14 @@ class LanceNamespaceDBConnection(DBConnection):
             The parent namespace to list children from.
             If None, lists root-level namespaces.
         page_token : Optional[str]
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit : int, optional
+            Pagination token for listing results.
+        limit : int
             Maximum number of namespaces to return.
         Returns
         -------
-        ListNamespacesResponse
-            Response containing namespace names and optional page_token for pagination.
+        Iterable[str]
+            Names of child namespaces.
         """
         if namespace is None:
             namespace = []
@@ -485,18 +459,10 @@ class LanceNamespaceDBConnection(DBConnection):
             id=namespace, page_token=page_token, limit=limit
         )
         response = self._ns.list_namespaces(request)
-        return ListNamespacesResponse(
-            namespaces=response.namespaces if response.namespaces else [],
-            page_token=response.page_token,
-        )
+        return response.namespaces if response.namespaces else []
     @override
-    def create_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        properties: Optional[Dict[str, str]] = None,
-    ) -> CreateNamespaceResponse:
+    def create_namespace(self, namespace: List[str]) -> None:
         """
         Create a new namespace.
@@ -504,34 +470,12 @@ class LanceNamespaceDBConnection(DBConnection):
         ----------
         namespace : List[str]
             The namespace path to create.
-        mode : str, optional
-            Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
-            or "overwrite" (replace if exists). Case insensitive.
-        properties : Dict[str, str], optional
-            Properties to set on the namespace.
-        Returns
-        -------
-        CreateNamespaceResponse
-            Response containing the properties of the created namespace.
        """
-        request = CreateNamespaceRequest(
-            id=namespace,
-            mode=_normalize_create_namespace_mode(mode),
-            properties=properties,
-        )
-        response = self._ns.create_namespace(request)
-        return CreateNamespaceResponse(
-            properties=response.properties if hasattr(response, "properties") else None
-        )
+        request = CreateNamespaceRequest(id=namespace)
+        self._ns.create_namespace(request)
     @override
-    def drop_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        behavior: Optional[str] = None,
-    ) -> DropNamespaceResponse:
+    def drop_namespace(self, namespace: List[str]) -> None:
         """
         Drop a namespace.
@@ -539,87 +483,9 @@ class LanceNamespaceDBConnection(DBConnection):
         ----------
         namespace : List[str]
             The namespace path to drop.
-        mode : str, optional
-            Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
-        behavior : str, optional
-            Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
-            Case insensitive.
-        Returns
-        -------
-        DropNamespaceResponse
-            Response containing properties and transaction_id if applicable.
        """
-        request = DropNamespaceRequest(
-            id=namespace,
-            mode=_normalize_drop_namespace_mode(mode),
-            behavior=_normalize_drop_namespace_behavior(behavior),
-        )
-        response = self._ns.drop_namespace(request)
-        return DropNamespaceResponse(
-            properties=(
-                response.properties if hasattr(response, "properties") else None
-            ),
-            transaction_id=(
-                response.transaction_id if hasattr(response, "transaction_id") else None
-            ),
-        )
-    @override
-    def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
-        """
-        Describe a namespace.
-        Parameters
-        ----------
-        namespace : List[str]
-            The namespace identifier to describe.
-        Returns
-        -------
-        DescribeNamespaceResponse
-            Response containing the namespace properties.
-        """
-        request = DescribeNamespaceRequest(id=namespace)
-        response = self._ns.describe_namespace(request)
-        return DescribeNamespaceResponse(
-            properties=response.properties if hasattr(response, "properties") else None
-        )
-    @override
-    def list_tables(
-        self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListTablesResponse:
-        """
-        List all tables in this database with pagination support.
-        Parameters
-        ----------
-        namespace : List[str], optional
-            The namespace to list tables in.
-            None or empty list represents root namespace.
-        page_token : str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit : int, optional
-            The maximum number of results to return.
-        Returns
-        -------
-        ListTablesResponse
-            Response containing table names and optional page_token for pagination.
-        """
-        if namespace is None:
-            namespace = []
-        request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
-        response = self._ns.list_tables(request)
-        return ListTablesResponse(
-            tables=response.tables if response.tables else [],
-            page_token=response.page_token,
-        )
+        request = DropNamespaceRequest(id=namespace)
+        self._ns.drop_namespace(request)
     def _lance_table_from_uri(
         self,
@@ -697,19 +563,7 @@ class AsyncLanceNamespaceDBConnection:
         *,
         namespace: Optional[List[str]] = None,
     ) -> Iterable[str]:
-        """
-        List table names in the namespace.
-        .. deprecated::
-            Use :meth:`list_tables` instead, which provides proper pagination support.
-        """
-        import warnings
-        warnings.warn(
-            "table_names() is deprecated, use list_tables() instead",
-            DeprecationWarning,
-            stacklevel=2,
-        )
+        """List table names in the namespace."""
         if namespace is None:
             namespace = []
         request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
@@ -917,8 +771,8 @@ class AsyncLanceNamespaceDBConnection:
         self,
         namespace: Optional[List[str]] = None,
         page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListNamespacesResponse:
+        limit: int = 10,
+    ) -> Iterable[str]:
         """
         List child namespaces under the given namespace.
@@ -928,15 +782,14 @@ class AsyncLanceNamespaceDBConnection:
             The parent namespace to list children from.
             If None, lists root-level namespaces.
         page_token : Optional[str]
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit : int, optional
+            Pagination token for listing results.
+        limit : int
             Maximum number of namespaces to return.
         Returns
         -------
-        ListNamespacesResponse
-            Response containing namespace names and optional page_token for pagination.
+        Iterable[str]
+            Names of child namespaces.
         """
         if namespace is None:
             namespace = []
@@ -944,17 +797,9 @@ class AsyncLanceNamespaceDBConnection:
             id=namespace, page_token=page_token, limit=limit
         )
         response = self._ns.list_namespaces(request)
-        return ListNamespacesResponse(
-            namespaces=response.namespaces if response.namespaces else [],
-            page_token=response.page_token,
-        )
+        return response.namespaces if response.namespaces else []
-    async def create_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        properties: Optional[Dict[str, str]] = None,
-    ) -> CreateNamespaceResponse:
+    async def create_namespace(self, namespace: List[str]) -> None:
         """
         Create a new namespace.
@@ -962,33 +807,11 @@ class AsyncLanceNamespaceDBConnection:
         ----------
         namespace : List[str]
             The namespace path to create.
-        mode : str, optional
-            Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
-            or "overwrite" (replace if exists). Case insensitive.
-        properties : Dict[str, str], optional
-            Properties to set on the namespace.
-        Returns
-        -------
-        CreateNamespaceResponse
-            Response containing the properties of the created namespace.
        """
-        request = CreateNamespaceRequest(
-            id=namespace,
-            mode=_normalize_create_namespace_mode(mode),
-            properties=properties,
-        )
-        response = self._ns.create_namespace(request)
-        return CreateNamespaceResponse(
-            properties=response.properties if hasattr(response, "properties") else None
-        )
+        request = CreateNamespaceRequest(id=namespace)
+        self._ns.create_namespace(request)
-    async def drop_namespace(
-        self,
-        namespace: List[str],
-        mode: Optional[str] = None,
-        behavior: Optional[str] = None,
-    ) -> DropNamespaceResponse:
+    async def drop_namespace(self, namespace: List[str]) -> None:
         """
         Drop a namespace.
@@ -996,87 +819,9 @@ class AsyncLanceNamespaceDBConnection:
         ----------
         namespace : List[str]
             The namespace path to drop.
-        mode : str, optional
-            Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
-        behavior : str, optional
-            Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
-            Case insensitive.
-        Returns
-        -------
-        DropNamespaceResponse
-            Response containing properties and transaction_id if applicable.
        """
-        request = DropNamespaceRequest(
-            id=namespace,
-            mode=_normalize_drop_namespace_mode(mode),
-            behavior=_normalize_drop_namespace_behavior(behavior),
-        )
-        response = self._ns.drop_namespace(request)
-        return DropNamespaceResponse(
-            properties=(
-                response.properties if hasattr(response, "properties") else None
-            ),
-            transaction_id=(
-                response.transaction_id if hasattr(response, "transaction_id") else None
-            ),
-        )
-    async def describe_namespace(
-        self, namespace: List[str]
-    ) -> DescribeNamespaceResponse:
-        """
-        Describe a namespace.
-        Parameters
-        ----------
-        namespace : List[str]
-            The namespace identifier to describe.
-        Returns
-        -------
-        DescribeNamespaceResponse
-            Response containing the namespace properties.
-        """
-        request = DescribeNamespaceRequest(id=namespace)
-        response = self._ns.describe_namespace(request)
-        return DescribeNamespaceResponse(
-            properties=response.properties if hasattr(response, "properties") else None
-        )
-    async def list_tables(
-        self,
-        namespace: Optional[List[str]] = None,
-        page_token: Optional[str] = None,
-        limit: Optional[int] = None,
-    ) -> ListTablesResponse:
-        """
-        List all tables in this database with pagination support.
-        Parameters
-        ----------
-        namespace : List[str], optional
-            The namespace to list tables in.
-            None or empty list represents root namespace.
-        page_token : str, optional
-            Token for pagination. Use the token from a previous response
-            to get the next page of results.
-        limit : int, optional
-            The maximum number of results to return.
-        Returns
-        -------
-        ListTablesResponse
-            Response containing table names and optional page_token for pagination.
-        """
-        if namespace is None:
-            namespace = []
-        request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
-        response = self._ns.list_tables(request)
-        return ListTablesResponse(
-            tables=response.tables if response.tables else [],
-            page_token=response.page_token,
-        )
+        request = DropNamespaceRequest(id=namespace)
+        self._ns.drop_namespace(request)
 def connect_namespace(

View File

@@ -1,27 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-"""Utility functions for namespace operations."""
-from typing import Optional
-def _normalize_create_namespace_mode(mode: Optional[str]) -> Optional[str]:
-    """Normalize create namespace mode to lowercase (API expects lowercase)."""
-    if mode is None:
-        return None
-    return mode.lower()
-def _normalize_drop_namespace_mode(mode: Optional[str]) -> Optional[str]:
-    """Normalize drop namespace mode to uppercase (API expects uppercase)."""
-    if mode is None:
-        return None
-    return mode.upper()
-def _normalize_drop_namespace_behavior(behavior: Optional[str]) -> Optional[str]:
-    """Normalize drop namespace behavior to uppercase (API expects uppercase)."""
-    if behavior is None:
-        return None
-    return behavior.upper()

View File

@@ -2429,8 +2429,9 @@ class AsyncQueryBase(object):
         >>> from lancedb import connect_async
         >>> async def doctest_example():
         ...     conn = await connect_async("./.lancedb")
-        ...     table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
-        ...     plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
+        ...     table = await conn.create_table("my_table", [{"vector": [99, 99]}])
+        ...     query = [100, 100]
+        ...     plan = await table.query().nearest_to([1, 2]).explain_plan(True)
         ...     print(plan)
         >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
         ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
@@ -2439,7 +2440,6 @@ class AsyncQueryBase(object):
         SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
         KNNVectorDistance: metric=l2
         LanceRead: uri=..., projection=[vector], ...
-        <BLANKLINE>
         Parameters
         ----------
@@ -3141,9 +3141,10 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
         >>> from lancedb.index import FTS
         >>> async def doctest_example():
         ...     conn = await connect_async("./.lancedb")
-        ...     table = await conn.create_table("my_table", [{"vector": [99.0, 99.0], "text": "hello world"}])
+        ...     table = await conn.create_table("my_table", [{"vector": [99, 99], "text": "hello world"}])
         ...     await table.create_index("text", config=FTS(with_position=False))
-        ...     plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
+        ...     query = [100, 100]
+        ...     plan = await table.query().nearest_to([1, 2]).nearest_to_text("hello").explain_plan(True)
         ...     print(plan)
         >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
         Vector Search Plan:
@@ -3417,8 +3418,9 @@ class BaseQueryBuilder(object):
         >>> from lancedb import connect_async
         >>> async def doctest_example():
         ...     conn = await connect_async("./.lancedb")
-        ...     table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
-        ...     plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
+        ...     table = await conn.create_table("my_table", [{"vector": [99, 99]}])
+        ...     query = [100, 100]
+        ...     plan = await table.query().nearest_to([1, 2]).explain_plan(True)
         ...     print(plan)
         >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
         ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
@@ -3427,7 +3429,6 @@ class BaseQueryBuilder(object):
         SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
         KNNVectorDistance: metric=l2
         LanceRead: uri=..., projection=[vector], ...
-        <BLANKLINE>
         Parameters
         ----------

View File

@@ -23,13 +23,6 @@ import pyarrow as pa
from ..common import DATA from ..common import DATA
from ..db import DBConnection, LOOP from ..db import DBConnection, LOOP
from ..embeddings import EmbeddingFunctionConfig from ..embeddings import EmbeddingFunctionConfig
from lance_namespace import (
CreateNamespaceResponse,
DescribeNamespaceResponse,
DropNamespaceResponse,
ListNamespacesResponse,
ListTablesResponse,
)
from ..pydantic import LanceModel from ..pydantic import LanceModel
from ..table import Table from ..table import Table
from ..util import validate_table_name from ..util import validate_table_name
@@ -113,8 +106,8 @@ class RemoteDBConnection(DBConnection):
self, self,
namespace: Optional[List[str]] = None, namespace: Optional[List[str]] = None,
page_token: Optional[str] = None, page_token: Optional[str] = None,
limit: Optional[int] = None, limit: int = 10,
) -> ListNamespacesResponse: ) -> Iterable[str]:
"""List immediate child namespace names in the given namespace. """List immediate child namespace names in the given namespace.
Parameters Parameters
@@ -123,15 +116,14 @@ class RemoteDBConnection(DBConnection):
The parent namespace to list namespaces in. The parent namespace to list namespaces in.
None or empty list represents root namespace. None or empty list represents root namespace.
page_token: str, optional page_token: str, optional
Token for pagination. Use the token from a previous response The token to use for pagination. If not present, start from the beginning.
to get the next page of results. limit: int, default 10
limit: int, optional The size of the page to return.
The maximum number of results to return.
Returns Returns
------- -------
ListNamespacesResponse Iterable of str
Response containing namespace names and optional page_token for pagination. List of immediate child namespace names
""" """
if namespace is None: if namespace is None:
namespace = [] namespace = []
@@ -142,111 +134,26 @@ class RemoteDBConnection(DBConnection):
) )
@override @override
def create_namespace( def create_namespace(self, namespace: List[str]) -> None:
self,
namespace: List[str],
mode: Optional[str] = None,
properties: Optional[Dict[str, str]] = None,
) -> CreateNamespaceResponse:
"""Create a new namespace. """Create a new namespace.
Parameters Parameters
---------- ----------
namespace: List[str] namespace: List[str]
The namespace identifier to create. The namespace identifier to create.
mode: str, optional
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
or "overwrite" (replace if exists). Case insensitive.
properties: Dict[str, str], optional
Properties to set on the namespace.
Returns
-------
CreateNamespaceResponse
Response containing the properties of the created namespace.
""" """
return LOOP.run( LOOP.run(self._conn.create_namespace(namespace=namespace))
self._conn.create_namespace(
namespace=namespace, mode=mode, properties=properties
)
)
@override @override
def drop_namespace( def drop_namespace(self, namespace: List[str]) -> None:
self,
namespace: List[str],
mode: Optional[str] = None,
behavior: Optional[str] = None,
) -> DropNamespaceResponse:
"""Drop a namespace. """Drop a namespace.
Parameters Parameters
---------- ----------
namespace: List[str] namespace: List[str]
The namespace identifier to drop. The namespace identifier to drop.
mode: str, optional
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
behavior: str, optional
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
Case insensitive.
Returns
-------
DropNamespaceResponse
Response containing properties and transaction_id if applicable.
""" """
return LOOP.run( return LOOP.run(self._conn.drop_namespace(namespace=namespace))
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
)
@override
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
"""Describe a namespace.
Parameters
----------
namespace: List[str]
The namespace identifier to describe.
Returns
-------
DescribeNamespaceResponse
Response containing the namespace properties.
"""
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
@override
def list_tables(
self,
namespace: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: Optional[int] = None,
) -> ListTablesResponse:
"""List all tables in this database with pagination support.
Parameters
----------
namespace: List[str], optional
The namespace to list tables in.
None or empty list represents root namespace.
page_token: str, optional
Token for pagination. Use the token from a previous response
to get the next page of results.
limit: int, optional
The maximum number of results to return.
Returns
-------
ListTablesResponse
Response containing table names and optional page_token for pagination.
"""
if namespace is None:
namespace = []
return LOOP.run(
self._conn.list_tables(
namespace=namespace, page_token=page_token, limit=limit
)
)
@override @override
def table_names( def table_names(
@@ -258,9 +165,6 @@ class RemoteDBConnection(DBConnection):
) -> Iterable[str]: ) -> Iterable[str]:
"""List the names of all tables in the database. """List the names of all tables in the database.
.. deprecated::
Use :meth:`list_tables` instead, which provides proper pagination support.
Parameters Parameters
---------- ----------
namespace: List[str], default [] namespace: List[str], default []
@@ -275,13 +179,6 @@ class RemoteDBConnection(DBConnection):
------- -------
An iterator of table names. An iterator of table names.
""" """
import warnings
warnings.warn(
"table_names() is deprecated, use list_tables() instead",
DeprecationWarning,
stacklevel=2,
)
if namespace is None: if namespace is None:
namespace = [] namespace = []
return LOOP.run( return LOOP.run(
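Editor's note: the hunk above switches list_namespaces / list_tables between a plain-iterable API and a response-object API with page_token pagination. The sketch below drains every page, assuming the response-object variant (ListTablesResponse with .tables and .page_token) from the left-hand side of the hunk; the connection URI, API key, and page size are placeholders.

```python
import lancedb


def all_table_names(db, page_size=100):
    """Drain every page of list_tables() into a single list."""
    names, token = [], None
    while True:
        resp = db.list_tables(page_token=token, limit=page_size)
        names.extend(resp.tables)   # table names in the current page
        token = resp.page_token     # None once the last page is reached
        if not token:
            return names


# Connection details are placeholders for a remote / LanceDB Cloud database.
db = lancedb.connect("db://my-project", api_key="sk-...", region="us-east-1")
print(all_table_names(db))
```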

View File

@@ -18,7 +18,7 @@ from lancedb._lancedb import (
UpdateResult, UpdateResult,
) )
from lancedb.embeddings.base import EmbeddingFunctionConfig from lancedb.embeddings.base import EmbeddingFunctionConfig
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, IvfSq, LabelList from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
from lancedb.remote.db import LOOP from lancedb.remote.db import LOOP
import pyarrow as pa import pyarrow as pa
@@ -265,8 +265,6 @@ class RemoteTable(Table):
num_sub_vectors=num_sub_vectors, num_sub_vectors=num_sub_vectors,
num_bits=num_bits, num_bits=num_bits,
) )
elif index_type == "IVF_SQ":
config = IvfSq(distance_type=metric, num_partitions=num_partitions)
elif index_type == "IVF_HNSW_PQ": elif index_type == "IVF_HNSW_PQ":
raise ValueError( raise ValueError(
"IVF_HNSW_PQ is not supported on LanceDB cloud." "IVF_HNSW_PQ is not supported on LanceDB cloud."
@@ -279,7 +277,7 @@ class RemoteTable(Table):
else: else:
raise ValueError( raise ValueError(
f"Unknown vector index type: {index_type}. Valid options are" f"Unknown vector index type: {index_type}. Valid options are"
" 'IVF_FLAT', 'IVF_SQ', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'" " 'IVF_FLAT', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
) )
LOOP.run( LOOP.run(
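Editor's note: the hunk above adds and removes "IVF_SQ" from the accepted index_type strings. The sketch below requests a vector index by name on the sync API; the table contents and tuning parameters are assumptions, and "IVF_SQ" itself is only accepted by builds that still ship the IvfSq config.

```python
import numpy as np
import lancedb

# Local path and data are illustrative.
db = lancedb.connect("data/index-demo")
tbl = db.create_table(
    "vectors",
    data=[{"vector": np.random.rand(128).tolist(), "id": i} for i in range(1000)],
    mode="overwrite",
)

# "IVF_PQ" is valid on both sides of this diff; swap in "IVF_SQ" only on
# builds that still list it among the valid options.
tbl.create_index(
    metric="cosine",
    index_type="IVF_PQ",
    num_partitions=16,
    num_sub_vectors=16,
)
```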

View File

@@ -44,18 +44,7 @@ import numpy as np
from .common import DATA, VEC, VECTOR_COLUMN_NAME from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from .index import ( from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
BTree,
IvfFlat,
IvfPq,
IvfSq,
Bitmap,
IvfRq,
LabelList,
HnswPq,
HnswSq,
FTS,
)
from .merge import LanceMergeInsertBuilder from .merge import LanceMergeInsertBuilder
from .pydantic import LanceModel, model_to_dict from .pydantic import LanceModel, model_to_dict
from .query import ( from .query import (
@@ -2065,7 +2054,7 @@ class LanceTable(Table):
index_cache_size: Optional[int] = None, index_cache_size: Optional[int] = None,
num_bits: int = 8, num_bits: int = 8,
index_type: Literal[ index_type: Literal[
"IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ" "IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
] = "IVF_PQ", ] = "IVF_PQ",
max_iterations: int = 50, max_iterations: int = 50,
sample_rate: int = 256, sample_rate: int = 256,
@@ -2103,14 +2092,6 @@ class LanceTable(Table):
sample_rate=sample_rate, sample_rate=sample_rate,
target_partition_size=target_partition_size, target_partition_size=target_partition_size,
) )
elif index_type == "IVF_SQ":
config = IvfSq(
distance_type=metric,
num_partitions=num_partitions,
max_iterations=max_iterations,
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_PQ": elif index_type == "IVF_PQ":
config = IvfPq( config = IvfPq(
distance_type=metric, distance_type=metric,
@@ -3475,22 +3456,11 @@ class AsyncTable:
if config is not None: if config is not None:
if not isinstance( if not isinstance(
config, config,
( (IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
IvfFlat,
IvfSq,
IvfPq,
IvfRq,
HnswPq,
HnswSq,
BTree,
Bitmap,
LabelList,
FTS,
),
): ):
raise TypeError( raise TypeError(
"config must be an instance of IvfSq, IvfPq, IvfRq, HnswPq, HnswSq," "config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
" BTree, Bitmap, LabelList, or FTS, but got " + str(type(config)) " Bitmap, LabelList, or FTS, but got " + str(type(config))
) )
try: try:
await self._inner.create_index( await self._inner.create_index(
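Editor's note: the isinstance guard above accepts index config objects rather than type strings. Below is a minimal sketch of that path on AsyncTable.create_index, using IvfPq and BTree from lancedb.index; the dataset, path, and partition counts are illustrative.

```python
import asyncio
import numpy as np
import lancedb
from lancedb.index import BTree, IvfPq


async def main():
    # Path, table contents, and partition counts are illustrative.
    db = await lancedb.connect_async("data/async-index-demo")
    tbl = await db.create_table(
        "vectors",
        data=[{"vector": np.random.rand(64).tolist(), "id": i} for i in range(1000)],
        mode="overwrite",
    )
    # Vector index from a config object instead of an index_type string.
    await tbl.create_index("vector", config=IvfPq(num_partitions=8, num_sub_vectors=8))
    # Scalar index on a regular column.
    await tbl.create_index("id", config=BTree())
    print([idx.index_type for idx in await tbl.list_indices()])


asyncio.run(main())
```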

View File

@@ -18,20 +18,12 @@ AddMode = Literal["append", "overwrite"]
CreateMode = Literal["create", "overwrite"] CreateMode = Literal["create", "overwrite"]
# Index type literals # Index type literals
VectorIndexType = Literal[ VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
"IVF_FLAT",
"IVF_SQ",
"IVF_PQ",
"IVF_HNSW_SQ",
"IVF_HNSW_PQ",
"IVF_RQ",
]
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"] ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
IndexType = Literal[ IndexType = Literal[
"IVF_PQ", "IVF_PQ",
"IVF_HNSW_PQ", "IVF_HNSW_PQ",
"IVF_HNSW_SQ", "IVF_HNSW_SQ",
"IVF_SQ",
"FTS", "FTS",
"BTREE", "BTREE",
"BITMAP", "BITMAP",

View File

@@ -441,150 +441,6 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
assert re.match(r"\d{20}\.manifest", manifest) assert re.match(r"\d{20}\.manifest", manifest)
@pytest.mark.asyncio
async def test_create_table_stable_row_ids_via_storage_options(tmp_path):
"""Test stable_row_ids via storage_options at connect time."""
import lance
# Connect with stable row IDs enabled as default for new tables
db_with = await lancedb.connect_async(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Connect without stable row IDs (default)
db_without = await lancedb.connect_async(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
)
# Create table using connection with stable row IDs enabled
await db_with.create_table(
"with_stable_via_opts",
data=[{"id": i} for i in range(10)],
)
lance_ds_with = lance.dataset(tmp_path / "with_stable_via_opts.lance")
fragments_with = lance_ds_with.get_fragments()
assert len(fragments_with) > 0
assert fragments_with[0].metadata.row_id_meta is not None
# Create table using connection without stable row IDs
await db_without.create_table(
"without_stable_via_opts",
data=[{"id": i} for i in range(10)],
)
lance_ds_without = lance.dataset(tmp_path / "without_stable_via_opts.lance")
fragments_without = lance_ds_without.get_fragments()
assert len(fragments_without) > 0
assert fragments_without[0].metadata.row_id_meta is None
def test_create_table_stable_row_ids_via_storage_options_sync(tmp_path):
"""Test that enable_stable_row_ids can be set via storage_options (sync API)."""
# Connect with stable row IDs enabled as default for new tables
db_with = lancedb.connect(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Connect without stable row IDs (default)
db_without = lancedb.connect(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
)
# Create table using connection with stable row IDs enabled
tbl_with = db_with.create_table(
"with_stable_sync",
data=[{"id": i} for i in range(10)],
)
lance_ds_with = tbl_with.to_lance()
fragments_with = lance_ds_with.get_fragments()
assert len(fragments_with) > 0
assert fragments_with[0].metadata.row_id_meta is not None
# Create table using connection without stable row IDs
tbl_without = db_without.create_table(
"without_stable_sync",
data=[{"id": i} for i in range(10)],
)
lance_ds_without = tbl_without.to_lance()
fragments_without = lance_ds_without.get_fragments()
assert len(fragments_without) > 0
assert fragments_without[0].metadata.row_id_meta is None
@pytest.mark.asyncio
async def test_create_table_stable_row_ids_table_level_override(tmp_path):
"""Test that stable_row_ids can be enabled/disabled at create_table level."""
import lance
# Connect without any stable row ID setting
db_default = await lancedb.connect_async(tmp_path)
# Connect with stable row IDs enabled at connection level
db_with_stable = await lancedb.connect_async(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Case 1: No connection setting, enable at table level
await db_default.create_table(
"table_level_enabled",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "true"},
)
lance_ds = lance.dataset(tmp_path / "table_level_enabled.lance")
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is not None, (
"Table should have stable row IDs when enabled at table level"
)
# Case 2: Connection has stable row IDs, override with false at table level
await db_with_stable.create_table(
"table_level_disabled",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "false"},
)
lance_ds = lance.dataset(tmp_path / "table_level_disabled.lance")
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is None, (
"Table should NOT have stable row IDs when disabled at table level"
)
def test_create_table_stable_row_ids_table_level_override_sync(tmp_path):
"""Test that stable_row_ids can be enabled/disabled at create_table level (sync)."""
# Connect without any stable row ID setting
db_default = lancedb.connect(tmp_path)
# Connect with stable row IDs enabled at connection level
db_with_stable = lancedb.connect(
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
)
# Case 1: No connection setting, enable at table level
tbl = db_default.create_table(
"table_level_enabled_sync",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "true"},
)
lance_ds = tbl.to_lance()
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is not None, (
"Table should have stable row IDs when enabled at table level"
)
# Case 2: Connection has stable row IDs, override with false at table level
tbl = db_with_stable.create_table(
"table_level_disabled_sync",
data=[{"id": i} for i in range(10)],
storage_options={"new_table_enable_stable_row_ids": "false"},
)
lance_ds = tbl.to_lance()
fragments = lance_ds.get_fragments()
assert len(fragments) > 0
assert fragments[0].metadata.row_id_meta is None, (
"Table should NOT have stable row IDs when disabled at table level"
)
def test_open_table_sync(tmp_db: lancedb.DBConnection): def test_open_table_sync(tmp_db: lancedb.DBConnection):
tmp_db.create_table("test", data=[{"id": 0}]) tmp_db.create_table("test", data=[{"id": 0}])
assert tmp_db.open_table("test").count_rows() == 1 assert tmp_db.open_table("test").count_rows() == 1
@@ -892,7 +748,7 @@ def test_local_namespace_operations(tmp_path):
db = lancedb.connect(tmp_path) db = lancedb.connect(tmp_path)
# Test list_namespaces returns empty list for root namespace # Test list_namespaces returns empty list for root namespace
namespaces = db.list_namespaces().namespaces namespaces = list(db.list_namespaces())
assert namespaces == [] assert namespaces == []
# Test list_namespaces with non-empty namespace raises NotImplementedError # Test list_namespaces with non-empty namespace raises NotImplementedError
@@ -900,7 +756,7 @@ def test_local_namespace_operations(tmp_path):
NotImplementedError, NotImplementedError,
match="Namespace operations are not supported for listing database", match="Namespace operations are not supported for listing database",
): ):
db.list_namespaces(namespace=["test"]) list(db.list_namespaces(namespace=["test"]))
def test_local_create_namespace_not_supported(tmp_path): def test_local_create_namespace_not_supported(tmp_path):
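Editor's note: the tests removed above exercised the new_table_enable_stable_row_ids storage option and inspected the underlying Lance dataset. The sketch below follows the same flow and is only meaningful on builds that still recognize that option; the path and data are illustrative.

```python
import lancedb

# Connection-level default: new tables get stable row IDs.
db = lancedb.connect(
    "data/stable-rowid-demo",
    storage_options={"new_table_enable_stable_row_ids": "true"},
)
tbl = db.create_table(
    "with_stable_ids",
    data=[{"id": i} for i in range(10)],
    mode="overwrite",
)

# Drop down to the underlying Lance dataset, as the removed tests did.
ds = tbl.to_lance()
print(ds.get_fragments()[0].metadata.row_id_meta is not None)
```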

View File

@@ -12,9 +12,6 @@ from lancedb.index import (
BTree, BTree,
IvfFlat, IvfFlat,
IvfPq, IvfPq,
IvfSq,
IvfHnswPq,
IvfHnswSq,
IvfRq, IvfRq,
Bitmap, Bitmap,
LabelList, LabelList,
@@ -232,35 +229,6 @@ async def test_create_hnswsq_index(some_table: AsyncTable):
assert len(indices) == 1 assert len(indices) == 1
@pytest.mark.asyncio
async def test_create_hnswsq_alias_index(some_table: AsyncTable):
await some_table.create_index("vector", config=IvfHnswSq(num_partitions=5))
indices = await some_table.list_indices()
assert len(indices) == 1
assert indices[0].index_type in {"HnswSq", "IvfHnswSq"}
@pytest.mark.asyncio
async def test_create_hnswpq_alias_index(some_table: AsyncTable):
await some_table.create_index("vector", config=IvfHnswPq(num_partitions=5))
indices = await some_table.list_indices()
assert len(indices) == 1
assert indices[0].index_type in {"HnswPq", "IvfHnswPq"}
@pytest.mark.asyncio
async def test_create_ivfsq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=IvfSq(num_partitions=10))
indices = await some_table.list_indices()
assert len(indices) == 1
assert indices[0].index_type == "IvfSq"
stats = await some_table.index_stats(indices[0].name)
assert stats.index_type == "IVF_SQ"
assert stats.distance_type == "l2"
assert stats.num_indexed_rows == await some_table.count_rows()
assert stats.num_unindexed_rows == 0
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_create_index_with_binary_vectors(binary_table: AsyncTable): async def test_create_index_with_binary_vectors(binary_table: AsyncTable):
await binary_table.create_index( await binary_table.create_index(
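Editor's note: the removed IvfSq test above asserted on index_stats() fields. The sketch below reads the same statistics with an index config that exists on both sides of the diff (IvfFlat); the path, row count, and vector size are assumptions.

```python
import asyncio
import numpy as np
import lancedb
from lancedb.index import IvfFlat


async def main():
    # Fixture-style setup; path, row count, and vector size are illustrative.
    db = await lancedb.connect_async("data/index-stats-demo")
    tbl = await db.create_table(
        "stats_demo",
        data=[{"vector": np.random.rand(32).tolist()} for _ in range(512)],
        mode="overwrite",
    )
    await tbl.create_index("vector", config=IvfFlat(num_partitions=4))

    name = (await tbl.list_indices())[0].name
    stats = await tbl.index_stats(name)
    # The same fields the removed test asserted on.
    print(stats.index_type, stats.distance_type)
    print(stats.num_indexed_rows, stats.num_unindexed_rows)


asyncio.run(main())
```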

View File

@@ -279,13 +279,13 @@ class TestNamespaceConnection:
db = lancedb.connect_namespace("dir", {"root": self.temp_dir}) db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
# Initially no namespaces # Initially no namespaces
assert len(db.list_namespaces().namespaces) == 0 assert len(list(db.list_namespaces())) == 0
# Create a namespace # Create a namespace
db.create_namespace(["test_namespace"]) db.create_namespace(["test_namespace"])
# Verify namespace exists # Verify namespace exists
namespaces = db.list_namespaces().namespaces namespaces = list(db.list_namespaces())
assert "test_namespace" in namespaces assert "test_namespace" in namespaces
assert len(namespaces) == 1 assert len(namespaces) == 1
@@ -322,7 +322,7 @@ class TestNamespaceConnection:
db.drop_namespace(["test_namespace"]) db.drop_namespace(["test_namespace"])
# Verify namespace no longer exists # Verify namespace no longer exists
namespaces = db.list_namespaces().namespaces namespaces = list(db.list_namespaces())
assert len(namespaces) == 0 assert len(namespaces) == 0
def test_namespace_with_tables_cannot_be_dropped(self): def test_namespace_with_tables_cannot_be_dropped(self):
@@ -570,13 +570,13 @@ class TestAsyncNamespaceConnection:
# Initially no namespaces # Initially no namespaces
namespaces = await db.list_namespaces() namespaces = await db.list_namespaces()
assert len(namespaces.namespaces) == 0 assert len(list(namespaces)) == 0
# Create a namespace # Create a namespace
await db.create_namespace(["test_namespace"]) await db.create_namespace(["test_namespace"])
# Verify namespace exists # Verify namespace exists
namespaces = (await db.list_namespaces()).namespaces namespaces = list(await db.list_namespaces())
assert "test_namespace" in namespaces assert "test_namespace" in namespaces
assert len(namespaces) == 1 assert len(namespaces) == 1
@@ -608,7 +608,7 @@ class TestAsyncNamespaceConnection:
await db.drop_namespace(["test_namespace"]) await db.drop_namespace(["test_namespace"])
# Verify namespace no longer exists # Verify namespace no longer exists
namespaces = (await db.list_namespaces()).namespaces namespaces = list(await db.list_namespaces())
assert len(namespaces) == 0 assert len(namespaces) == 0
async def test_drop_all_tables_async(self): async def test_drop_all_tables_async(self):
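Editor's note: the tests above drive a directory-backed namespace connection. Below is a compact sketch of the same round trip, assuming the "dir" implementation and {"root": ...} properties used in those tests; the temporary directory and namespace name are illustrative.

```python
import tempfile
import lancedb

root = tempfile.mkdtemp()
db = lancedb.connect_namespace("dir", {"root": root})

db.create_namespace(["analytics"])

resp = db.list_namespaces()
# On this branch the call returns a response object with .namespaces; older
# builds (right-hand side of the hunks above) returned a plain iterable.
names = resp.namespaces if hasattr(resp, "namespaces") else list(resp)
assert "analytics" in names

db.drop_namespace(["analytics"])
```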

View File

@@ -10,9 +10,8 @@ use lancedb::{
}; };
use pyo3::{ use pyo3::{
exceptions::{PyRuntimeError, PyValueError}, exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult,
types::{PyDict, PyDictMethods}, Python,
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
}; };
use pyo3_async_runtimes::tokio::future_into_py; use pyo3_async_runtimes::tokio::future_into_py;
@@ -293,155 +292,40 @@ impl Connection {
limit: Option<u32>, limit: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> { ) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone(); let inner = self_.get_inner()?.clone();
let py = self_.py(); future_into_py(self_.py(), async move {
future_into_py(py, async move { use lancedb::database::ListNamespacesRequest;
use lance_namespace::models::ListNamespacesRequest;
let request = ListNamespacesRequest { let request = ListNamespacesRequest {
id: if namespace.is_empty() { namespace,
None
} else {
Some(namespace)
},
page_token, page_token,
limit: limit.map(|l| l as i32), limit,
}; };
let response = inner.list_namespaces(request).await.infer_error()?; inner.list_namespaces(request).await.infer_error()
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
dict.set_item("page_token", response.page_token)?;
Ok(dict.unbind())
})
})
}
#[pyo3(signature = (namespace, mode=None, properties=None))]
pub fn create_namespace(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
mode: Option<String>,
properties: Option<std::collections::HashMap<String, String>>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::{create_namespace_request, CreateNamespaceRequest};
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
"create" => Some(create_namespace_request::Mode::Create),
"exist_ok" => Some(create_namespace_request::Mode::ExistOk),
"overwrite" => Some(create_namespace_request::Mode::Overwrite),
_ => None,
});
let request = CreateNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
mode: mode_enum,
properties,
};
let response = inner.create_namespace(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
})
})
}
#[pyo3(signature = (namespace, mode=None, behavior=None))]
pub fn drop_namespace(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
mode: Option<String>,
behavior: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::{drop_namespace_request, DropNamespaceRequest};
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
"SKIP" => Some(drop_namespace_request::Mode::Skip),
"FAIL" => Some(drop_namespace_request::Mode::Fail),
_ => None,
});
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
"RESTRICT" => Some(drop_namespace_request::Behavior::Restrict),
"CASCADE" => Some(drop_namespace_request::Behavior::Cascade),
_ => None,
});
let request = DropNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
mode: mode_enum,
behavior: behavior_enum,
};
let response = inner.drop_namespace(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
dict.set_item("transaction_id", response.transaction_id)?;
Ok(dict.unbind())
})
}) })
} }
#[pyo3(signature = (namespace,))] #[pyo3(signature = (namespace,))]
pub fn describe_namespace( pub fn create_namespace(
self_: PyRef<'_, Self>, self_: PyRef<'_, Self>,
namespace: Vec<String>, namespace: Vec<String>,
) -> PyResult<Bound<'_, PyAny>> { ) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone(); let inner = self_.get_inner()?.clone();
let py = self_.py(); future_into_py(self_.py(), async move {
future_into_py(py, async move { use lancedb::database::CreateNamespaceRequest;
use lance_namespace::models::DescribeNamespaceRequest; let request = CreateNamespaceRequest { namespace };
let request = DescribeNamespaceRequest { inner.create_namespace(request).await.infer_error()
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
};
let response = inner.describe_namespace(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
})
}) })
} }
#[pyo3(signature = (namespace=vec![], page_token=None, limit=None))] #[pyo3(signature = (namespace,))]
pub fn list_tables( pub fn drop_namespace(
self_: PyRef<'_, Self>, self_: PyRef<'_, Self>,
namespace: Vec<String>, namespace: Vec<String>,
page_token: Option<String>,
limit: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> { ) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone(); let inner = self_.get_inner()?.clone();
let py = self_.py(); future_into_py(self_.py(), async move {
future_into_py(py, async move { use lancedb::database::DropNamespaceRequest;
use lance_namespace::models::ListTablesRequest; let request = DropNamespaceRequest { namespace };
let request = ListTablesRequest { inner.drop_namespace(request).await.infer_error()
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
page_token,
limit: limit.map(|l| l as i32),
};
let response = inner.list_tables(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;
dict.set_item("page_token", response.page_token)?;
Ok(dict.unbind())
})
}) })
} }
} }

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors // SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder}; use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder};
use lancedb::index::{ use lancedb::index::{
scalar::{BTreeIndexBuilder, FtsIndexBuilder}, scalar::{BTreeIndexBuilder, FtsIndexBuilder},
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder}, vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
@@ -87,21 +87,6 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
} }
Ok(LanceDbIndex::IvfPq(ivf_pq_builder)) Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
}, },
"IvfSq" => {
let params = source.extract::<IvfSqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
let mut ivf_sq_builder = IvfSqIndexBuilder::default()
.distance_type(distance_type)
.max_iterations(params.max_iterations)
.sample_rate(params.sample_rate);
if let Some(num_partitions) = params.num_partitions {
ivf_sq_builder = ivf_sq_builder.num_partitions(num_partitions);
}
if let Some(target_partition_size) = params.target_partition_size {
ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
}
Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
},
"IvfRq" => { "IvfRq" => {
let params = source.extract::<IvfRqParams>()?; let params = source.extract::<IvfRqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?; let distance_type = parse_distance_type(params.distance_type)?;
@@ -157,7 +142,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder)) Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
}, },
not_supported => Err(PyValueError::new_err(format!( not_supported => Err(PyValueError::new_err(format!(
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq", "Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfHnswPq, or IvfHnswSq",
not_supported not_supported
))), ))),
} }
@@ -201,15 +186,6 @@ struct IvfPqParams {
target_partition_size: Option<u32>, target_partition_size: Option<u32>,
} }
#[derive(FromPyObject)]
struct IvfSqParams {
distance_type: String,
num_partitions: Option<u32>,
max_iterations: u32,
sample_rate: u32,
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)] #[derive(FromPyObject)]
struct IvfRqParams { struct IvfRqParams {
distance_type: String, distance_type: String,

View File

@@ -690,7 +690,7 @@ impl FTSQuery {
} }
pub fn get_query(&self) -> String { pub fn get_query(&self) -> String {
self.fts_query.query.query().clone() self.fts_query.query.query().to_owned()
} }
pub fn to_query_request(&self) -> PyQueryRequest { pub fn to_query_request(&self) -> PyQueryRequest {

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.22.4-beta.3" version = "0.22.4-beta.2"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
@@ -105,12 +105,12 @@ test-log = "0.2"
[features] [features]
default = ["aws", "gcs", "azure", "dynamodb", "oss"] default = ["aws", "gcs", "azure", "dynamodb", "oss"]
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"] aws = ["lance/aws", "lance-io/aws"]
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"] oss = ["lance/oss", "lance-io/oss"]
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"] gcs = ["lance/gcp", "lance-io/gcp"]
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"] azure = ["lance/azure", "lance-io/azure"]
dynamodb = ["lance/dynamodb", "aws"] dynamodb = ["lance/dynamodb", "aws"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"] remote = ["dep:reqwest", "dep:http"]
fp16kernels = ["lance-linalg/fp16kernels"] fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = [] s3-test = []
bedrock = ["dep:aws-sdk-bedrockruntime"] bedrock = ["dep:aws-sdk-bedrockruntime"]

View File

@@ -9,11 +9,6 @@ use std::sync::Arc;
use arrow_array::RecordBatchReader; use arrow_array::RecordBatchReader;
use arrow_schema::{Field, SchemaRef}; use arrow_schema::{Field, SchemaRef};
use lance::dataset::ReadParams; use lance::dataset::ReadParams;
use lance_namespace::models::{
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
};
#[cfg(feature = "aws")] #[cfg(feature = "aws")]
use object_store::aws::AwsCredential; use object_store::aws::AwsCredential;
@@ -22,8 +17,9 @@ use crate::database::listing::{
ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS, ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
}; };
use crate::database::{ use crate::database::{
CloneTableRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database, CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest, CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
OpenTableRequest, ReadConsistency, TableNamesRequest,
}; };
use crate::embeddings::{ use crate::embeddings::{
EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings, EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings,
@@ -78,7 +74,6 @@ impl TableNamesBuilder {
} }
/// Execute the table names operation /// Execute the table names operation
#[allow(deprecated)]
pub async fn execute(self) -> Result<Vec<String>> { pub async fn execute(self) -> Result<Vec<String>> {
self.parent.clone().table_names(self.request).await self.parent.clone().table_names(self.request).await
} }
@@ -413,7 +408,6 @@ impl OpenTableBuilder {
index_cache_size: None, index_cache_size: None,
lance_read_params: None, lance_read_params: None,
location: None, location: None,
namespace_client: None,
}, },
embedding_registry, embedding_registry,
} }
@@ -773,42 +767,20 @@ impl Connection {
} }
/// List immediate child namespace names in the given namespace /// List immediate child namespace names in the given namespace
pub async fn list_namespaces( pub async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
&self,
request: ListNamespacesRequest,
) -> Result<ListNamespacesResponse> {
self.internal.list_namespaces(request).await self.internal.list_namespaces(request).await
} }
/// Create a new namespace /// Create a new namespace
pub async fn create_namespace( pub async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()> {
&self,
request: CreateNamespaceRequest,
) -> Result<CreateNamespaceResponse> {
self.internal.create_namespace(request).await self.internal.create_namespace(request).await
} }
/// Drop a namespace /// Drop a namespace
pub async fn drop_namespace( pub async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()> {
&self,
request: DropNamespaceRequest,
) -> Result<DropNamespaceResponse> {
self.internal.drop_namespace(request).await self.internal.drop_namespace(request).await
} }
/// Describe a namespace
pub async fn describe_namespace(
&self,
request: DescribeNamespaceRequest,
) -> Result<DescribeNamespaceResponse> {
self.internal.describe_namespace(request).await
}
/// List tables with pagination support
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
self.internal.list_tables(request).await
}
/// Get the in-memory embedding registry. /// Get the in-memory embedding registry.
/// It's important to note that the embedding registry is not persisted across connections. /// It's important to note that the embedding registry is not persisted across connections.
/// So if a table contains embeddings, you will need to make sure that you are using a connection that has the same embedding functions registered /// So if a table contains embeddings, you will need to make sure that you are using a connection that has the same embedding functions registered
@@ -1114,7 +1086,6 @@ pub struct ConnectNamespaceBuilder {
read_consistency_interval: Option<std::time::Duration>, read_consistency_interval: Option<std::time::Duration>,
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>, embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
session: Option<Arc<lance::session::Session>>, session: Option<Arc<lance::session::Session>>,
server_side_query_enabled: bool,
} }
impl ConnectNamespaceBuilder { impl ConnectNamespaceBuilder {
@@ -1126,7 +1097,6 @@ impl ConnectNamespaceBuilder {
read_consistency_interval: None, read_consistency_interval: None,
embedding_registry: None, embedding_registry: None,
session: None, session: None,
server_side_query_enabled: false,
} }
} }
@@ -1181,18 +1151,6 @@ impl ConnectNamespaceBuilder {
self self
} }
/// Enable server-side query execution.
///
/// When enabled, queries will be executed on the namespace server instead of
/// locally. This can improve performance by reducing data transfer and
/// leveraging server-side compute resources.
///
/// Default is `false` (queries executed locally).
pub fn server_side_query(mut self, enabled: bool) -> Self {
self.server_side_query_enabled = enabled;
self
}
/// Execute the connection /// Execute the connection
pub async fn execute(self) -> Result<Connection> { pub async fn execute(self) -> Result<Connection> {
use crate::database::namespace::LanceNamespaceDatabase; use crate::database::namespace::LanceNamespaceDatabase;
@@ -1204,7 +1162,6 @@ impl ConnectNamespaceBuilder {
self.storage_options, self.storage_options,
self.read_consistency_interval, self.read_consistency_interval,
self.session, self.session,
self.server_side_query_enabled,
) )
.await?, .await?,
); );

View File

@@ -24,12 +24,6 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
use futures::stream; use futures::stream;
use lance::dataset::ReadParams; use lance::dataset::ReadParams;
use lance_datafusion::utils::StreamingWriteSource; use lance_datafusion::utils::StreamingWriteSource;
use lance_namespace::models::{
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
};
use lance_namespace::LanceNamespace;
use crate::arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt}; use crate::arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt};
use crate::error::Result; use crate::error::Result;
@@ -42,7 +36,32 @@ pub trait DatabaseOptions {
fn serialize_into_map(&self, map: &mut HashMap<String, String>); fn serialize_into_map(&self, map: &mut HashMap<String, String>);
} }
/// A request to list names of tables in the database (deprecated, use ListTablesRequest) /// A request to list namespaces in the database
#[derive(Clone, Debug, Default)]
pub struct ListNamespacesRequest {
/// The parent namespace to list namespaces in. Empty list represents root namespace.
pub namespace: Vec<String>,
/// If present, only return names that come lexicographically after the supplied value.
pub page_token: Option<String>,
/// The maximum number of namespace names to return
pub limit: Option<u32>,
}
/// A request to create a namespace
#[derive(Clone, Debug)]
pub struct CreateNamespaceRequest {
/// The namespace identifier to create
pub namespace: Vec<String>,
}
/// A request to drop a namespace
#[derive(Clone, Debug)]
pub struct DropNamespaceRequest {
/// The namespace identifier to drop
pub namespace: Vec<String>,
}
/// A request to list names of tables in the database
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct TableNamesRequest { pub struct TableNamesRequest {
/// The namespace to list tables in. Empty list represents root namespace. /// The namespace to list tables in. Empty list represents root namespace.
@@ -58,7 +77,7 @@ pub struct TableNamesRequest {
} }
/// A request to open a table /// A request to open a table
#[derive(Clone)] #[derive(Clone, Debug)]
pub struct OpenTableRequest { pub struct OpenTableRequest {
pub name: String, pub name: String,
/// The namespace to open the table from. Empty list represents root namespace. /// The namespace to open the table from. Empty list represents root namespace.
@@ -68,22 +87,6 @@ pub struct OpenTableRequest {
/// Optional custom location for the table. If not provided, the database will /// Optional custom location for the table. If not provided, the database will
/// derive a location based on its URI and the table name. /// derive a location based on its URI and the table name.
pub location: Option<String>, pub location: Option<String>,
/// Optional namespace client for server-side query execution.
/// When set, queries will be executed on the namespace server instead of locally.
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
}
impl std::fmt::Debug for OpenTableRequest {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OpenTableRequest")
.field("name", &self.name)
.field("namespace", &self.namespace)
.field("index_cache_size", &self.index_cache_size)
.field("lance_read_params", &self.lance_read_params)
.field("location", &self.location)
.field("namespace_client", &self.namespace_client)
.finish()
}
} }
pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableRequest) -> OpenTableRequest + Send>; pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableRequest) -> OpenTableRequest + Send>;
@@ -167,9 +170,6 @@ pub struct CreateTableRequest {
/// Optional custom location for the table. If not provided, the database will /// Optional custom location for the table. If not provided, the database will
/// derive a location based on its URI and the table name. /// derive a location based on its URI and the table name.
pub location: Option<String>, pub location: Option<String>,
/// Optional namespace client for server-side query execution.
/// When set, queries will be executed on the namespace server instead of locally.
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
} }
impl CreateTableRequest { impl CreateTableRequest {
@@ -181,7 +181,6 @@ impl CreateTableRequest {
mode: CreateTableMode::default(), mode: CreateTableMode::default(),
write_options: WriteOptions::default(), write_options: WriteOptions::default(),
location: None, location: None,
namespace_client: None,
} }
} }
} }
@@ -248,30 +247,13 @@ pub trait Database:
/// Get the read consistency of the database /// Get the read consistency of the database
async fn read_consistency(&self) -> Result<ReadConsistency>; async fn read_consistency(&self) -> Result<ReadConsistency>;
/// List immediate child namespace names in the given namespace /// List immediate child namespace names in the given namespace
async fn list_namespaces( async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>>;
&self,
request: ListNamespacesRequest,
) -> Result<ListNamespacesResponse>;
/// Create a new namespace /// Create a new namespace
async fn create_namespace( async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()>;
&self,
request: CreateNamespaceRequest,
) -> Result<CreateNamespaceResponse>;
/// Drop a namespace /// Drop a namespace
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse>; async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()>;
/// Describe a namespace (get its properties)
async fn describe_namespace(
&self,
request: DescribeNamespaceRequest,
) -> Result<DescribeNamespaceResponse>;
/// List the names of tables in the database /// List the names of tables in the database
///
/// # Deprecated
/// Use `list_tables` instead for pagination support
#[deprecated(note = "Use list_tables instead")]
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>>; async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>>;
/// List tables in the database with pagination support
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse>;
/// Create a table in the database /// Create a table in the database
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>>; async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>>;
/// Clone a table in the database. /// Clone a table in the database.

View File

@@ -24,15 +24,10 @@ use crate::io::object_store::MirroringObjectStoreWrapper;
use crate::table::NativeTable; use crate::table::NativeTable;
use crate::utils::validate_table_name; use crate::utils::validate_table_name;
use lance_namespace::models::{
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
};
use super::{ use super::{
BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, BaseTable, CloneTableRequest, CreateNamespaceRequest, CreateTableMode, CreateTableRequest,
OpenTableRequest, TableNamesRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
TableNamesRequest,
}; };
/// File extension to indicate a lance table /// File extension to indicate a lance table
@@ -40,7 +35,6 @@ pub const LANCE_FILE_EXTENSION: &str = "lance";
pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version"; pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version";
pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths"; pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths";
pub const OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS: &str = "new_table_enable_stable_row_ids";
/// Controls how new tables should be created /// Controls how new tables should be created
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
@@ -54,12 +48,6 @@ pub struct NewTableConfig {
/// V2 manifest paths are more efficient than V1 manifest paths but are not /// V2 manifest paths are more efficient than V1 manifest paths but are not
/// supported by old clients. /// supported by old clients.
pub enable_v2_manifest_paths: Option<bool>, pub enable_v2_manifest_paths: Option<bool>,
/// Whether to enable stable row IDs for new tables
///
/// When enabled, row IDs remain stable after compaction, update, delete,
/// and merges. This is useful for materialized views and other use cases
/// that need to track source rows across these operations.
pub enable_stable_row_ids: Option<bool>,
} }
/// Options specific to the listing database /// Options specific to the listing database
@@ -99,14 +87,6 @@ impl ListingDatabaseOptions {
}) })
}) })
.transpose()?, .transpose()?,
enable_stable_row_ids: map
.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS)
.map(|s| {
s.parse::<bool>().map_err(|_| Error::InvalidInput {
message: format!("enable_stable_row_ids must be a boolean, received {}", s),
})
})
.transpose()?,
}; };
// We just assume that any options that are not new table config options are storage options // We just assume that any options that are not new table config options are storage options
let storage_options = map let storage_options = map
@@ -114,7 +94,6 @@ impl ListingDatabaseOptions {
.filter(|(key, _)| { .filter(|(key, _)| {
key.as_str() != OPT_NEW_TABLE_STORAGE_VERSION key.as_str() != OPT_NEW_TABLE_STORAGE_VERSION
&& key.as_str() != OPT_NEW_TABLE_V2_MANIFEST_PATHS && key.as_str() != OPT_NEW_TABLE_V2_MANIFEST_PATHS
&& key.as_str() != OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS
}) })
.map(|(key, value)| (key.clone(), value.clone())) .map(|(key, value)| (key.clone(), value.clone()))
.collect(); .collect();
@@ -139,12 +118,6 @@ impl DatabaseOptions for ListingDatabaseOptions {
enable_v2_manifest_paths.to_string(), enable_v2_manifest_paths.to_string(),
); );
} }
if let Some(enable_stable_row_ids) = self.new_table_config.enable_stable_row_ids {
map.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
enable_stable_row_ids.to_string(),
);
}
} }
} }
@@ -502,7 +475,7 @@ impl ListingDatabase {
// this error is not lance::Error::DatasetNotFound, as the method // this error is not lance::Error::DatasetNotFound, as the method
// `remove_dir_all` may be used to remove something not be a dataset // `remove_dir_all` may be used to remove something not be a dataset
lance::Error::NotFound { .. } => Error::TableNotFound { lance::Error::NotFound { .. } => Error::TableNotFound {
name: name.clone(), name: name.to_owned(),
source: Box::new(err), source: Box::new(err),
}, },
_ => Error::from(err), _ => Error::from(err),
@@ -524,7 +497,7 @@ impl ListingDatabase {
fn extract_storage_overrides( fn extract_storage_overrides(
&self, &self,
request: &CreateTableRequest, request: &CreateTableRequest,
) -> Result<(Option<LanceFileVersion>, Option<bool>, Option<bool>)> { ) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
let storage_options = request let storage_options = request
.write_options .write_options
.lance_write_params .lance_write_params
@@ -545,19 +518,7 @@ impl ListingDatabase {
message: "enable_v2_manifest_paths must be a boolean".to_string(), message: "enable_v2_manifest_paths must be a boolean".to_string(),
})?; })?;
let stable_row_ids_override = storage_options Ok((storage_version_override, v2_manifest_override))
.and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS))
.map(|s| s.parse::<bool>())
.transpose()
.map_err(|_| Error::InvalidInput {
message: "enable_stable_row_ids must be a boolean".to_string(),
})?;
Ok((
storage_version_override,
v2_manifest_override,
stable_row_ids_override,
))
} }
/// Prepare write parameters for table creation /// Prepare write parameters for table creation
@@ -566,7 +527,6 @@ impl ListingDatabase {
request: &CreateTableRequest, request: &CreateTableRequest,
storage_version_override: Option<LanceFileVersion>, storage_version_override: Option<LanceFileVersion>,
v2_manifest_override: Option<bool>, v2_manifest_override: Option<bool>,
stable_row_ids_override: Option<bool>,
) -> lance::dataset::WriteParams { ) -> lance::dataset::WriteParams {
let mut write_params = request let mut write_params = request
.write_options .write_options
@@ -611,13 +571,6 @@ impl ListingDatabase {
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths; write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
} }
// Apply enable_stable_row_ids: table-level override takes precedence over connection config
if let Some(enable_stable_row_ids) =
stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids)
{
write_params.enable_stable_row_ids = enable_stable_row_ids;
}
if matches!(&request.mode, CreateTableMode::Overwrite) { if matches!(&request.mode, CreateTableMode::Overwrite) {
write_params.mode = WriteMode::Overwrite; write_params.mode = WriteMode::Overwrite;
} }
@@ -646,7 +599,6 @@ impl ListingDatabase {
index_cache_size: None, index_cache_size: None,
lance_read_params: None, lance_read_params: None,
location: None, location: None,
namespace_client: None,
}; };
let req = (callback)(req); let req = (callback)(req);
let table = self.open_table(req).await?; let table = self.open_table(req).await?;
@@ -668,20 +620,14 @@ impl ListingDatabase {
#[async_trait::async_trait] #[async_trait::async_trait]
impl Database for ListingDatabase { impl Database for ListingDatabase {
async fn list_namespaces( async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
&self, if !request.namespace.is_empty() {
request: ListNamespacesRequest,
) -> Result<ListNamespacesResponse> {
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
return Err(Error::NotSupported { return Err(Error::NotSupported {
message: "Namespace operations are not supported for listing database".into(), message: "Namespace operations are not supported for listing database".into(),
}); });
} }
Ok(ListNamespacesResponse { Ok(Vec::new())
namespaces: Vec::new(),
page_token: None,
})
} }
fn uri(&self) -> &str { fn uri(&self) -> &str {
@@ -700,28 +646,13 @@ impl Database for ListingDatabase {
} }
} }
async fn create_namespace( async fn create_namespace(&self, _request: CreateNamespaceRequest) -> Result<()> {
&self,
_request: CreateNamespaceRequest,
) -> Result<CreateNamespaceResponse> {
Err(Error::NotSupported { Err(Error::NotSupported {
message: "Namespace operations are not supported for listing database".into(), message: "Namespace operations are not supported for listing database".into(),
}) })
} }
async fn drop_namespace( async fn drop_namespace(&self, _request: DropNamespaceRequest) -> Result<()> {
&self,
_request: DropNamespaceRequest,
) -> Result<DropNamespaceResponse> {
Err(Error::NotSupported {
message: "Namespace operations are not supported for listing database".into(),
})
}
async fn describe_namespace(
&self,
_request: DescribeNamespaceRequest,
) -> Result<DescribeNamespaceResponse> {
Err(Error::NotSupported { Err(Error::NotSupported {
message: "Namespace operations are not supported for listing database".into(), message: "Namespace operations are not supported for listing database".into(),
}) })
@@ -762,57 +693,6 @@ impl Database for ListingDatabase {
Ok(f) Ok(f)
} }
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
return Err(Error::NotSupported {
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
});
}
let mut f = self
.object_store
.read_dir(self.base_path.clone())
.await?
.iter()
.map(Path::new)
.filter(|path| {
let is_lance = path
.extension()
.and_then(|e| e.to_str())
.map(|e| e == LANCE_EXTENSION);
is_lance.unwrap_or(false)
})
.filter_map(|p| p.file_stem().and_then(|s| s.to_str().map(String::from)))
.collect::<Vec<String>>();
f.sort();
// Handle pagination with page_token
if let Some(ref page_token) = request.page_token {
let index = f
.iter()
.position(|name| name.as_str() > page_token.as_str())
.unwrap_or(f.len());
f.drain(0..index);
}
// Determine if there's a next page
let next_page_token = if let Some(limit) = request.limit {
if f.len() > limit as usize {
let token = f[limit as usize].clone();
f.truncate(limit as usize);
Some(token)
} else {
None
}
} else {
None
};
Ok(ListTablesResponse {
tables: f,
page_token: next_page_token,
})
}
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> { async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// When namespace is not empty, location must be provided // When namespace is not empty, location must be provided
if !request.namespace.is_empty() && request.location.is_none() { if !request.namespace.is_empty() && request.location.is_none() {
@@ -826,15 +706,11 @@ impl Database for ListingDatabase {
.clone() .clone()
.unwrap_or_else(|| self.table_uri(&request.name).unwrap()); .unwrap_or_else(|| self.table_uri(&request.name).unwrap());
let (storage_version_override, v2_manifest_override, stable_row_ids_override) = let (storage_version_override, v2_manifest_override) =
self.extract_storage_overrides(&request)?; self.extract_storage_overrides(&request)?;
let write_params = self.prepare_write_params( let write_params =
&request, self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
storage_version_override,
v2_manifest_override,
stable_row_ids_override,
);
let data_schema = request.data.arrow_schema(); let data_schema = request.data.arrow_schema();
@@ -846,7 +722,6 @@ impl Database for ListingDatabase {
self.store_wrapper.clone(), self.store_wrapper.clone(),
Some(write_params), Some(write_params),
self.read_consistency_interval, self.read_consistency_interval,
request.namespace_client,
) )
.await .await
{ {
@@ -918,7 +793,6 @@ impl Database for ListingDatabase {
self.store_wrapper.clone(), self.store_wrapper.clone(),
None, None,
self.read_consistency_interval, self.read_consistency_interval,
None,
) )
.await?; .await?;
@@ -990,7 +864,6 @@ impl Database for ListingDatabase {
self.store_wrapper.clone(), self.store_wrapper.clone(),
Some(read_params), Some(read_params),
self.read_consistency_interval, self.read_consistency_interval,
request.namespace_client,
) )
.await?, .await?,
); );
@@ -1028,7 +901,6 @@ impl Database for ListingDatabase {
self.drop_tables(vec![name.to_string()]).await self.drop_tables(vec![name.to_string()]).await
} }
#[allow(deprecated)]
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> { async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
// Check if namespace parameter is provided // Check if namespace parameter is provided
if !namespace.is_empty() { if !namespace.is_empty() {
@@ -1049,7 +921,7 @@ impl Database for ListingDatabase {
mod tests { mod tests {
use super::*; use super::*;
use crate::connection::ConnectRequest; use crate::connection::ConnectRequest;
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest, WriteOptions}; use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest};
use crate::table::{Table, TableDefinition}; use crate::table::{Table, TableDefinition};
use arrow_array::{Int32Array, RecordBatch, StringArray}; use arrow_array::{Int32Array, RecordBatch, StringArray};
use arrow_schema::{DataType, Field, Schema}; use arrow_schema::{DataType, Field, Schema};
@@ -1093,7 +965,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1115,7 +986,6 @@ mod tests {
.unwrap(); .unwrap();
// Verify both tables exist // Verify both tables exist
#[allow(deprecated)]
let table_names = db.table_names(TableNamesRequest::default()).await.unwrap(); let table_names = db.table_names(TableNamesRequest::default()).await.unwrap();
assert!(table_names.contains(&"source_table".to_string())); assert!(table_names.contains(&"source_table".to_string()));
assert!(table_names.contains(&"cloned_table".to_string())); assert!(table_names.contains(&"cloned_table".to_string()));
@@ -1159,7 +1029,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1218,7 +1087,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1254,7 +1122,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1294,7 +1161,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1334,7 +1200,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1389,7 +1254,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1447,7 +1311,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1533,7 +1396,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1620,7 +1482,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1714,7 +1575,6 @@ mod tests {
mode: CreateTableMode::Create, mode: CreateTableMode::Create,
write_options: Default::default(), write_options: Default::default(),
location: None, location: None,
namespace_client: None,
}) })
.await .await
.unwrap(); .unwrap();
@@ -1761,270 +1621,4 @@ mod tests {
// Cloned table should have all 8 rows from the latest version // Cloned table should have all 8 rows from the latest version
assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8); assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8);
} }
#[tokio::test]
async fn test_create_table_with_stable_row_ids_connection_level() {
let tempdir = tempdir().unwrap();
let uri = tempdir.path().to_str().unwrap();
// Create database with stable row IDs enabled at connection level
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
let request = ConnectRequest {
uri: uri.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options,
read_consistency_interval: None,
session: None,
};
let db = ListingDatabase::connect_with_options(&request)
.await
.unwrap();
// Verify the config was parsed correctly
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
// Create a table - it should inherit the stable row IDs setting
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = Box::new(arrow_array::RecordBatchIterator::new(
vec![Ok(batch)],
schema.clone(),
));
let table = db
.create_table(CreateTableRequest {
name: "test_stable".to_string(),
namespace: vec![],
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
namespace_client: None,
})
.await
.unwrap();
// Verify table was created successfully
assert_eq!(table.count_rows(None).await.unwrap(), 3);
}
#[tokio::test]
async fn test_create_table_with_stable_row_ids_table_level() {
let (_tempdir, db) = setup_database().await;
// Verify connection has no stable row IDs config
assert_eq!(db.new_table_config.enable_stable_row_ids, None);
// Create a table with stable row IDs enabled at table level via storage_options
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = Box::new(arrow_array::RecordBatchIterator::new(
vec![Ok(batch)],
schema.clone(),
));
let mut storage_options = HashMap::new();
storage_options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
}),
};
let table = db
.create_table(CreateTableRequest {
name: "test_stable_table_level".to_string(),
namespace: vec![],
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options,
location: None,
namespace_client: None,
})
.await
.unwrap();
// Verify table was created successfully
assert_eq!(table.count_rows(None).await.unwrap(), 3);
}
#[tokio::test]
async fn test_create_table_stable_row_ids_table_overrides_connection() {
let tempdir = tempdir().unwrap();
let uri = tempdir.path().to_str().unwrap();
// Create database with stable row IDs enabled at connection level
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
let request = ConnectRequest {
uri: uri.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options,
read_consistency_interval: None,
session: None,
};
let db = ListingDatabase::connect_with_options(&request)
.await
.unwrap();
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
// Create table with stable row IDs disabled at table level (overrides connection)
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = Box::new(arrow_array::RecordBatchIterator::new(
vec![Ok(batch)],
schema.clone(),
));
let mut storage_options = HashMap::new();
storage_options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"false".to_string(),
);
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
}),
};
let table = db
.create_table(CreateTableRequest {
name: "test_override".to_string(),
namespace: vec![],
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options,
location: None,
namespace_client: None,
})
.await
.unwrap();
// Verify table was created successfully
assert_eq!(table.count_rows(None).await.unwrap(), 3);
}
#[tokio::test]
async fn test_stable_row_ids_invalid_value() {
let tempdir = tempdir().unwrap();
let uri = tempdir.path().to_str().unwrap();
// Try to create database with invalid stable row IDs value
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"not_a_boolean".to_string(),
);
let request = ConnectRequest {
uri: uri.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options,
read_consistency_interval: None,
session: None,
};
let result = ListingDatabase::connect_with_options(&request).await;
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
Error::InvalidInput { message } if message.contains("enable_stable_row_ids must be a boolean")
));
}
#[test]
fn test_stable_row_ids_config_serialization() {
// Test that ListingDatabaseOptions correctly serializes stable_row_ids
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"true".to_string(),
);
// Parse the options
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(
db_options.new_table_config.enable_stable_row_ids,
Some(true)
);
// Serialize back to map
let mut serialized = HashMap::new();
db_options.serialize_into_map(&mut serialized);
assert_eq!(
serialized.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS),
Some(&"true".to_string())
);
}
#[test]
fn test_stable_row_ids_config_parse_false() {
let mut options = HashMap::new();
options.insert(
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
"false".to_string(),
);
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(
db_options.new_table_config.enable_stable_row_ids,
Some(false)
);
}
#[test]
fn test_stable_row_ids_config_not_set() {
let options = HashMap::new();
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
}
} }

View File

@@ -10,10 +10,8 @@ use async_trait::async_trait;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider}; use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
use lance_namespace::{ use lance_namespace::{
models::{ models::{
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse, CreateEmptyTableRequest, CreateNamespaceRequest, DescribeTableRequest,
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest, DropNamespaceRequest, DropTableRequest, ListNamespacesRequest, ListTablesRequest,
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
}, },
LanceNamespace, LanceNamespace,
}; };
@@ -24,8 +22,11 @@ use crate::database::ReadConsistency;
use crate::error::{Error, Result}; use crate::error::{Error, Result};
use super::{ use super::{
listing::ListingDatabase, BaseTable, CloneTableRequest, CreateTableMode, listing::ListingDatabase, BaseTable, CloneTableRequest,
CreateTableRequest as DbCreateTableRequest, Database, OpenTableRequest, TableNamesRequest, CreateNamespaceRequest as DbCreateNamespaceRequest, CreateTableMode,
CreateTableRequest as DbCreateTableRequest, Database,
DropNamespaceRequest as DbDropNamespaceRequest,
ListNamespacesRequest as DbListNamespacesRequest, OpenTableRequest, TableNamesRequest,
}; };
/// A database implementation that uses lance-namespace for table management /// A database implementation that uses lance-namespace for table management
@@ -39,8 +40,6 @@ pub struct LanceNamespaceDatabase {
session: Option<Arc<lance::session::Session>>, session: Option<Arc<lance::session::Session>>,
// database URI // database URI
uri: String, uri: String,
// Whether to enable server-side query execution
server_side_query_enabled: bool,
} }
impl LanceNamespaceDatabase { impl LanceNamespaceDatabase {
@@ -50,7 +49,6 @@ impl LanceNamespaceDatabase {
storage_options: HashMap<String, String>, storage_options: HashMap<String, String>,
read_consistency_interval: Option<std::time::Duration>, read_consistency_interval: Option<std::time::Duration>,
session: Option<Arc<lance::session::Session>>, session: Option<Arc<lance::session::Session>>,
server_side_query_enabled: bool,
) -> Result<Self> { ) -> Result<Self> {
let mut builder = ConnectBuilder::new(ns_impl); let mut builder = ConnectBuilder::new(ns_impl);
for (key, value) in ns_properties.clone() { for (key, value) in ns_properties.clone() {
@@ -69,7 +67,6 @@ impl LanceNamespaceDatabase {
read_consistency_interval, read_consistency_interval,
session, session,
uri: format!("namespace://{}", ns_impl), uri: format!("namespace://{}", ns_impl),
server_side_query_enabled,
}) })
} }
} }
@@ -79,7 +76,6 @@ impl std::fmt::Debug for LanceNamespaceDatabase {
f.debug_struct("LanceNamespaceDatabase") f.debug_struct("LanceNamespaceDatabase")
.field("storage_options", &self.storage_options) .field("storage_options", &self.storage_options)
.field("read_consistency_interval", &self.read_consistency_interval) .field("read_consistency_interval", &self.read_consistency_interval)
.field("server_side_query_enabled", &self.server_side_query_enabled)
.finish() .finish()
} }
} }
@@ -153,47 +149,92 @@ impl Database for LanceNamespaceDatabase {
} }
} }
async fn list_namespaces( async fn list_namespaces(&self, request: DbListNamespacesRequest) -> Result<Vec<String>> {
&self, let ns_request = ListNamespacesRequest {
request: ListNamespacesRequest, id: if request.namespace.is_empty() {
) -> Result<ListNamespacesResponse> { None
Ok(self.namespace.list_namespaces(request).await?) } else {
Some(request.namespace)
},
page_token: request.page_token,
limit: request.limit.map(|l| l as i32),
};
let response = self
.namespace
.list_namespaces(ns_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to list namespaces: {}", e),
})?;
Ok(response.namespaces)
} }
async fn create_namespace( async fn create_namespace(&self, request: DbCreateNamespaceRequest) -> Result<()> {
&self, let ns_request = CreateNamespaceRequest {
request: CreateNamespaceRequest, id: if request.namespace.is_empty() {
) -> Result<CreateNamespaceResponse> { None
Ok(self.namespace.create_namespace(request).await?) } else {
Some(request.namespace)
},
mode: None,
properties: None,
};
self.namespace
.create_namespace(ns_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to create namespace: {}", e),
})?;
Ok(())
} }
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> { async fn drop_namespace(&self, request: DbDropNamespaceRequest) -> Result<()> {
Ok(self.namespace.drop_namespace(request).await?) let ns_request = DropNamespaceRequest {
} id: if request.namespace.is_empty() {
None
} else {
Some(request.namespace)
},
mode: None,
behavior: None,
};
async fn describe_namespace( self.namespace
&self, .drop_namespace(ns_request)
request: DescribeNamespaceRequest, .await
) -> Result<DescribeNamespaceResponse> { .map_err(|e| Error::Runtime {
Ok(self.namespace.describe_namespace(request).await?) message: format!("Failed to drop namespace: {}", e),
})?;
Ok(())
} }
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> { async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
let ns_request = ListTablesRequest { let ns_request = ListTablesRequest {
id: Some(request.namespace), id: if request.namespace.is_empty() {
None
} else {
Some(request.namespace)
},
page_token: request.start_after, page_token: request.start_after,
limit: request.limit.map(|l| l as i32), limit: request.limit.map(|l| l as i32),
}; };
let response = self.namespace.list_tables(ns_request).await?; let response =
self.namespace
.list_tables(ns_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to list tables: {}", e),
})?;
Ok(response.tables) Ok(response.tables)
} }
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
Ok(self.namespace.list_tables(request).await?)
}
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> { async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request // Extract user-provided storage options from request
let user_storage_options = request let user_storage_options = request
@@ -249,10 +290,6 @@ impl Database for LanceNamespaceDatabase {
) )
.await?; .await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
return listing_db return listing_db
.open_table(OpenTableRequest { .open_table(OpenTableRequest {
name: request.name.clone(), name: request.name.clone(),
@@ -260,7 +297,6 @@ impl Database for LanceNamespaceDatabase {
index_cache_size: None, index_cache_size: None,
lance_read_params: None, lance_read_params: None,
location: Some(location), location: Some(location),
namespace_client,
}) })
.await; .await;
} }
@@ -297,16 +333,12 @@ impl Database for LanceNamespaceDatabase {
let listing_db = self let listing_db = self
.create_listing_database( .create_listing_database(
&location, &location,
table_id.clone(), table_id,
user_storage_options, user_storage_options,
create_empty_response.storage_options.as_ref(), create_empty_response.storage_options.as_ref(),
) )
.await?; .await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let create_request = DbCreateTableRequest { let create_request = DbCreateTableRequest {
name: request.name, name: request.name,
namespace: request.namespace, namespace: request.namespace,
@@ -314,9 +346,7 @@ impl Database for LanceNamespaceDatabase {
mode: request.mode, mode: request.mode,
write_options: request.write_options, write_options: request.write_options,
location: Some(location), location: Some(location),
namespace_client,
}; };
listing_db.create_table(create_request).await listing_db.create_table(create_request).await
} }
@@ -350,25 +380,19 @@ impl Database for LanceNamespaceDatabase {
let listing_db = self let listing_db = self
.create_listing_database( .create_listing_database(
&location, &location,
table_id.clone(), table_id,
user_storage_options, user_storage_options,
response.storage_options.as_ref(), response.storage_options.as_ref(),
) )
.await?; .await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let open_request = OpenTableRequest { let open_request = OpenTableRequest {
name: request.name.clone(), name: request.name.clone(),
namespace: request.namespace.clone(), namespace: request.namespace.clone(),
index_cache_size: request.index_cache_size, index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params, lance_read_params: request.lance_read_params,
location: Some(location), location: Some(location),
namespace_client,
}; };
listing_db.open_table(open_request).await listing_db.open_table(open_request).await
} }
@@ -405,7 +429,6 @@ impl Database for LanceNamespaceDatabase {
Ok(()) Ok(())
} }
#[allow(deprecated)]
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> { async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
let tables = self let tables = self
.table_names(TableNamesRequest { .table_names(TableNamesRequest {
@@ -432,6 +455,7 @@ impl Database for LanceNamespaceDatabase {
mod tests { mod tests {
use super::*; use super::*;
use crate::connect_namespace; use crate::connect_namespace;
use crate::database::CreateNamespaceRequest;
use crate::query::ExecutableQuery; use crate::query::ExecutableQuery;
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray}; use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
use arrow_schema::{DataType, Field, Schema}; use arrow_schema::{DataType, Field, Schema};
@@ -544,9 +568,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -605,9 +627,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -669,9 +689,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -753,9 +771,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -809,9 +825,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -890,9 +904,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -924,9 +936,7 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), namespace: vec!["test_ns".into()],
mode: None,
properties: None,
}) })
.await .await
.expect("Failed to create namespace"); .expect("Failed to create namespace");
@@ -967,46 +977,4 @@ mod tests {
let open_result = conn.open_table("drop_test").execute().await; let open_result = conn.open_table("drop_test").execute().await;
assert!(open_result.is_err()); assert!(open_result.is_err());
} }
#[tokio::test]
async fn test_table_names_at_root() {
// Test that table_names at root (empty namespace) works correctly
// This is a regression test for a bug where empty namespace was converted to None
let tmp_dir = tempdir().unwrap();
let root_path = tmp_dir.path().to_str().unwrap().to_string();
let mut properties = HashMap::new();
properties.insert("root".to_string(), root_path);
let conn = connect_namespace("dir", properties)
.execute()
.await
.expect("Failed to connect to namespace");
// Create multiple tables at root namespace
let test_data1 = create_test_data();
let _table1 = conn
.create_table("table1", test_data1)
.execute()
.await
.expect("Failed to create table1 at root");
let test_data2 = create_test_data();
let _table2 = conn
.create_table("table2", test_data2)
.execute()
.await
.expect("Failed to create table2 at root");
// List tables at root using table_names (empty namespace means root)
let table_names = conn
.table_names()
.execute()
.await
.expect("Failed to list tables at root");
assert!(table_names.contains(&"table1".to_string()));
assert!(table_names.contains(&"table2".to_string()));
assert_eq!(table_names.len(), 2);
}
} }
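
Note on the conversion pattern in the reworked namespace methods above: an empty namespace path is treated as the root and passed to lance-namespace as None. A minimal sketch of that convention follows; the free function and its name are illustrative only and not part of this change.

fn namespace_to_id(namespace: Vec<String>) -> Option<Vec<String>> {
    // An empty path means the root namespace; lance-namespace expects None in that case.
    if namespace.is_empty() {
        None
    } else {
        Some(namespace)
    }
}

// Usage (illustrative): namespace_to_id(vec![]) yields None (root),
// while namespace_to_id(vec!["test_ns".to_string()]) yields Some(vec!["test_ns".to_string()]).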

View File

@@ -13,7 +13,7 @@ use crate::{table::BaseTable, DistanceType, Error, Result};
use self::{ use self::{
scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder}, scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, IvfSqIndexBuilder}, vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
}; };
pub mod scalar; pub mod scalar;
@@ -54,9 +54,6 @@ pub enum Index {
/// IVF index with Product Quantization /// IVF index with Product Quantization
IvfPq(IvfPqIndexBuilder), IvfPq(IvfPqIndexBuilder),
/// IVF index with Scalar Quantization
IvfSq(IvfSqIndexBuilder),
/// IVF index with RabitQ Quantization /// IVF index with RabitQ Quantization
IvfRq(IvfRqIndexBuilder), IvfRq(IvfRqIndexBuilder),
@@ -280,8 +277,6 @@ pub enum IndexType {
// Vector // Vector
#[serde(alias = "IVF_FLAT")] #[serde(alias = "IVF_FLAT")]
IvfFlat, IvfFlat,
#[serde(alias = "IVF_SQ")]
IvfSq,
#[serde(alias = "IVF_PQ")] #[serde(alias = "IVF_PQ")]
IvfPq, IvfPq,
#[serde(alias = "IVF_RQ")] #[serde(alias = "IVF_RQ")]
@@ -306,7 +301,6 @@ impl std::fmt::Display for IndexType {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self { match self {
Self::IvfFlat => write!(f, "IVF_FLAT"), Self::IvfFlat => write!(f, "IVF_FLAT"),
Self::IvfSq => write!(f, "IVF_SQ"),
Self::IvfPq => write!(f, "IVF_PQ"), Self::IvfPq => write!(f, "IVF_PQ"),
Self::IvfRq => write!(f, "IVF_RQ"), Self::IvfRq => write!(f, "IVF_RQ"),
Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"), Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
@@ -329,7 +323,6 @@ impl std::str::FromStr for IndexType {
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList), "LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
"FTS" | "INVERTED" => Ok(Self::FTS), "FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_FLAT" => Ok(Self::IvfFlat), "IVF_FLAT" => Ok(Self::IvfFlat),
"IVF_SQ" => Ok(Self::IvfSq),
"IVF_PQ" => Ok(Self::IvfPq), "IVF_PQ" => Ok(Self::IvfPq),
"IVF_RQ" => Ok(Self::IvfRq), "IVF_RQ" => Ok(Self::IvfRq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq), "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),

View File

@@ -209,38 +209,6 @@ impl IvfFlatIndexBuilder {
impl_ivf_params_setter!(); impl_ivf_params_setter!();
} }
/// Builder for an IVF SQ index.
///
/// This index compresses vectors using scalar quantization and groups them into IVF partitions.
/// It offers a balance between search performance and storage footprint.
#[derive(Debug, Clone)]
pub struct IvfSqIndexBuilder {
pub(crate) distance_type: DistanceType,
// IVF
pub(crate) num_partitions: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
pub(crate) target_partition_size: Option<u32>,
}
impl Default for IvfSqIndexBuilder {
fn default() -> Self {
Self {
distance_type: DistanceType::L2,
num_partitions: None,
sample_rate: 256,
max_iterations: 50,
target_partition_size: None,
}
}
}
impl IvfSqIndexBuilder {
impl_distance_type_setter!();
impl_ivf_params_setter!();
}
/// Builder for an IVF PQ index. /// Builder for an IVF PQ index.
/// ///
/// This index stores a compressed (quantized) copy of every vector. These vectors /// This index stores a compressed (quantized) copy of every vector. These vectors

View File

@@ -10,17 +10,13 @@ use http::StatusCode;
use lance_io::object_store::StorageOptions; use lance_io::object_store::StorageOptions;
use moka::future::Cache; use moka::future::Cache;
use reqwest::header::CONTENT_TYPE; use reqwest::header::CONTENT_TYPE;
use serde::Deserialize;
use tokio::task::spawn_blocking; use tokio::task::spawn_blocking;
use lance_namespace::models::{
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
};
use crate::database::{ use crate::database::{
CloneTableRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database, CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest, CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
OpenTableRequest, ReadConsistency, TableNamesRequest,
}; };
use crate::error::Result; use crate::error::Result;
use crate::table::BaseTable; use crate::table::BaseTable;
@@ -184,6 +180,11 @@ impl RemoteDatabaseOptionsBuilder {
} }
} }
#[derive(Deserialize)]
struct ListTablesResponse {
tables: Vec<String>,
}
#[derive(Debug)] #[derive(Debug)]
pub struct RemoteDatabase<S: HttpSend = Sender> { pub struct RemoteDatabase<S: HttpSend = Sender> {
client: RestfulLanceDbClient<S>, client: RestfulLanceDbClient<S>,
@@ -336,6 +337,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
self.client self.client
.get(&format!("/v1/namespace/{}/table/list", namespace_id)) .get(&format!("/v1/namespace/{}/table/list", namespace_id))
} else { } else {
// TODO: use new API for all listing operations once stable
self.client.get("/v1/table/") self.client.get("/v1/table/")
}; };
@@ -369,44 +371,6 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
Ok(tables) Ok(tables)
} }
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let mut req = self
.client
.get(&format!("/v1/namespace/{}/table/list", namespace_id));
if let Some(limit) = request.limit {
req = req.query(&[("limit", limit)]);
}
if let Some(ref page_token) = request.page_token {
req = req.query(&[("page_token", page_token)]);
}
let (request_id, rsp) = self.client.send_with_retry(req, None, true).await?;
let rsp = self.client.check_response(&request_id, rsp).await?;
let version = parse_server_version(&request_id, &rsp)?;
let response: ListTablesResponse = rsp.json().await.err_to_http(request_id)?;
// Cache the tables for future use
let namespace_vec = namespace_parts.to_vec();
for table in &response.tables {
let table_identifier =
build_table_identifier(table, &namespace_vec, &self.client.id_delimiter);
let cache_key = build_cache_key(table, &namespace_vec);
let remote_table = Arc::new(RemoteTable::new(
self.client.clone(),
table.clone(),
namespace_vec.clone(),
table_identifier.clone(),
version.clone(),
));
self.table_cache.insert(cache_key, remote_table).await;
}
Ok(response)
}
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> { async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
let data = match request.data { let data = match request.data {
CreateTableData::Data(data) => data, CreateTableData::Data(data) => data,
@@ -453,7 +417,6 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
index_cache_size: None, index_cache_size: None,
lance_read_params: None, lance_read_params: None,
location: None, location: None,
namespace_client: None,
}; };
let req = (callback)(req); let req = (callback)(req);
self.open_table(req).await self.open_table(req).await
@@ -627,101 +590,53 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
}) })
} }
async fn list_namespaces( async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
&self, let namespace_id =
request: ListNamespacesRequest, build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
) -> Result<ListNamespacesResponse> {
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let mut req = self let mut req = self
.client .client
.get(&format!("/v1/namespace/{}/list", namespace_id)); .get(&format!("/v1/namespace/{}/list", namespace_id));
if let Some(limit) = request.limit { if let Some(limit) = request.limit {
req = req.query(&[("limit", limit)]); req = req.query(&[("limit", limit)]);
} }
if let Some(ref page_token) = request.page_token { if let Some(page_token) = request.page_token {
req = req.query(&[("page_token", page_token)]); req = req.query(&[("page_token", page_token)]);
} }
let (request_id, resp) = self.client.send(req).await?; let (request_id, resp) = self.client.send(req).await?;
let resp = self.client.check_response(&request_id, resp).await?; let resp = self.client.check_response(&request_id, resp).await?;
resp.json().await.err_to_http(request_id) #[derive(Deserialize)]
struct ListNamespacesResponse {
namespaces: Vec<String>,
} }
async fn create_namespace( let parsed: ListNamespacesResponse = resp.json().await.map_err(|e| Error::Runtime {
&self, message: format!("Failed to parse namespace response: {}", e),
request: CreateNamespaceRequest, })?;
) -> Result<CreateNamespaceResponse> { Ok(parsed.namespaces)
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let mut req = self
.client
.post(&format!("/v1/namespace/{}/create", namespace_id));
// Build request body with mode and properties if present
#[derive(serde::Serialize)]
struct CreateNamespaceRequestBody {
#[serde(skip_serializing_if = "Option::is_none")]
mode: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
properties: Option<HashMap<String, String>>,
} }
let body = CreateNamespaceRequestBody { async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()> {
mode: request.mode.as_ref().map(|m| format!("{:?}", m)), let namespace_id =
properties: request.properties, build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
};
req = req.json(&body);
let (request_id, resp) = self.client.send(req).await?;
let resp = self.client.check_response(&request_id, resp).await?;
resp.json().await.err_to_http(request_id)
}
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let mut req = self
.client
.post(&format!("/v1/namespace/{}/drop", namespace_id));
// Build request body with mode and behavior if present
#[derive(serde::Serialize)]
struct DropNamespaceRequestBody {
#[serde(skip_serializing_if = "Option::is_none")]
mode: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
behavior: Option<String>,
}
let body = DropNamespaceRequestBody {
mode: request.mode.as_ref().map(|m| format!("{:?}", m)),
behavior: request.behavior.as_ref().map(|b| format!("{:?}", b)),
};
req = req.json(&body);
let (request_id, resp) = self.client.send(req).await?;
let resp = self.client.check_response(&request_id, resp).await?;
resp.json().await.err_to_http(request_id)
}
async fn describe_namespace(
&self,
request: DescribeNamespaceRequest,
) -> Result<DescribeNamespaceResponse> {
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let req = self let req = self
.client .client
.get(&format!("/v1/namespace/{}/describe", namespace_id)); .post(&format!("/v1/namespace/{}/create", namespace_id));
let (request_id, resp) = self.client.send(req).await?; let (request_id, resp) = self.client.send(req).await?;
let resp = self.client.check_response(&request_id, resp).await?; self.client.check_response(&request_id, resp).await?;
Ok(())
}
resp.json().await.err_to_http(request_id) async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()> {
let namespace_id =
build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
let req = self
.client
.post(&format!("/v1/namespace/{}/drop", namespace_id));
let (request_id, resp) = self.client.send(req).await?;
self.client.check_response(&request_id, resp).await?;
Ok(())
} }
fn as_any(&self) -> &dyn std::any::Any { fn as_any(&self) -> &dyn std::any::Any {
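
For context on the inline response structs introduced in this file (ListTablesResponse and ListNamespacesResponse), here is a minimal sketch of how that deserialization behaves, assuming serde with the derive feature and serde_json; the payload below is illustrative and shaped only the way the struct implies, not taken from any API specification.

use serde::Deserialize;

#[derive(Deserialize)]
struct ListNamespacesResponse {
    namespaces: Vec<String>,
}

fn main() {
    // A body with the field the struct declares; any extra keys in a real
    // response would be ignored by serde's default behavior.
    let body = r#"{"namespaces":["prod","staging"]}"#;
    let parsed: ListNamespacesResponse = serde_json::from_str(body).unwrap();
    assert_eq!(parsed.namespaces, vec!["prod".to_string(), "staging".to_string()]);
}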

View File

@@ -1072,14 +1072,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
body["num_bits"] = serde_json::Value::Number(num_bits.into()); body["num_bits"] = serde_json::Value::Number(num_bits.into());
} }
} }
Index::IvfSq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_SQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
}
Index::IvfHnswSq(index) => { Index::IvfHnswSq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string()); body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
body[METRIC_TYPE_KEY] = body[METRIC_TYPE_KEY] =

View File

@@ -40,11 +40,6 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams; use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt; use lance_index::DatasetIndexExt;
use lance_index::IndexType; use lance_index::IndexType;
use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestFullTextQuery,
QueryTableRequestVector, StringFtsQuery,
};
use lance_namespace::LanceNamespace;
use lance_table::format::Manifest; use lance_table::format::Manifest;
use lance_table::io::commit::ManifestNamingScheme; use lance_table::io::commit::ManifestNamingScheme;
use log::info; use log::info;
@@ -1485,7 +1480,7 @@ impl NativeTableExt for Arc<dyn BaseTable> {
} }
/// A table in a LanceDB database. /// A table in a LanceDB database.
#[derive(Clone)] #[derive(Debug, Clone)]
pub struct NativeTable { pub struct NativeTable {
name: String, name: String,
namespace: Vec<String>, namespace: Vec<String>,
@@ -1495,22 +1490,6 @@ pub struct NativeTable {
// This comes from the connection options. We store here so we can pass down // This comes from the connection options. We store here so we can pass down
// to the dataset when we recreate it (for example, in checkout_latest). // to the dataset when we recreate it (for example, in checkout_latest).
read_consistency_interval: Option<std::time::Duration>, read_consistency_interval: Option<std::time::Duration>,
// Optional namespace client for server-side query execution.
// When set, queries will be executed on the namespace server instead of locally.
namespace_client: Option<Arc<dyn LanceNamespace>>,
}
impl std::fmt::Debug for NativeTable {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("NativeTable")
.field("name", &self.name)
.field("namespace", &self.namespace)
.field("id", &self.id)
.field("uri", &self.uri)
.field("read_consistency_interval", &self.read_consistency_interval)
.field("namespace_client", &self.namespace_client)
.finish()
}
} }
impl std::fmt::Display for NativeTable { impl std::fmt::Display for NativeTable {
@@ -1545,7 +1524,7 @@ impl NativeTable {
/// * A [NativeTable] object. /// * A [NativeTable] object.
pub async fn open(uri: &str) -> Result<Self> { pub async fn open(uri: &str) -> Result<Self> {
let name = Self::get_table_name(uri)?; let name = Self::get_table_name(uri)?;
Self::open_with_params(uri, &name, vec![], None, None, None, None).await Self::open_with_params(uri, &name, vec![], None, None, None).await
} }
/// Opens an existing Table /// Opens an existing Table
@@ -1555,12 +1534,10 @@ impl NativeTable {
/// * `base_path` - The base path where the table is located /// * `base_path` - The base path where the table is located
/// * `name` The Table name /// * `name` The Table name
/// * `params` The [ReadParams] to use when opening the table /// * `params` The [ReadParams] to use when opening the table
/// * `namespace_client` - Optional namespace client for server-side query execution
/// ///
/// # Returns /// # Returns
/// ///
/// * A [NativeTable] object. /// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn open_with_params( pub async fn open_with_params(
uri: &str, uri: &str,
name: &str, name: &str,
@@ -1568,7 +1545,6 @@ impl NativeTable {
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>, write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<ReadParams>, params: Option<ReadParams>,
read_consistency_interval: Option<std::time::Duration>, read_consistency_interval: Option<std::time::Duration>,
namespace_client: Option<Arc<dyn LanceNamespace>>,
) -> Result<Self> { ) -> Result<Self> {
let params = params.unwrap_or_default(); let params = params.unwrap_or_default();
// patch the params if we have a write store wrapper // patch the params if we have a write store wrapper
@@ -1599,18 +1575,9 @@ impl NativeTable {
uri: uri.to_string(), uri: uri.to_string(),
dataset, dataset,
read_consistency_interval, read_consistency_interval,
namespace_client,
}) })
} }
/// Set the namespace client for server-side query execution.
///
/// When set, queries will be executed on the namespace server instead of locally.
pub fn with_namespace_client(mut self, namespace_client: Arc<dyn LanceNamespace>) -> Self {
self.namespace_client = Some(namespace_client);
self
}
fn get_table_name(uri: &str) -> Result<String> { fn get_table_name(uri: &str) -> Result<String> {
let path = Path::new(uri); let path = Path::new(uri);
let name = path let name = path
@@ -1647,12 +1614,10 @@ impl NativeTable {
/// * `namespace` - The namespace path. When non-empty, an explicit URI must be provided. /// * `namespace` - The namespace path. When non-empty, an explicit URI must be provided.
/// * `batches` RecordBatch to be saved in the database. /// * `batches` RecordBatch to be saved in the database.
/// * `params` - Write parameters. /// * `params` - Write parameters.
/// * `namespace_client` - Optional namespace client for server-side query execution
/// ///
/// # Returns /// # Returns
/// ///
/// * A [NativeTable] object. /// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn create( pub async fn create(
uri: &str, uri: &str,
name: &str, name: &str,
@@ -1661,7 +1626,6 @@ impl NativeTable {
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>, write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>, params: Option<WriteParams>,
read_consistency_interval: Option<std::time::Duration>, read_consistency_interval: Option<std::time::Duration>,
namespace_client: Option<Arc<dyn LanceNamespace>>,
) -> Result<Self> { ) -> Result<Self> {
// Default params uses format v1. // Default params uses format v1.
let params = params.unwrap_or(WriteParams { let params = params.unwrap_or(WriteParams {
@@ -1693,11 +1657,9 @@ impl NativeTable {
uri: uri.to_string(), uri: uri.to_string(),
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval), dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
read_consistency_interval, read_consistency_interval,
namespace_client,
}) })
} }
#[allow(clippy::too_many_arguments)]
pub async fn create_empty( pub async fn create_empty(
uri: &str, uri: &str,
name: &str, name: &str,
@@ -1706,7 +1668,6 @@ impl NativeTable {
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>, write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>, params: Option<WriteParams>,
read_consistency_interval: Option<std::time::Duration>, read_consistency_interval: Option<std::time::Duration>,
namespace_client: Option<Arc<dyn LanceNamespace>>,
) -> Result<Self> { ) -> Result<Self> {
let batches = RecordBatchIterator::new(vec![], schema); let batches = RecordBatchIterator::new(vec![], schema);
Self::create( Self::create(
@@ -1717,7 +1678,6 @@ impl NativeTable {
write_store_wrapper, write_store_wrapper,
params, params,
read_consistency_interval, read_consistency_interval,
namespace_client,
) )
.await .await
} }
@@ -1946,25 +1906,6 @@ impl NativeTable {
VectorIndexParams::with_ivf_flat_params(index.distance_type.into(), ivf_params); VectorIndexParams::with_ivf_flat_params(index.distance_type.into(), ivf_params);
Ok(Box::new(lance_idx_params)) Ok(Box::new(lance_idx_params))
} }
Index::IvfSq(index) => {
Self::validate_index_type(field, "IVF SQ", supported_vector_data_type)?;
let ivf_params = Self::build_ivf_params(
index.num_partitions,
index.target_partition_size,
index.sample_rate,
index.max_iterations,
);
let sq_params = SQBuildParams {
sample_rate: index.sample_rate as usize,
..Default::default()
};
let lance_idx_params = VectorIndexParams::with_ivf_sq_params(
index.distance_type.into(),
ivf_params,
sq_params,
);
Ok(Box::new(lance_idx_params))
}
Index::IvfPq(index) => { Index::IvfPq(index) => {
Self::validate_index_type(field, "IVF PQ", supported_vector_data_type)?; Self::validate_index_type(field, "IVF PQ", supported_vector_data_type)?;
let dim = Self::get_vector_dimension(field)?; let dim = Self::get_vector_dimension(field)?;
@@ -2072,7 +2013,6 @@ impl NativeTable {
Index::LabelList(_) => IndexType::LabelList, Index::LabelList(_) => IndexType::LabelList,
Index::FTS(_) => IndexType::Inverted, Index::FTS(_) => IndexType::Inverted,
Index::IvfFlat(_) Index::IvfFlat(_)
| Index::IvfSq(_)
| Index::IvfPq(_) | Index::IvfPq(_)
| Index::IvfRq(_) | Index::IvfRq(_)
| Index::IvfHnswPq(_) | Index::IvfHnswPq(_)
@@ -2095,278 +2035,6 @@ impl NativeTable {
Ok(DatasetRecordBatchStream::new(inner)) Ok(DatasetRecordBatchStream::new(inner))
} }
/// Execute a query on the namespace server instead of locally.
async fn namespace_query(
&self,
namespace_client: Arc<dyn LanceNamespace>,
query: &AnyQuery,
_options: QueryExecutionOptions,
) -> Result<DatasetRecordBatchStream> {
// Build table_id from namespace + table name
let mut table_id = self.namespace.clone();
table_id.push(self.name.clone());
// Convert AnyQuery to namespace QueryTableRequest
let mut ns_request = self.convert_to_namespace_query(query)?;
// Set the table ID on the request
ns_request.id = Some(table_id);
// Call the namespace query_table API
let response_bytes = namespace_client
.query_table(ns_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to execute server-side query: {}", e),
})?;
// Parse the Arrow IPC response into a RecordBatchStream
self.parse_arrow_ipc_response(response_bytes).await
}
/// Convert a QueryFilter to a SQL string for the namespace API.
fn filter_to_sql(&self, filter: &QueryFilter) -> Result<String> {
match filter {
QueryFilter::Sql(sql) => Ok(sql.clone()),
QueryFilter::Substrait(_) => Err(Error::NotSupported {
message: "Substrait filters are not supported for server-side queries".to_string(),
}),
QueryFilter::Datafusion(_) => Err(Error::NotSupported {
message: "Datafusion expression filters are not supported for server-side queries. Use SQL filter instead.".to_string(),
}),
}
}
/// Convert an AnyQuery to the namespace QueryTableRequest format.
fn convert_to_namespace_query(&self, query: &AnyQuery) -> Result<NsQueryTableRequest> {
match query {
AnyQuery::VectorQuery(vq) => {
// Extract the query vector(s)
let vector = self.extract_query_vector(&vq.query_vector)?;
// Convert filter to SQL string
let filter = match &vq.base.filter {
Some(f) => Some(self.filter_to_sql(f)?),
None => None,
};
// Convert select to columns list
let columns = match &vq.base.select {
Select::All => None,
Select::Columns(cols) => Some(cols.clone()),
Select::Dynamic(_) => {
return Err(Error::NotSupported {
message:
"Dynamic column selection is not supported for server-side queries"
.to_string(),
});
}
};
// Check for unsupported features
if vq.base.reranker.is_some() {
return Err(Error::NotSupported {
message: "Reranker is not supported for server-side queries".to_string(),
});
}
// Convert FTS query if present
let full_text_query = vq.base.full_text_search.as_ref().map(|fts| {
let columns = fts.columns();
let columns_vec = if columns.is_empty() {
None
} else {
Some(columns.into_iter().collect())
};
Box::new(QueryTableRequestFullTextQuery {
string_query: Some(Box::new(StringFtsQuery {
query: fts.query.to_string(),
columns: columns_vec,
})),
structured_query: None,
})
});
Ok(NsQueryTableRequest {
id: None, // Will be set in namespace_query
k: vq.base.limit.unwrap_or(10) as i32,
vector: Box::new(vector),
vector_column: vq.column.clone(),
filter,
columns,
offset: vq.base.offset.map(|o| o as i32),
distance_type: vq.distance_type.map(|dt| dt.to_string()),
nprobes: Some(vq.minimum_nprobes as i32),
ef: vq.ef.map(|e| e as i32),
refine_factor: vq.refine_factor.map(|r| r as i32),
lower_bound: vq.lower_bound,
upper_bound: vq.upper_bound,
prefilter: Some(vq.base.prefilter),
fast_search: Some(vq.base.fast_search),
with_row_id: Some(vq.base.with_row_id),
bypass_vector_index: Some(!vq.use_index),
full_text_query,
version: None,
})
}
AnyQuery::Query(q) => {
// For non-vector queries, pass an empty vector (similar to remote table implementation)
if q.reranker.is_some() {
return Err(Error::NotSupported {
message: "Reranker is not supported for server-side query execution"
.to_string(),
});
}
let filter = q
.filter
.as_ref()
.map(|f| self.filter_to_sql(f))
.transpose()?;
let columns = match &q.select {
Select::All => None,
Select::Columns(cols) => Some(cols.clone()),
Select::Dynamic(_) => {
return Err(Error::NotSupported {
message: "Dynamic columns are not supported for server-side query"
.to_string(),
});
}
};
// Handle full text search if present
let full_text_query = q.full_text_search.as_ref().map(|fts| {
let columns_vec = if fts.columns().is_empty() {
None
} else {
Some(fts.columns().iter().cloned().collect())
};
Box::new(QueryTableRequestFullTextQuery {
string_query: Some(Box::new(StringFtsQuery {
query: fts.query.to_string(),
columns: columns_vec,
})),
structured_query: None,
})
});
// Empty vector for non-vector queries
let vector = Box::new(QueryTableRequestVector {
single_vector: Some(vec![]),
multi_vector: None,
});
Ok(NsQueryTableRequest {
id: None, // Will be set by caller
vector,
k: q.limit.unwrap_or(10) as i32,
filter,
columns,
prefilter: Some(q.prefilter),
offset: q.offset.map(|o| o as i32),
ef: None,
refine_factor: None,
distance_type: None,
nprobes: None,
vector_column: None, // No vector column for plain queries
with_row_id: Some(q.with_row_id),
bypass_vector_index: Some(true), // No vector index for plain queries
full_text_query,
version: None,
fast_search: None,
lower_bound: None,
upper_bound: None,
})
}
}
}
/// Extract query vector(s) from Arrow arrays into the namespace format.
fn extract_query_vector(
&self,
query_vectors: &[Arc<dyn arrow_array::Array>],
) -> Result<QueryTableRequestVector> {
if query_vectors.is_empty() {
return Err(Error::InvalidInput {
message: "Query vector is required for vector search".to_string(),
});
}
// Handle single vector case
if query_vectors.len() == 1 {
let arr = &query_vectors[0];
let single_vector = self.array_to_f32_vec(arr)?;
Ok(QueryTableRequestVector {
single_vector: Some(single_vector),
multi_vector: None,
})
} else {
// Handle multi-vector case
let multi_vector: Result<Vec<Vec<f32>>> = query_vectors
.iter()
.map(|arr| self.array_to_f32_vec(arr))
.collect();
Ok(QueryTableRequestVector {
single_vector: None,
multi_vector: Some(multi_vector?),
})
}
}
/// Convert an Arrow array to a Vec<f32>.
fn array_to_f32_vec(&self, arr: &Arc<dyn arrow_array::Array>) -> Result<Vec<f32>> {
// Handle FixedSizeList (common for vectors)
if let Some(fsl) = arr
.as_any()
.downcast_ref::<arrow_array::FixedSizeListArray>()
{
let values = fsl.values();
if let Some(f32_arr) = values.as_any().downcast_ref::<arrow_array::Float32Array>() {
return Ok(f32_arr.values().to_vec());
}
}
// Handle direct Float32Array
if let Some(f32_arr) = arr.as_any().downcast_ref::<arrow_array::Float32Array>() {
return Ok(f32_arr.values().to_vec());
}
Err(Error::InvalidInput {
message: "Query vector must be Float32 type".to_string(),
})
}
/// Parse Arrow IPC response from the namespace server.
async fn parse_arrow_ipc_response(
&self,
bytes: bytes::Bytes,
) -> Result<DatasetRecordBatchStream> {
use arrow_ipc::reader::StreamReader;
use std::io::Cursor;
let cursor = Cursor::new(bytes);
let reader = StreamReader::try_new(cursor, None).map_err(|e| Error::Runtime {
message: format!("Failed to parse Arrow IPC response: {}", e),
})?;
// Collect all record batches
let schema = reader.schema();
let batches: Vec<_> = reader
.into_iter()
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| Error::Runtime {
message: format!("Failed to read Arrow IPC batches: {}", e),
})?;
// Create a stream from the batches
let stream = futures::stream::iter(batches.into_iter().map(Ok));
let record_batch_stream = Box::pin(
datafusion_physical_plan::stream::RecordBatchStreamAdapter::new(schema, stream),
);
Ok(DatasetRecordBatchStream::new(record_batch_stream))
}
/// Check whether the table uses V2 manifest paths. /// Check whether the table uses V2 manifest paths.
/// ///
/// See [Self::migrate_manifest_paths_v2] and [ManifestNamingScheme] for /// See [Self::migrate_manifest_paths_v2] and [ManifestNamingScheme] for
@@ -2798,12 +2466,6 @@ impl BaseTable for NativeTable {
query: &AnyQuery, query: &AnyQuery,
options: QueryExecutionOptions, options: QueryExecutionOptions,
) -> Result<DatasetRecordBatchStream> { ) -> Result<DatasetRecordBatchStream> {
// If namespace client is configured, use server-side query execution
if let Some(ref namespace_client) = self.namespace_client {
return self
.namespace_query(namespace_client.clone(), query, options)
.await;
}
self.generic_query(query, options).await self.generic_query(query, options).await
} }
@@ -3272,7 +2934,7 @@ mod tests {
let batches = make_test_batches(); let batches = make_test_batches();
let batches = Box::new(batches) as Box<dyn RecordBatchReader + Send>; let batches = Box::new(batches) as Box<dyn RecordBatchReader + Send>;
let table = NativeTable::create(uri, "test", vec![], batches, None, None, None, None) let table = NativeTable::create(uri, "test", vec![], batches, None, None, None)
.await .await
.unwrap(); .unwrap();
@@ -4912,91 +4574,4 @@ mod tests {
assert_eq!(result.len(), 1); assert_eq!(result.len(), 1);
assert_eq!(result[0].index_type, crate::index::IndexType::Bitmap); assert_eq!(result[0].index_type, crate::index::IndexType::Bitmap);
} }
#[tokio::test]
async fn test_convert_to_namespace_query_vector() {
let tmp_dir = tempdir().unwrap();
let dataset_path = tmp_dir.path().join("test_ns_query.lance");
let batches = make_test_batches();
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
.await
.unwrap();
let table = NativeTable::open(dataset_path.to_str().unwrap())
.await
.unwrap();
// Create a vector query
let query_vector = Arc::new(Float32Array::from(vec![1.0, 2.0, 3.0, 4.0]));
let vq = VectorQueryRequest {
base: QueryRequest {
limit: Some(10),
offset: Some(5),
filter: Some(QueryFilter::Sql("id > 0".to_string())),
select: Select::Columns(vec!["id".to_string()]),
..Default::default()
},
column: Some("vector".to_string()),
query_vector: vec![query_vector as Arc<dyn Array>],
minimum_nprobes: 20,
distance_type: Some(crate::DistanceType::L2),
..Default::default()
};
let any_query = AnyQuery::VectorQuery(vq);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
assert_eq!(ns_request.k, 10);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
assert_eq!(ns_request.columns, Some(vec!["id".to_string()]));
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
assert!(ns_request.vector.single_vector.is_some());
assert_eq!(
ns_request.vector.single_vector.as_ref().unwrap(),
&vec![1.0, 2.0, 3.0, 4.0]
);
}
#[tokio::test]
async fn test_convert_to_namespace_query_plain_query() {
let tmp_dir = tempdir().unwrap();
let dataset_path = tmp_dir.path().join("test_ns_plain.lance");
let batches = make_test_batches();
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
.await
.unwrap();
let table = NativeTable::open(dataset_path.to_str().unwrap())
.await
.unwrap();
// Create a plain (non-vector) query with filter and select
let q = QueryRequest {
limit: Some(20),
offset: Some(5),
filter: Some(QueryFilter::Sql("id > 5".to_string())),
select: Select::Columns(vec!["id".to_string()]),
with_row_id: true,
..Default::default()
};
let any_query = AnyQuery::Query(q);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
// Plain queries should pass an empty vector
assert_eq!(ns_request.k, 20);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
assert_eq!(ns_request.columns, Some(vec!["id".to_string()]));
assert_eq!(ns_request.with_row_id, Some(true));
assert_eq!(ns_request.bypass_vector_index, Some(true));
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries
// Should have an empty vector
assert!(ns_request.vector.single_vector.as_ref().unwrap().is_empty());
}
} }