Compare commits

..

16 Commits

Author SHA1 Message Date
Lance Release
e612686fdb Bump version: 0.25.0 → 0.25.1-beta.0 2025-09-10 14:24:07 +00:00
Wyatt Alt
e77d57a5b6 chore: update lance to 0.35.0-beta4 (#2639)
Updates lance to 0.35.0-beta4, which also incurs a datafusion update.
This brings in a fix for a memory leak in index caching, resulting from
a cyclical reference.
2025-09-10 06:19:35 -07:00
Jack Ye
9391ad1450 feat: support mTLS for remote database (#2638)
This PR adds mTLS (mutual TLS) configuration support for the LanceDB
remote HTTP client, allowing users to authenticate with client
certificates and configure custom CA certificates for server
verification.

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-09-09 21:04:46 -07:00
LuQQiu
79960b254e fix: add partition statistics to MetadataEraser (#2637)
Some of the data fusion optimizers optimize based on data statistics
(e.g. total bytes, number of rows).
If those statistics are not supplied, optimizers cannot optimize on top.
One example is Anti Hash Join which can optimize from LeftAnti (Left:
big table, Right: small table) to RightAnti (Left: small table, Right:
big table). Left Anti requires reading the whole big & small table while
RightAnti only requires reading the whole left table and supports limit
push down to only read partial of big table
2025-09-09 09:13:22 -07:00
Xuanwo
d19c64e29b chore: bump version for JSON support (#2633)
Bump version of lance to latest beta for JSON support.

Signed-off-by: Xuanwo <github@xuanwo.io>
2025-09-05 12:26:28 -07:00
Lance Release
06d5612443 Bump version: 0.22.0-beta.2 → 0.22.0 2025-09-04 08:33:40 +00:00
Lance Release
45f96f4151 Bump version: 0.22.0-beta.1 → 0.22.0-beta.2 2025-09-04 08:33:09 +00:00
Lance Release
f744b785f8 Bump version: 0.25.0-beta.2 → 0.25.0 2025-09-04 08:32:44 +00:00
Lance Release
2e3f745820 Bump version: 0.25.0-beta.1 → 0.25.0-beta.2 2025-09-04 08:32:43 +00:00
Jack Ye
683aaed716 chore: upgrade lance to 0.35.0 (#2625) 2025-09-04 01:31:13 -07:00
Lance Release
48f7b20daa Bump version: 0.22.0-beta.0 → 0.22.0-beta.1 2025-09-03 17:51:36 +00:00
Lance Release
4dd399ca29 Bump version: 0.25.0-beta.0 → 0.25.0-beta.1 2025-09-03 17:50:41 +00:00
Jack Ye
e6f1da31dc chore: upgrade lance to 0.34.0-beta.4 (#2621) 2025-09-02 21:33:55 -07:00
Wyatt Alt
a9ea785b15 fix: remote python sdk namespace typing (#2620)
This changes the default values for some namespace parameters in the
remote python SDK from None to [], to match the underlying code it
calls.

Prior to this commit, failing to supply "namespace" with the remote SDK
would cause an error because the underlying code it dispatches to does
not consider None to be valid input.
2025-09-02 16:32:32 -07:00
Colin Patrick McCabe
cc38453391 fix!: fix doctest in query.py (#2622)
Fix doctest in query.py to include cumulative_cpu, now that lance
includes that.
2025-09-02 15:47:32 -07:00
Lance Release
47747287b6 Bump version: 0.21.4-beta.1 → 0.22.0-beta.0 2025-08-29 21:20:57 +00:00
39 changed files with 1462 additions and 692 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.21.4-beta.1"
current_version = "0.22.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

1681
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,14 +15,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.34.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.34.0", default-features = false, "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"
@@ -33,12 +33,12 @@ arrow-schema = "55.1"
arrow-arith = "55.1"
arrow-cast = "55.1"
async-trait = "0"
datafusion = { version = "48.0", default-features = false }
datafusion-catalog = "48.0"
datafusion-common = { version = "48.0", default-features = false }
datafusion-execution = "48.0"
datafusion-expr = "48.0"
datafusion-physical-plan = "48.0"
datafusion = { version = "49.0", default-features = false }
datafusion-catalog = "49.0"
datafusion-common = { version = "49.0", default-features = false }
datafusion-execution = "49.0"
datafusion-expr = "49.0"
datafusion-physical-plan = "49.0"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
"num-traits",

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.4-beta.1</version>
<version>0.22.0-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.4-beta.1</version>
<version>0.22.0-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.4-beta.1</version>
<version>0.22.0-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.21.4-beta.1"
version = "0.22.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -3,7 +3,13 @@
import * as http from "http";
import { RequestListener } from "http";
import { Connection, ConnectionOptions, connect } from "../lancedb";
import {
ClientConfig,
Connection,
ConnectionOptions,
TlsConfig,
connect,
} from "../lancedb";
async function withMockDatabase(
listener: RequestListener,
@@ -148,4 +154,88 @@ describe("remote connection", () => {
},
);
});
describe("TlsConfig", () => {
it("should create TlsConfig with all fields", () => {
const tlsConfig: TlsConfig = {
certFile: "/path/to/cert.pem",
keyFile: "/path/to/key.pem",
sslCaCert: "/path/to/ca.pem",
assertHostname: false,
};
expect(tlsConfig.certFile).toBe("/path/to/cert.pem");
expect(tlsConfig.keyFile).toBe("/path/to/key.pem");
expect(tlsConfig.sslCaCert).toBe("/path/to/ca.pem");
expect(tlsConfig.assertHostname).toBe(false);
});
it("should create TlsConfig with partial fields", () => {
const tlsConfig: TlsConfig = {
certFile: "/path/to/cert.pem",
keyFile: "/path/to/key.pem",
};
expect(tlsConfig.certFile).toBe("/path/to/cert.pem");
expect(tlsConfig.keyFile).toBe("/path/to/key.pem");
expect(tlsConfig.sslCaCert).toBeUndefined();
expect(tlsConfig.assertHostname).toBeUndefined();
});
it("should create ClientConfig with TlsConfig", () => {
const tlsConfig: TlsConfig = {
certFile: "/path/to/cert.pem",
keyFile: "/path/to/key.pem",
sslCaCert: "/path/to/ca.pem",
assertHostname: true,
};
const clientConfig: ClientConfig = {
userAgent: "test-agent",
tlsConfig: tlsConfig,
};
expect(clientConfig.userAgent).toBe("test-agent");
expect(clientConfig.tlsConfig).toBeDefined();
expect(clientConfig.tlsConfig?.certFile).toBe("/path/to/cert.pem");
expect(clientConfig.tlsConfig?.keyFile).toBe("/path/to/key.pem");
expect(clientConfig.tlsConfig?.sslCaCert).toBe("/path/to/ca.pem");
expect(clientConfig.tlsConfig?.assertHostname).toBe(true);
});
it("should handle empty TlsConfig", () => {
const tlsConfig: TlsConfig = {};
expect(tlsConfig.certFile).toBeUndefined();
expect(tlsConfig.keyFile).toBeUndefined();
expect(tlsConfig.sslCaCert).toBeUndefined();
expect(tlsConfig.assertHostname).toBeUndefined();
});
it("should accept TlsConfig in connection options", () => {
const tlsConfig: TlsConfig = {
certFile: "/path/to/cert.pem",
keyFile: "/path/to/key.pem",
sslCaCert: "/path/to/ca.pem",
assertHostname: false,
};
// Just verify that the ClientConfig accepts the TlsConfig
const clientConfig: ClientConfig = {
tlsConfig: tlsConfig,
};
const connectionOptions: ConnectionOptions = {
apiKey: "fake",
clientConfig: clientConfig,
};
// Verify the configuration structure is correct
expect(connectionOptions.clientConfig).toBeDefined();
expect(connectionOptions.clientConfig?.tlsConfig).toBeDefined();
expect(connectionOptions.clientConfig?.tlsConfig?.certFile).toBe(
"/path/to/cert.pem",
);
});
});
});

View File

@@ -21,6 +21,7 @@ export {
ClientConfig,
TimeoutConfig,
RetryConfig,
TlsConfig,
OptimizeStats,
CompactionStats,
RemovalStats,

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.21.4-beta.1",
"version": "0.22.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.21.4-beta.1",
"version": "0.22.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -480,6 +480,7 @@ impl JsFullTextQuery {
}
#[napi(factory)]
#[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
pub fn boolean_query(queries: Vec<(String, &JsFullTextQuery)>) -> napi::Result<Self> {
let mut sub_queries = Vec::with_capacity(queries.len());
for (occur, q) in queries {

View File

@@ -69,6 +69,20 @@ pub struct RetryConfig {
pub statuses: Option<Vec<u16>>,
}
/// TLS/mTLS configuration for the remote HTTP client.
#[napi(object)]
#[derive(Debug, Default)]
pub struct TlsConfig {
/// Path to the client certificate file (PEM format) for mTLS authentication.
pub cert_file: Option<String>,
/// Path to the client private key file (PEM format) for mTLS authentication.
pub key_file: Option<String>,
/// Path to the CA certificate file (PEM format) for server verification.
pub ssl_ca_cert: Option<String>,
/// Whether to verify the hostname in the server's certificate.
pub assert_hostname: Option<bool>,
}
#[napi(object)]
#[derive(Debug, Default)]
pub struct ClientConfig {
@@ -77,6 +91,7 @@ pub struct ClientConfig {
pub timeout_config: Option<TimeoutConfig>,
pub extra_headers: Option<HashMap<String, String>>,
pub id_delimiter: Option<String>,
pub tls_config: Option<TlsConfig>,
}
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
@@ -107,6 +122,17 @@ impl From<RetryConfig> for lancedb::remote::RetryConfig {
}
}
impl From<TlsConfig> for lancedb::remote::TlsConfig {
fn from(config: TlsConfig) -> Self {
Self {
cert_file: config.cert_file,
key_file: config.key_file,
ssl_ca_cert: config.ssl_ca_cert,
assert_hostname: config.assert_hostname.unwrap_or(true),
}
}
}
impl From<ClientConfig> for lancedb::remote::ClientConfig {
fn from(config: ClientConfig) -> Self {
Self {
@@ -117,6 +143,7 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
timeout_config: config.timeout_config.map(Into::into).unwrap_or_default(),
extra_headers: config.extra_headers.unwrap_or_default(),
id_delimiter: config.id_delimiter,
tls_config: config.tls_config.map(Into::into),
}
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.0-beta.0"
current_version = "0.25.1-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.0-beta.0"
version = "0.25.1-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -95,9 +95,10 @@ class DBConnection(EnforceOverrides):
@abstractmethod
def table_names(
self,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]:
"""List all tables in this database, in sorted order
@@ -543,9 +544,10 @@ class LanceDBConnection(DBConnection):
@override
def table_names(
self,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]:
"""Get the names of all tables in the database. The names are sorted.

View File

@@ -138,9 +138,10 @@ class LanceNamespaceDBConnection(DBConnection):
@override
def table_names(
self,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]:
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
response = self._ns.list_tables(request)
@@ -190,7 +191,7 @@ class LanceNamespaceDBConnection(DBConnection):
json_schema = _convert_pyarrow_schema_to_json(schema)
# Create table request with namespace
table_id = (namespace or []) + [name]
table_id = namespace + [name]
request = CreateTableRequest(id=table_id, var_schema=json_schema)
# Create empty Arrow IPC stream bytes
@@ -219,7 +220,7 @@ class LanceNamespaceDBConnection(DBConnection):
storage_options: Optional[Dict[str, str]] = None,
index_cache_size: Optional[int] = None,
) -> Table:
table_id = (namespace or []) + [name]
table_id = namespace + [name]
request = DescribeTableRequest(id=table_id)
response = self._ns.describe_table(request)
@@ -236,9 +237,9 @@ class LanceNamespaceDBConnection(DBConnection):
)
@override
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
def drop_table(self, name: str, namespace: List[str] = []):
# Use namespace drop_table directly
table_id = (namespace or []) + [name]
table_id = namespace + [name]
request = DropTableRequest(id=table_id)
self._ns.drop_table(request)
@@ -247,8 +248,8 @@ class LanceNamespaceDBConnection(DBConnection):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
raise NotImplementedError(
"rename_table is not supported for namespace connections"
@@ -261,7 +262,7 @@ class LanceNamespaceDBConnection(DBConnection):
)
@override
def drop_all_tables(self, namespace: Optional[List[str]] = None):
def drop_all_tables(self, namespace: List[str] = []):
for table_name in self.table_names(namespace=namespace):
self.drop_table(table_name, namespace=namespace)

View File

@@ -943,20 +943,22 @@ class LanceQueryBuilder(ABC):
>>> query = [100, 100]
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[]
TracedExec, metrics=[]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
TracedExec, metrics=[], cumulative_cpu=...
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
cumulative_cpu=...
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
cumulative_cpu=...
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
bytes_read=..., iops=..., requests=...], cumulative_cpu=...
Returns
-------

View File

@@ -8,7 +8,7 @@ from typing import List, Optional
from lancedb import __version__
__all__ = ["TimeoutConfig", "RetryConfig", "ClientConfig"]
__all__ = ["TimeoutConfig", "RetryConfig", "TlsConfig", "ClientConfig"]
@dataclass
@@ -112,6 +112,29 @@ class RetryConfig:
statuses: Optional[List[int]] = None
@dataclass
class TlsConfig:
"""TLS/mTLS configuration for the remote HTTP client.
Attributes
----------
cert_file: Optional[str]
Path to the client certificate file (PEM format) for mTLS authentication.
key_file: Optional[str]
Path to the client private key file (PEM format) for mTLS authentication.
ssl_ca_cert: Optional[str]
Path to the CA certificate file (PEM format) for server verification.
assert_hostname: bool
Whether to verify the hostname in the server's certificate. Default is True.
Set to False to disable hostname verification (use with caution).
"""
cert_file: Optional[str] = None
key_file: Optional[str] = None
ssl_ca_cert: Optional[str] = None
assert_hostname: bool = True
@dataclass
class ClientConfig:
user_agent: str = f"LanceDB-Python-Client/{__version__}"
@@ -119,9 +142,12 @@ class ClientConfig:
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
extra_headers: Optional[dict] = None
id_delimiter: Optional[str] = None
tls_config: Optional[TlsConfig] = None
def __post_init__(self):
if isinstance(self.retry_config, dict):
self.retry_config = RetryConfig(**self.retry_config)
if isinstance(self.timeout_config, dict):
self.timeout_config = TimeoutConfig(**self.timeout_config)
if isinstance(self.tls_config, dict):
self.tls_config = TlsConfig(**self.tls_config)

View File

@@ -151,9 +151,10 @@ class RemoteDBConnection(DBConnection):
@override
def table_names(
self,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]:
"""List the names of all tables in the database.
@@ -343,7 +344,7 @@ class RemoteDBConnection(DBConnection):
return RemoteTable(table, self.db_name)
@override
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
def drop_table(self, name: str, namespace: List[str] = []):
"""Drop a table from the database.
Parameters
@@ -361,8 +362,8 @@ class RemoteDBConnection(DBConnection):
self,
cur_name: str,
new_name: str,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename a table in the database.

View File

@@ -175,6 +175,18 @@ def test_table_names(tmp_db: lancedb.DBConnection):
tmp_db.create_table("test3", data=data)
assert tmp_db.table_names() == ["test1", "test2", "test3"]
# Test that positional arguments for page_token and limit
result = list(tmp_db.table_names("test1", 1)) # page_token="test1", limit=1
assert result == ["test2"], f"Expected ['test2'], got {result}"
# Test mixed positional and keyword arguments
result = list(tmp_db.table_names("test2", limit=2))
assert result == ["test3"], f"Expected ['test3'], got {result}"
# Test that namespace parameter can be passed as keyword
result = list(tmp_db.table_names(namespace=[]))
assert len(result) == 3
@pytest.mark.asyncio
async def test_table_names_async(tmp_path):

View File

@@ -420,6 +420,10 @@ class TestNamespaceConnection:
assert "table2" in table_names
assert len(table_names) == 1
# Test that drop_table works without explicit namespace parameter
db.drop_table("table2")
assert len(list(db.table_names())) == 0
# Should not be able to open dropped table
with pytest.raises(RuntimeError):
db.open_table("table1")
@@ -487,6 +491,11 @@ class TestNamespaceConnection:
# Verify all tables are gone
assert len(list(db.table_names())) == 0
# Test that table_names works with keyword-only namespace parameter
db.create_table("test_table", schema=schema)
result = list(db.table_names(namespace=[]))
assert "test_table" in result
def test_table_operations(self):
"""Test various table operations through namespace."""
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})

View File

@@ -301,6 +301,7 @@ pub struct PyClientConfig {
timeout_config: Option<PyClientTimeoutConfig>,
extra_headers: Option<HashMap<String, String>>,
id_delimiter: Option<String>,
tls_config: Option<PyClientTlsConfig>,
}
#[derive(FromPyObject)]
@@ -321,6 +322,14 @@ pub struct PyClientTimeoutConfig {
pool_idle_timeout: Option<Duration>,
}
#[derive(FromPyObject)]
pub struct PyClientTlsConfig {
cert_file: Option<String>,
key_file: Option<String>,
ssl_ca_cert: Option<String>,
assert_hostname: bool,
}
#[cfg(feature = "remote")]
impl From<PyClientRetryConfig> for lancedb::remote::RetryConfig {
fn from(value: PyClientRetryConfig) -> Self {
@@ -347,6 +356,18 @@ impl From<PyClientTimeoutConfig> for lancedb::remote::TimeoutConfig {
}
}
#[cfg(feature = "remote")]
impl From<PyClientTlsConfig> for lancedb::remote::TlsConfig {
fn from(value: PyClientTlsConfig) -> Self {
Self {
cert_file: value.cert_file,
key_file: value.key_file,
ssl_ca_cert: value.ssl_ca_cert,
assert_hostname: value.assert_hostname,
}
}
}
#[cfg(feature = "remote")]
impl From<PyClientConfig> for lancedb::remote::ClientConfig {
fn from(value: PyClientConfig) -> Self {
@@ -356,6 +377,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
timeout_config: value.timeout_config.map(Into::into).unwrap_or_default(),
extra_headers: value.extra_headers.unwrap_or_default(),
id_delimiter: value.id_delimiter,
tls_config: value.tls_config.map(Into::into),
}
}
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.21.4-beta.1"
version = "0.22.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -62,10 +62,8 @@ async fn main() -> Result<()> {
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
for text in out.iter() {
if let Some(text) = text {
println!("Result: {}", text);
}
for text in out.iter().flatten() {
println!("Result: {}", text);
}
}

View File

@@ -9,7 +9,7 @@ use futures::{stream::BoxStream, TryFutureExt};
use lance::io::WrappingObjectStore;
use object_store::{
path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, UploadPart,
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart,
};
use async_trait::async_trait;
@@ -73,7 +73,7 @@ impl ObjectStore for MirroringObjectStore {
async fn put_multipart_opts(
&self,
location: &Path,
opts: PutMultipartOpts,
opts: PutMultipartOptions,
) -> Result<Box<dyn MultipartUpload>> {
if location.primary_only() {
return self.primary.put_multipart_opts(location, opts).await;

View File

@@ -11,7 +11,7 @@ use futures::stream::BoxStream;
use lance::io::WrappingObjectStore;
use object_store::{
path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
};
#[derive(Debug, Default)]
@@ -110,7 +110,7 @@ impl ObjectStore for IoTrackingStore {
async fn put_multipart_opts(
&self,
location: &Path,
opts: PutMultipartOpts,
opts: PutMultipartOptions,
) -> OSResult<Box<dyn MultipartUpload>> {
let target = self.target.put_multipart_opts(location, opts).await?;
Ok(Box::new(IoTrackingMultipartUpload {

View File

@@ -18,5 +18,5 @@ const ARROW_FILE_CONTENT_TYPE: &str = "application/vnd.apache.arrow.file";
#[cfg(test)]
const JSON_CONTENT_TYPE: &str = "application/json";
pub use client::{ClientConfig, RetryConfig, TimeoutConfig};
pub use client::{ClientConfig, RetryConfig, TimeoutConfig, TlsConfig};
pub use db::{RemoteDatabaseOptions, RemoteDatabaseOptionsBuilder};

View File

@@ -15,6 +15,19 @@ use crate::remote::retry::{ResolvedRetryConfig, RetryCounter};
const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id");
/// Configuration for TLS/mTLS settings.
#[derive(Clone, Debug, Default)]
pub struct TlsConfig {
/// Path to the client certificate file (PEM format)
pub cert_file: Option<String>,
/// Path to the client private key file (PEM format)
pub key_file: Option<String>,
/// Path to the CA certificate file for server verification (PEM format)
pub ssl_ca_cert: Option<String>,
/// Whether to verify the hostname in the server's certificate
pub assert_hostname: bool,
}
/// Configuration for the LanceDB Cloud HTTP client.
#[derive(Clone, Debug)]
pub struct ClientConfig {
@@ -28,6 +41,8 @@ pub struct ClientConfig {
/// The delimiter to use when constructing object identifiers.
/// If not default, passes as query parameter.
pub id_delimiter: Option<String>,
/// TLS configuration for mTLS support
pub tls_config: Option<TlsConfig>,
}
impl Default for ClientConfig {
@@ -38,6 +53,7 @@ impl Default for ClientConfig {
user_agent: concat!("LanceDB-Rust-Client/", env!("CARGO_PKG_VERSION")).into(),
extra_headers: HashMap::new(),
id_delimiter: None,
tls_config: None,
}
}
}
@@ -245,6 +261,49 @@ impl RestfulLanceDbClient<Sender> {
if let Some(timeout) = timeout {
client_builder = client_builder.timeout(timeout);
}
// Configure mTLS if TlsConfig is provided
if let Some(tls_config) = &client_config.tls_config {
// Load client certificate and key for mTLS
if let (Some(cert_file), Some(key_file)) = (&tls_config.cert_file, &tls_config.key_file)
{
let cert = std::fs::read(cert_file).map_err(|err| Error::Other {
message: format!("Failed to read certificate file: {}", cert_file),
source: Some(Box::new(err)),
})?;
let key = std::fs::read(key_file).map_err(|err| Error::Other {
message: format!("Failed to read key file: {}", key_file),
source: Some(Box::new(err)),
})?;
let identity = reqwest::Identity::from_pem(&[&cert[..], &key[..]].concat())
.map_err(|err| Error::Other {
message: "Failed to create client identity from certificate and key".into(),
source: Some(Box::new(err)),
})?;
client_builder = client_builder.identity(identity);
}
// Load CA certificate for server verification
if let Some(ca_cert_file) = &tls_config.ssl_ca_cert {
let ca_cert = std::fs::read(ca_cert_file).map_err(|err| Error::Other {
message: format!("Failed to read CA certificate file: {}", ca_cert_file),
source: Some(Box::new(err)),
})?;
let ca_cert =
reqwest::Certificate::from_pem(&ca_cert).map_err(|err| Error::Other {
message: "Failed to create CA certificate from PEM".into(),
source: Some(Box::new(err)),
})?;
client_builder = client_builder.add_root_certificate(ca_cert);
}
// Configure hostname verification
client_builder =
client_builder.danger_accept_invalid_hostnames(!tls_config.assert_hostname);
}
let client = client_builder
.default_headers(Self::default_headers(
api_key,
@@ -661,4 +720,50 @@ mod tests {
Some(Duration::from_secs(120))
);
}
#[test]
fn test_tls_config_default() {
let config = TlsConfig::default();
assert!(config.cert_file.is_none());
assert!(config.key_file.is_none());
assert!(config.ssl_ca_cert.is_none());
assert!(!config.assert_hostname);
}
#[test]
fn test_tls_config_with_mtls() {
let tls_config = TlsConfig {
cert_file: Some("/path/to/cert.pem".to_string()),
key_file: Some("/path/to/key.pem".to_string()),
ssl_ca_cert: Some("/path/to/ca.pem".to_string()),
assert_hostname: true,
};
assert_eq!(tls_config.cert_file, Some("/path/to/cert.pem".to_string()));
assert_eq!(tls_config.key_file, Some("/path/to/key.pem".to_string()));
assert_eq!(tls_config.ssl_ca_cert, Some("/path/to/ca.pem".to_string()));
assert!(tls_config.assert_hostname);
}
#[test]
fn test_client_config_with_tls() {
let tls_config = TlsConfig {
cert_file: Some("/path/to/cert.pem".to_string()),
key_file: Some("/path/to/key.pem".to_string()),
ssl_ca_cert: None,
assert_hostname: false,
};
let client_config = ClientConfig {
tls_config: Some(tls_config.clone()),
..Default::default()
};
assert!(client_config.tls_config.is_some());
let config_tls = client_config.tls_config.unwrap();
assert_eq!(config_tls.cert_file, Some("/path/to/cert.pem".to_string()));
assert_eq!(config_tls.key_file, Some("/path/to/key.pem".to_string()));
assert!(config_tls.ssl_ca_cert.is_none());
assert!(!config_tls.assert_hostname);
}
}

View File

@@ -32,7 +32,7 @@ use lance::index::vector::VectorIndexParams;
use lance::io::WrappingObjectStore;
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
use lance_datafusion::utils::StreamingWriteSource;
use lance_index::scalar::{ScalarIndexParams, ScalarIndexType};
use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
use lance_index::vector::hnsw::builder::HnswBuildParams;
use lance_index::vector::ivf::IvfBuildParams;
use lance_index::vector::pq::PQBuildParams;
@@ -1778,7 +1778,9 @@ impl NativeTable {
);
Ok(Box::new(lance_idx_params))
} else if supported_btree_data_type(field.data_type()) {
Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::BTree)))
Ok(Box::new(ScalarIndexParams::for_builtin(
BuiltinIndexType::BTree,
)))
} else {
return Err(Error::InvalidInput {
message: format!(
@@ -1791,15 +1793,21 @@ impl NativeTable {
}
Index::BTree(_) => {
Self::validate_index_type(field, "BTree", supported_btree_data_type)?;
Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::BTree)))
Ok(Box::new(ScalarIndexParams::for_builtin(
BuiltinIndexType::BTree,
)))
}
Index::Bitmap(_) => {
Self::validate_index_type(field, "Bitmap", supported_bitmap_data_type)?;
Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::Bitmap)))
Ok(Box::new(ScalarIndexParams::for_builtin(
BuiltinIndexType::Bitmap,
)))
}
Index::LabelList(_) => {
Self::validate_index_type(field, "LabelList", supported_label_list_data_type)?;
Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::LabelList)))
Ok(Box::new(ScalarIndexParams::for_builtin(
BuiltinIndexType::LabelList,
)))
}
Index::FTS(fts_opts) => {
Self::validate_index_type(field, "FTS", supported_fts_data_type)?;

View File

@@ -121,6 +121,10 @@ impl ExecutionPlan for MetadataEraserExec {
as SendableRecordBatchStream,
)
}
fn partition_statistics(&self, partition: Option<usize>) -> DataFusionResult<Statistics> {
self.input.partition_statistics(partition)
}
}
#[derive(Debug)]
@@ -227,6 +231,7 @@ pub mod tests {
prelude::{SessionConfig, SessionContext},
};
use datafusion_catalog::TableProvider;
use datafusion_common::stats::Precision;
use datafusion_execution::SendableRecordBatchStream;
use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
use futures::{StreamExt, TryStreamExt};
@@ -495,6 +500,7 @@ pub mod tests {
plan,
"MetadataEraserExec
ProjectionExec:...
CooperativeExec...
LanceRead:...",
)
.await;
@@ -509,4 +515,24 @@ pub mod tests {
TestFixture::check_plan(plan, "").await;
}
#[tokio::test]
async fn test_metadata_eraser_propagates_statistics() {
let fixture = TestFixture::new().await;
let plan =
LogicalPlanBuilder::scan("foo", provider_as_source(fixture.adapter.clone()), None)
.unwrap()
.build()
.unwrap();
let ctx = SessionContext::new();
let physical_plan = ctx.state().create_physical_plan(&plan).await.unwrap();
assert_eq!(physical_plan.name(), "MetadataEraserExec");
let partition_stats = physical_plan.partition_statistics(None).unwrap();
assert!(matches!(partition_stats.num_rows, Precision::Exact(10)));
}
}