Compare commits

..

4 Commits

Author SHA1 Message Date
lancedb automation
a7b29dfabb chore: update lance dependency to v2.0.0-beta.9 2026-01-15 17:21:57 +00:00
Will Jones
1840aa7edc feat(rust)!: remove default features (#2912)
BREAKING CHANGE: removes `aws`, `dynamodb`, `azure`, `gcs`, `oss`,
`huggingface` from default Rust features. They can be enabled by users
as needed.

They are still enabled for Python and NodeJS, since those users don't
control the compilation of artifacts.

Closes #2911
2026-01-13 11:23:14 -08:00
Xuanwo
489c91c5d6 feat: enable huggingface feature by default (#2910) 2026-01-13 20:42:11 +05:30
LanceDB Robot
f0c3fe5c6d chore: update lance dependency to v1.0.2-rc.2 (#2908)
## Summary

- bump Lance dependencies to v1.0.2-rc.2 using ci/set_lance_version.py
- verified `cargo clippy --workspace --tests --all-features -- -D warnings`
- ran cargo fmt --all

Tag: https://github.com/lance-format/lance/releases/tag/v1.0.2-rc.2
2026-01-12 12:28:07 -08:00
19 changed files with 869 additions and 788 deletions

View File

@@ -167,13 +167,13 @@ jobs:
- name: Build
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
cargo build --profile ci --features aws,remote --tests --locked --target ${{ matrix.target }}
- name: Run tests
# Can only run tests when target matches host
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo test --profile ci --features remote --locked
cargo test --profile ci --features aws,remote --locked
msrv:
# Check the minimum supported Rust version

1267
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,39 +15,39 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=2.0.0-beta.7", default-features = false, "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.7", default-features = false, "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.7", default-features = false, "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=2.0.0-beta.9", default-features = false, "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.9", default-features = false, "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.9", default-features = false, "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.9", "tag" = "v2.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2.0", optional = false }
arrow-array = "57.2.0"
arrow-data = "57.2.0"
arrow-ipc = "57.2.0"
arrow-ord = "57.2.0"
arrow-schema = "57.2.0"
arrow-select = "57.2.0"
arrow-cast = "57.2.0"
arrow = { version = "57.2", optional = false }
arrow-array = "57.2"
arrow-data = "57.2"
arrow-ipc = "57.2"
arrow-ord = "57.2"
arrow-schema = "57.2"
arrow-select = "57.2"
arrow-cast = "57.2"
async-trait = "0"
datafusion = { version = "51.0.0", default-features = false }
datafusion-catalog = "51.0.0"
datafusion-common = { version = "51.0.0", default-features = false }
datafusion-execution = "51.0.0"
datafusion-expr = "51.0.0"
datafusion-physical-plan = "51.0.0"
datafusion = { version = "51.0", default-features = false }
datafusion-catalog = "51.0"
datafusion-common = { version = "51.0", default-features = false }
datafusion-execution = "51.0"
datafusion-expr = "51.0"
datafusion-physical-plan = "51.0"
env_logger = "0.11"
half = { "version" = "2.7.1", default-features = false, features = [
half = { "version" = "2.6.0", default-features = false, features = [
"num-traits",
] }
futures = "0"
@@ -59,7 +59,7 @@ rand = "0.9"
snafu = "0.8"
url = "2"
num-traits = "0.2"
regex = "1.12"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"

View File

@@ -36,6 +36,6 @@ aws-lc-rs = "=1.13.0"
napi-build = "2.1"
[features]
default = ["remote", "lancedb/default"]
default = ["remote", "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]
fp16kernels = ["lancedb/fp16kernels"]
remote = ["lancedb/remote"]

View File

@@ -14,7 +14,7 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "57.2.0", features = ["pyarrow"] }
arrow = { version = "57.2", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true
@@ -38,6 +38,6 @@ pyo3-build-config = { version = "0.26", features = [
] }
[features]
default = ["remote", "lancedb/default"]
default = ["remote", "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]
fp16kernels = ["lancedb/fp16kernels"]
remote = ["lancedb/remote"]

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -40,7 +39,7 @@ impl RecordBatchStream {
(*self.schema)
.clone()
.into_pyarrow(py)
.map(|bound| bound.unbind())
.map(|obj| obj.unbind())
}
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -56,11 +55,11 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
Python::with_gil(|py| {
Python::attach(|py| {
inner_next
.infer_error()?
.to_pyarrow(py)
.map(|bound| bound.unbind())
.map(|obj| obj.unbind())
})
})
}

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -13,7 +12,7 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods,
types::{PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -115,7 +114,7 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -153,7 +152,7 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -188,7 +187,7 @@ impl Connection {
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -298,6 +297,8 @@ impl Connection {
future_into_py(py, async move {
use lance_namespace::models::ListNamespacesRequest;
let request = ListNamespacesRequest {
identity: None,
context: None,
id: if namespace.is_empty() {
None
} else {
@@ -305,10 +306,9 @@ impl Connection {
},
page_token,
limit: limit.map(|l| l as i32),
..Default::default()
};
let response = inner.list_namespaces(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
dict.set_item("page_token", response.page_token)?;
@@ -328,18 +328,25 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::CreateNamespaceRequest;
let mode_value = mode.and_then(|m| match m.to_lowercase().as_str() {
"create" => Some("Create".to_string()),
"exist_ok" => Some("ExistOk".to_string()),
"overwrite" => Some("Overwrite".to_string()),
_ => None,
});
let request = CreateNamespaceRequest {
identity: None,
context: None,
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
mode,
mode: mode_value,
properties,
..Default::default()
};
let response = inner.create_namespace(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -358,18 +365,29 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::DropNamespaceRequest;
let mode_value = mode.and_then(|m| match m.to_uppercase().as_str() {
"SKIP" => Some("Skip".to_string()),
"FAIL" => Some("Fail".to_string()),
_ => None,
});
let behavior_value = behavior.and_then(|b| match b.to_uppercase().as_str() {
"RESTRICT" => Some("Restrict".to_string()),
"CASCADE" => Some("Cascade".to_string()),
_ => None,
});
let request = DropNamespaceRequest {
identity: None,
context: None,
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
mode,
behavior,
..Default::default()
mode: mode_value,
behavior: behavior_value,
};
let response = inner.drop_namespace(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
dict.set_item("transaction_id", response.transaction_id)?;
@@ -388,15 +406,16 @@ impl Connection {
future_into_py(py, async move {
use lance_namespace::models::DescribeNamespaceRequest;
let request = DescribeNamespaceRequest {
identity: None,
context: None,
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -416,6 +435,8 @@ impl Connection {
future_into_py(py, async move {
use lance_namespace::models::ListTablesRequest;
let request = ListTablesRequest {
identity: None,
context: None,
id: if namespace.is_empty() {
None
} else {
@@ -423,10 +444,9 @@ impl Connection {
},
page_token,
limit: limit.map(|l| l as i32),
..Default::default()
};
let response = inner.list_tables(request).await.infer_error()?;
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;
dict.set_item("page_token", response.page_token)?;

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -41,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
request_id,
source,
status_code,
} => Python::with_gil(|py| {
} => Python::attach(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
@@ -76,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
max_read_failures,
source,
status_code,
} => Python::with_gil(|py| {
} => Python::attach(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -13,7 +12,7 @@ pub struct PyHeaderProvider {
impl Clone for PyHeaderProvider {
fn clone(&self) -> Self {
Python::with_gil(|py| Self {
Python::attach(|py| Self {
provider: self.provider.clone_ref(py),
})
}
@@ -26,7 +25,7 @@ impl PyHeaderProvider {
/// Get headers from the Python provider (internal implementation)
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
Python::with_gil(|py| {
Python::attach(|py| {
// Call the get_headers method
let result = self.provider.call_method0(py, "get_headers");

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -282,7 +281,7 @@ impl PyPermutationReader {
let reader = slf.reader.clone();
future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::attach(|py| schema.as_ref().to_pyarrow(py).map(|obj| obj.unbind()))
})
}

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -217,7 +216,7 @@ impl<'py> IntoPyObject<'py> for PyQueryVectors {
let py_objs = self
.0
.into_iter()
.map(|v| v.to_data().into_pyarrow(py).map(|b| b.unbind()))
.map(|v| v.to_data().into_pyarrow(py))
.collect::<Result<Vec<_>, _>>()?;
PyList::new(py, py_objs)
}
@@ -454,7 +453,7 @@ impl Query {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -533,7 +532,7 @@ impl TakeQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -628,7 +627,7 @@ impl FTSQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -807,7 +806,7 @@ impl VectorQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -18,20 +17,20 @@ use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider
pub struct PyStorageOptionsProvider {
/// The Python object implementing fetch_storage_options()
inner: PyObject,
inner: Py<PyAny>,
}
impl Clone for PyStorageOptionsProvider {
fn clone(&self) -> Self {
Python::with_gil(|py| Self {
Python::attach(|py| Self {
inner: self.inner.clone_ref(py),
})
}
}
impl PyStorageOptionsProvider {
pub fn new(obj: PyObject) -> PyResult<Self> {
Python::with_gil(|py| {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
Python::attach(|py| {
// Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? {
return Err(pyo3::exceptions::PyTypeError::new_err(
@@ -61,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
Python::with_gil(|py| {
Python::attach(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
.inner
@@ -120,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
}
fn provider_id(&self) -> String {
Python::with_gil(|py| {
Python::attach(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
obj.call_method0("provider_id")
@@ -144,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
/// This is the main entry point for converting Python StorageOptionsProvider objects
/// to Rust trait objects that can be used by the Lance ecosystem.
pub fn py_object_to_storage_options_provider(
py_obj: PyObject,
py_obj: Py<PyAny>,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{collections::HashMap, sync::Arc};
@@ -288,7 +287,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -438,7 +437,7 @@ impl Table {
future_into_py(self_.py(), async move {
let stats = inner.index_stats(&index_name).await.infer_error()?;
if let Some(stats) = stats {
Python::with_gil(|py| {
Python::attach(|py| {
let dict = PyDict::new(py);
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
@@ -468,7 +467,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let stats = inner.stats().await.infer_error()?;
Python::with_gil(|py| {
Python::attach(|py| {
let dict = PyDict::new(py);
dict.set_item("total_bytes", stats.total_bytes)?;
dict.set_item("num_rows", stats.num_rows)?;
@@ -517,7 +516,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let versions = inner.list_versions().await.infer_error()?;
let versions_as_dict = Python::with_gil(|py| {
let versions_as_dict = Python::attach(|py| {
versions
.iter()
.map(|v| {
@@ -868,7 +867,7 @@ impl Tags {
let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?;
Python::with_gil(|py| {
Python::attach(|py| {
let py_dict = PyDict::new(py);
for (key, contents) in res {
let value_dict = PyDict::new(py);

View File

@@ -104,11 +104,16 @@ test-log = "0.2"
[features]
default = ["aws", "gcs", "azure", "dynamodb", "oss"]
default = []
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
huggingface = [
"lance/huggingface",
"lance-io/huggingface",
"lance-namespace-impls/dir-huggingface",
]
dynamodb = ["lance/dynamodb", "aws"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest", "lance-namespace-impls/rest-adapter"]
fp16kernels = ["lance-linalg/fp16kernels"]
@@ -148,3 +153,6 @@ name = "ivf_pq"
[[example]]
name = "hybrid_search"
required-features = ["sentence-transformers"]
[package.metadata.docs.rs]
all-features = true

View File

@@ -134,10 +134,11 @@ impl Database for LanceNamespaceDatabase {
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
let ns_request = ListTablesRequest {
identity: None,
context: None,
id: Some(request.namespace),
page_token: request.start_after,
limit: request.limit.map(|l| l as i32),
..Default::default()
};
let response = self.namespace.list_tables(ns_request).await?;
@@ -153,9 +154,13 @@ impl Database for LanceNamespaceDatabase {
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
identity: None,
context: None,
id: Some(table_id.clone()),
version: None,
..Default::default()
with_table_uri: None,
load_detailed_metadata: None,
vend_credentials: None,
};
let describe_result = self.namespace.describe_table(describe_request).await;
@@ -172,8 +177,9 @@ impl Database for LanceNamespaceDatabase {
if describe_result.is_ok() {
// Drop the existing table - must succeed
let drop_request = DropTableRequest {
identity: None,
context: None,
id: Some(table_id.clone()),
..Default::default()
};
self.namespace
.drop_table(drop_request)
@@ -205,24 +211,31 @@ impl Database for LanceNamespaceDatabase {
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let declare_request = DeclareTableRequest {
let create_empty_request = DeclareTableRequest {
identity: None,
context: None,
id: Some(table_id.clone()),
location: None,
vend_credentials: None,
..Default::default()
vend_credentials: if self.storage_options.is_empty() {
None
} else {
Some(true)
},
};
let declare_response = self
let create_empty_response = self
.namespace
.declare_table(declare_request)
.declare_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to declare table: {}", e),
})?;
let location = declare_response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from declare_table response".to_string(),
})?;
let location = create_empty_response
.location
.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response".to_string(),
})?;
let native_table = NativeTable::create_from_namespace(
self.namespace.clone(),
@@ -280,8 +293,9 @@ impl Database for LanceNamespaceDatabase {
table_id.push(name.to_string());
let drop_request = DropTableRequest {
identity: None,
context: None,
id: Some(table_id),
..Default::default()
};
self.namespace
.drop_table(drop_request)
@@ -436,10 +450,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -498,10 +513,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -563,10 +579,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -648,10 +665,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -705,10 +723,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -787,10 +806,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -822,10 +842,11 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");

View File

@@ -8,9 +8,10 @@ use datafusion_execution::{disk_manager::DiskManagerBuilder, runtime_env::Runtim
use datafusion_expr::col;
use futures::TryStreamExt;
use lance_core::ROW_ID;
use lance_datafusion::exec::SessionContextExt;
use crate::{
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
connect,
database::{CreateTableData, CreateTableRequest, Database},
dataloader::permutation::{
@@ -177,17 +178,12 @@ impl PermutationBuilder {
.build_arc()
.unwrap(),
);
let batches = data
let df = ctx
.read_one_shot(data.into_df_stream())
.map_err(|e| Error::Other {
message: format!("Failed to setup sort by split id: {}", e),
source: Some(e.into()),
})
.try_collect::<Vec<_>>()
.await?;
let df = ctx.read_batches(batches).map_err(|e| Error::Other {
message: format!("Failed to setup sort by split id: {}", e),
source: Some(e.into()),
})?;
})?;
let df_stream = df
.sort_by(vec![col(SPLIT_ID_COLUMN)])
.map_err(|e| Error::Other {

View File

@@ -25,13 +25,14 @@
//!
//! ## Crate Features
//!
//! ### Experimental Features
//!
//! These features are not enabled by default. They are experimental or in-development features that
//! are not yet ready to be released.
//!
//! - `remote` - Enable remote client to connect to LanceDB cloud. This is not yet fully implemented
//! and should not be enabled.
//! - `aws` - Enable AWS S3 object store support.
//! - `dynamodb` - Enable DynamoDB manifest store support.
//! - `azure` - Enable Azure Blob Storage object store support.
//! - `gcs` - Enable Google Cloud Storage object store support.
//! - `oss` - Enable Alibaba Cloud OSS object store support.
//! - `remote` - Enable remote client to connect to LanceDB Cloud.
//! - `huggingface` - Enable HuggingFace Hub integration for loading datasets from the Hub.
//! - `fp16kernels` - Enable FP16 kernels for faster vector search on CPU.
//!
//! ### Quick Start
//!

View File

@@ -1717,10 +1717,11 @@ mod tests {
// Create a child namespace first
let namespace = vec!["test_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -1744,10 +1745,11 @@ mod tests {
// List tables in the child namespace
let list_response = conn
.list_tables(ListTablesRequest {
identity: None,
context: None,
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
.expect("Failed to list tables");
@@ -1757,10 +1759,11 @@ mod tests {
let namespace_client = conn.namespace_client().await.unwrap();
let list_response = namespace_client
.list_tables(ListTablesRequest {
identity: None,
context: None,
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
.unwrap();
@@ -1799,10 +1802,11 @@ mod tests {
// Create a child namespace first
let namespace = vec!["multi_table_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
identity: None,
context: None,
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
.expect("Failed to create namespace");
@@ -1826,10 +1830,11 @@ mod tests {
// List tables in the child namespace
let list_response = conn
.list_tables(ListTablesRequest {
identity: None,
context: None,
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
.unwrap();

View File

@@ -1411,35 +1411,28 @@ impl Table {
let projected_plans = plans
.into_iter()
.enumerate()
.map(
|(plan_i, plan)| -> Result<Arc<dyn datafusion_physical_plan::ExecutionPlan>> {
let query_index = datafusion_common::ScalarValue::Int32(Some(plan_i as i32));
let query_index_expr =
datafusion_physical_plan::expressions::Literal::new(query_index);
let query_index_expr = Arc::new(query_index_expr)
as Arc<dyn datafusion_physical_plan::PhysicalExpr>;
let mut projections = vec![(query_index_expr, "query_index".to_string())];
projections.extend_from_slice(&project_all_columns);
let projection =
ProjectionExec::try_new(projections, plan).map_err(|e| Error::Runtime {
message: format!("Failed to build projection plan: {e}"),
})?;
Ok(Arc::new(projection) as Arc<dyn datafusion_physical_plan::ExecutionPlan>)
},
)
.collect::<Result<Vec<_>>>()?;
.map(|(plan_i, plan)| {
let query_index = datafusion_common::ScalarValue::Int32(Some(plan_i as i32));
let query_index_expr =
datafusion_physical_plan::expressions::Literal::new(query_index);
let query_index_expr =
Arc::new(query_index_expr) as Arc<dyn datafusion_physical_plan::PhysicalExpr>;
let mut projections = vec![(query_index_expr, "query_index".to_string())];
projections.extend_from_slice(&project_all_columns);
let projection = ProjectionExec::try_new(projections, plan).unwrap();
Arc::new(projection) as Arc<dyn datafusion_physical_plan::ExecutionPlan>
})
.collect::<Vec<_>>();
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime {
message: format!("Failed to union query plans: {e}"),
message: format!("Failed to union query plans: {}", e),
})?;
// We require 1 partition in the final output
let repartitioned = RepartitionExec::try_new(
unioned,
datafusion_physical_plan::Partitioning::RoundRobinBatch(1),
)
.map_err(|e| Error::Runtime {
message: format!("Failed to repartition query plans: {e}"),
})?;
.unwrap();
Ok(Arc::new(repartitioned))
}
@@ -2343,23 +2336,6 @@ impl NativeTable {
/// Convert an AnyQuery to the namespace QueryTableRequest format.
fn convert_to_namespace_query(&self, query: &AnyQuery) -> Result<NsQueryTableRequest> {
let to_namespace_columns =
|select: &Select| -> Result<Option<Box<QueryTableRequestColumns>>> {
match select {
Select::All => Ok(None),
Select::Columns(cols) => {
let mut columns = QueryTableRequestColumns::new();
columns.column_names = Some(cols.clone());
Ok(Some(Box::new(columns)))
}
Select::Dynamic(_) => Err(Error::NotSupported {
message:
"Dynamic column selection is not supported for server-side queries"
.to_string(),
}),
}
};
match query {
AnyQuery::VectorQuery(vq) => {
// Extract the query vector(s)
@@ -2371,6 +2347,22 @@ impl NativeTable {
None => None,
};
// Convert select to columns list
let columns = match &vq.base.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
column_aliases: None,
})),
Select::Dynamic(_) => {
return Err(Error::NotSupported {
message:
"Dynamic column selection is not supported for server-side queries"
.to_string(),
});
}
};
// Check for unsupported features
if vq.base.reranker.is_some() {
return Err(Error::NotSupported {
@@ -2378,8 +2370,6 @@ impl NativeTable {
});
}
let columns = to_namespace_columns(&vq.base.select)?;
// Convert FTS query if present
let full_text_query = vq.base.full_text_search.as_ref().map(|fts| {
let columns = fts.columns();
@@ -2398,6 +2388,8 @@ impl NativeTable {
});
Ok(NsQueryTableRequest {
identity: None,
context: None,
id: None, // Will be set in namespace_query
k: vq.base.limit.unwrap_or(10) as i32,
vector: Box::new(vector),
@@ -2417,7 +2409,6 @@ impl NativeTable {
bypass_vector_index: Some(!vq.use_index),
full_text_query,
version: None,
..Default::default()
})
}
AnyQuery::Query(q) => {
@@ -2435,7 +2426,19 @@ impl NativeTable {
.map(|f| self.filter_to_sql(f))
.transpose()?;
let columns = to_namespace_columns(&q.select)?;
let columns = match &q.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
column_aliases: None,
})),
Select::Dynamic(_) => {
return Err(Error::NotSupported {
message: "Dynamic columns are not supported for server-side query"
.to_string(),
});
}
};
// Handle full text search if present
let full_text_query = q.full_text_search.as_ref().map(|fts| {
@@ -2460,6 +2463,8 @@ impl NativeTable {
});
Ok(NsQueryTableRequest {
identity: None,
context: None,
id: None, // Will be set by caller
vector,
k: q.limit.unwrap_or(10) as i32,
@@ -2479,7 +2484,6 @@ impl NativeTable {
fast_search: None,
lower_bound: None,
upper_bound: None,
..Default::default()
})
}
}
@@ -5158,7 +5162,7 @@ mod tests {
ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone()),
.and_then(|columns| columns.column_names.clone()),
Some(vec!["id".to_string()])
);
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
@@ -5205,7 +5209,7 @@ mod tests {
ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone()),
.and_then(|columns| columns.column_names.clone()),
Some(vec!["id".to_string()])
);
assert_eq!(ns_request.with_row_id, Some(true));