Avoid embedding warnings

2026-03-26 10:30:40 +00:00 · 2026-01-30 12:35:45 -05:00
27 changed files with 501 additions and 608 deletions
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -48,8 +48,6 @@ jobs:
        run: cargo fmt --all -- --check
      - name: Run clippy
        run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
-      - name: Run clippy (without remote feature)
-        run: cargo clippy --profile ci --workspace --tests -- -D warnings

  build-no-lock:
    runs-on: ubuntu-24.04
@@ -183,7 +181,7 @@ jobs:
    runs-on: ubuntu-24.04
    strategy:
      matrix:
-        msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
+        msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
    env:
      # Need up-to-date compilers for kernels
      CC: clang-18
@@ -214,6 +212,4 @@ jobs:
          cargo update -p aws-sdk-sts --precise 1.51.0
          cargo update -p home --precise 0.5.9
      - name: cargo +${{ matrix.msrv }} check
-        env:
-          RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
        run: cargo check --profile ci --workspace --tests --benches --all-features
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,42 +12,42 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.88.0"
+rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "56.2", optional = false }
-arrow-array = "56.2"
-arrow-data = "56.2"
-arrow-ipc = "56.2"
-arrow-ord = "56.2"
-arrow-schema = "56.2"
-arrow-select = "56.2"
-arrow-cast = "56.2"
+arrow = { version = "57.2", optional = false }
+arrow-array = "57.2"
+arrow-data = "57.2"
+arrow-ipc = "57.2"
+arrow-ord = "57.2"
+arrow-schema = "57.2"
+arrow-select = "57.2"
+arrow-cast = "57.2"
 async-trait = "0"
-datafusion = { version = "50.1", default-features = false }
-datafusion-catalog = "50.1"
-datafusion-common = { version = "50.1", default-features = false }
-datafusion-execution = "50.1"
-datafusion-expr = "50.1"
-datafusion-physical-plan = "50.1"
+datafusion = { version = "51.0", default-features = false }
+datafusion-catalog = "51.0"
+datafusion-common = { version = "51.0", default-features = false }
+datafusion-execution = "51.0"
+datafusion-expr = "51.0"
+datafusion-physical-plan = "51.0"
 env_logger = "0.11"
-half = { "version" = "2.6.0", default-features = false, features = [
+half = { "version" = "2.7.1", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1520,9 +1520,9 @@ describe("when optimizing a dataset", () => {

  it("delete unverified", async () => {
    const version = await table.version();
-    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
-      version - 1
-    }.manifest`;
+    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
+      18446744073709551615n - (BigInt(version) - 1n),
+    ).padStart(20, "0")}.manifest`;
    fs.rmSync(versionFile);

    let stats = await table.optimize({ deleteUnverified: false });
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.27.1"
+current_version = "0.27.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,28 +1,28 @@
 [package]
 name = "lancedb-python"
-version = "0.27.1"
+version = "0.27.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
 repository.workspace = true
 keywords.workspace = true
 categories.workspace = true
-rust-version = "1.88.0"
+rust-version = "1.75.0"

 [lib]
 name = "_lancedb"
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "56.2", features = ["pyarrow"] }
+arrow = { version = "57.2", features = ["pyarrow"] }
 async-trait = "0.1"
 lancedb = { path = "../rust/lancedb", default-features = false }
 lance-core.workspace = true
 lance-namespace.workspace = true
 lance-io.workspace = true
 env_logger.workspace = true
-pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
-pyo3-async-runtimes = { version = "0.25", features = [
+pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
+pyo3-async-runtimes = { version = "0.26", features = [
    "attributes",
    "tokio-runtime",
 ] }
@@ -32,7 +32,7 @@ snafu.workspace = true
 tokio = { version = "1.40", features = ["sync"] }

 [build-dependencies]
-pyo3-build-config = { version = "0.25", features = [
+pyo3-build-config = { version = "0.26", features = [
    "extension-module",
    "abi3-py39",
 ] }
--- a/python/python/lancedb/embeddings/colpali.py
+++ b/python/python/lancedb/embeddings/colpali.py
@@ -9,6 +9,8 @@ import numpy as np
 import io
 import warnings

+from pydantic import Field
+
 from ..util import attempt_import_or_raise
 from .base import EmbeddingFunction
 from .registry import register
@@ -26,7 +28,7 @@ class ColPaliEmbeddings(EmbeddingFunction):

    Parameters
    ----------
-    model_name : str
+    colpali_model_name : str
        The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
        Supports models based on these engines:
        - ColPali: "vidore/colpali-v1.3" and others
@@ -57,7 +59,10 @@ class ColPaliEmbeddings(EmbeddingFunction):
        useful for large models that do not fit in memory.
    """

-    model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
+    colpali_model_name: str = Field(
+        default="Metric-AI/ColQwen2.5-3b-multilingual-v1.0",
+        validation_alias="model_name",
+    )
    device: str = "auto"
    dtype: str = "bfloat16"
    use_token_pooling: bool = True
@@ -107,7 +112,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
            self._processor,
            self._token_pooler,
        ) = self._load_model(
-            self.model_name,
+            self.colpali_model_name,
            dtype,
            device,
            self.pooling_strategy,
--- a/python/python/lancedb/embeddings/siglip.py
+++ b/python/python/lancedb/embeddings/siglip.py
@@ -10,7 +10,7 @@ import urllib.parse as urlparse
 import numpy as np
 import pyarrow as pa
 from tqdm import tqdm
-from pydantic import PrivateAttr
+from pydantic import Field, PrivateAttr

 from ..util import attempt_import_or_raise
 from .base import EmbeddingFunction
@@ -24,7 +24,10 @@ if TYPE_CHECKING:

 @register("siglip")
 class SigLipEmbeddings(EmbeddingFunction):
-    model_name: str = "google/siglip-base-patch16-224"
+    siglip_model_name: str = Field(
+        default="google/siglip-base-patch16-224",
+        validation_alias="model_name",
+    )
    device: str = "cpu"
    batch_size: int = 64
    normalize: bool = True
@@ -39,8 +42,10 @@ class SigLipEmbeddings(EmbeddingFunction):
        transformers = attempt_import_or_raise("transformers")
        self._torch = attempt_import_or_raise("torch")

-        self._processor = transformers.AutoProcessor.from_pretrained(self.model_name)
-        self._model = transformers.SiglipModel.from_pretrained(self.model_name)
+        self._processor = transformers.AutoProcessor.from_pretrained(
+            self.siglip_model_name
+        )
+        self._model = transformers.SiglipModel.from_pretrained(self.siglip_model_name)
        self._model.to(self.device)
        self._model.eval()
        self._ndims = None
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -961,22 +961,27 @@ class LanceQueryBuilder(ABC):
        >>> query = [100, 100]
        >>> plan = table.search(query).analyze_plan()
        >>> print(plan)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-        AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
-          TracedExec, metrics=[], cumulative_cpu=...
-            ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
-              GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
-                FilterExec: _distance@2 IS NOT NULL,
-                metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
-                  SortExec: TopK(fetch=10), expr=[...],
+        AnalyzeExec verbose=true, elapsed=..., metrics=...
+          TracedExec, elapsed=..., metrics=...
+            ProjectionExec: elapsed=..., expr=[...],
+            metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
+              GlobalLimitExec: elapsed=..., skip=0, fetch=10,
+              metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
+                FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
+                  SortExec: elapsed=..., TopK(fetch=10), expr=[...],
                  preserve_partitioning=[...],
-                  metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
-                  cumulative_cpu=...
-                    KNNVectorDistance: metric=l2,
-                    metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
-                    cumulative_cpu=...
-                      LanceRead: uri=..., projection=[vector], ...
-                      metrics=[output_rows=..., elapsed_compute=...,
-                      bytes_read=..., iops=..., requests=...], cumulative_cpu=...
+                  metrics=[output_rows=..., elapsed_compute=...,
+                  output_bytes=..., row_replacements=...]
+                    KNNVectorDistance: elapsed=..., metric=l2,
+                    metrics=[output_rows=..., elapsed_compute=...,
+                    output_bytes=..., output_batches=...]
+                      LanceRead: elapsed=..., uri=..., projection=[vector],
+                      num_fragments=..., range_before=None, range_after=None,
+                      row_id=true, row_addr=false,
+                      full_filter=--, refine_filter=--,
+                      metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
+                      fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
+                      bytes_read=..., iops=..., requests=..., task_wait_time=...]

        Returns
        -------
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -601,7 +601,6 @@ def test_head():
 def test_query_sync_minimal():
    def handler(body):
        assert body == {
-            "distance_type": "l2",
            "k": 10,
            "prefilter": True,
            "refine_factor": None,
@@ -685,7 +684,6 @@ def test_query_sync_maximal():
 def test_query_sync_nprobes():
    def handler(body):
        assert body == {
-            "distance_type": "l2",
            "k": 10,
            "prefilter": True,
            "fast_search": True,
@@ -715,7 +713,6 @@ def test_query_sync_nprobes():
 def test_query_sync_no_max_nprobes():
    def handler(body):
        assert body == {
-            "distance_type": "l2",
            "k": 10,
            "prefilter": True,
            "fast_search": True,
@@ -838,7 +835,6 @@ def test_query_sync_hybrid():
        else:
            # Vector query
            assert body == {
-                "distance_type": "l2",
                "k": 42,
                "prefilter": True,
                "refine_factor": None,
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -1880,8 +1880,13 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
        ],
    )
    version = await table.version()
-    path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
+    assert version == 2
+
+    # By removing a manifest file, we make the data files we just inserted unverified
+    version_name = 18446744073709551615 - (version - 1)
+    path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
    os.remove(path)
+
    stats = await table.optimize(delete_unverified=False)
    assert stats.prune.old_versions_removed == 0
    stats = await table.optimize(
--- a/python/src/arrow.rs
+++ b/python/src/arrow.rs
@@ -10,8 +10,7 @@ use arrow::{
 use futures::stream::StreamExt;
 use lancedb::arrow::SendableRecordBatchStream;
 use pyo3::{
-    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
-    Python,
+    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -36,8 +35,11 @@ impl RecordBatchStream {
 #[pymethods]
 impl RecordBatchStream {
    #[getter]
-    pub fn schema(&self, py: Python) -> PyResult<PyObject> {
-        (*self.schema).clone().into_pyarrow(py)
+    pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
+        (*self.schema)
+            .clone()
+            .into_pyarrow(py)
+            .map(|obj| obj.unbind())
    }

    pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -53,7 +55,12 @@ impl RecordBatchStream {
                .next()
                .await
                .ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
-            Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
+            Python::attach(|py| {
+                inner_next
+                    .infer_error()?
+                    .to_pyarrow(py)
+                    .map(|obj| obj.unbind())
+            })
        })
    }
 }
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -12,7 +12,7 @@ use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pyfunction, pymethods,
    types::{PyDict, PyDictMethods},
-    Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
+    Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -114,7 +114,7 @@ impl Connection {
        data: Bound<'_, PyAny>,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<PyObject>,
+        storage_options_provider: Option<Py<PyAny>>,
        location: Option<String>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
@@ -152,7 +152,7 @@ impl Connection {
        schema: Bound<'_, PyAny>,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<PyObject>,
+        storage_options_provider: Option<Py<PyAny>>,
        location: Option<String>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
@@ -187,7 +187,7 @@ impl Connection {
        name: String,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<PyObject>,
+        storage_options_provider: Option<Py<PyAny>>,
        index_cache_size: Option<u32>,
        location: Option<String>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -307,7 +307,7 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.list_namespaces(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("namespaces", response.namespaces)?;
                dict.set_item("page_token", response.page_token)?;
@@ -345,7 +345,7 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.create_namespace(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
                Ok(dict.unbind())
@@ -386,7 +386,7 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.drop_namespace(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
                dict.set_item("transaction_id", response.transaction_id)?;
@@ -413,7 +413,7 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.describe_namespace(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
                Ok(dict.unbind())
@@ -443,7 +443,7 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.list_tables(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("tables", response.tables)?;
                dict.set_item("page_token", response.page_token)?;
--- a/python/src/error.rs
+++ b/python/src/error.rs
@@ -40,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                    request_id,
                    source,
                    status_code,
-                } => Python::with_gil(|py| {
+                } => Python::attach(|py| {
                    let message = err.to_string();
                    let http_err_cls = py
                        .import(intern!(py, "lancedb.remote.errors"))?
@@ -75,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                    max_read_failures,
                    source,
                    status_code,
-                } => Python::with_gil(|py| {
+                } => Python::attach(|py| {
                    let cause_err = http_from_rust_error(
                        py,
                        source.as_ref(),
--- a/python/src/header.rs
+++ b/python/src/header.rs
@@ -12,7 +12,7 @@ pub struct PyHeaderProvider {

 impl Clone for PyHeaderProvider {
    fn clone(&self) -> Self {
-        Python::with_gil(|py| Self {
+        Python::attach(|py| Self {
            provider: self.provider.clone_ref(py),
        })
    }
@@ -25,7 +25,7 @@ impl PyHeaderProvider {

    /// Get headers from the Python provider (internal implementation)
    fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
-        Python::with_gil(|py| {
+        Python::attach(|py| {
            // Call the get_headers method
            let result = self.provider.call_method0(py, "get_headers");

--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -281,7 +281,7 @@ impl PyPermutationReader {
        let reader = slf.reader.clone();
        future_into_py(slf.py(), async move {
            let schema = reader.output_schema(selection).await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -453,7 +453,7 @@ impl Query {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -532,7 +532,7 @@ impl TakeQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -627,7 +627,7 @@ impl FTSQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -806,7 +806,7 @@ impl VectorQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

--- a/python/src/storage_options.rs
+++ b/python/src/storage_options.rs
@@ -17,20 +17,20 @@ use pyo3::types::PyDict;
 /// Internal wrapper around a Python object implementing StorageOptionsProvider
 pub struct PyStorageOptionsProvider {
    /// The Python object implementing fetch_storage_options()
-    inner: PyObject,
+    inner: Py<PyAny>,
 }

 impl Clone for PyStorageOptionsProvider {
    fn clone(&self) -> Self {
-        Python::with_gil(|py| Self {
+        Python::attach(|py| Self {
            inner: self.inner.clone_ref(py),
        })
    }
 }

 impl PyStorageOptionsProvider {
-    pub fn new(obj: PyObject) -> PyResult<Self> {
-        Python::with_gil(|py| {
+    pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
+        Python::attach(|py| {
            // Verify the object has a fetch_storage_options method
            if !obj.bind(py).hasattr("fetch_storage_options")? {
                return Err(pyo3::exceptions::PyTypeError::new_err(
@@ -60,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
        let py_provider = self.py_provider.clone();

        tokio::task::spawn_blocking(move || {
-            Python::with_gil(|py| {
+            Python::attach(|py| {
                // Call the Python fetch_storage_options method
                let result = py_provider
                    .inner
@@ -119,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
    }

    fn provider_id(&self) -> String {
-        Python::with_gil(|py| {
+        Python::attach(|py| {
            // Call provider_id() method on the Python object
            let obj = self.py_provider.inner.bind(py);
            obj.call_method0("provider_id")
@@ -143,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
 /// This is the main entry point for converting Python StorageOptionsProvider objects
 /// to Rust trait objects that can be used by the Lance ecosystem.
 pub fn py_object_to_storage_options_provider(
-    py_obj: PyObject,
+    py_obj: Py<PyAny>,
 ) -> PyResult<Arc<dyn StorageOptionsProvider>> {
    let py_provider = PyStorageOptionsProvider::new(py_obj)?;
    Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -287,7 +287,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -437,7 +437,7 @@ impl Table {
        future_into_py(self_.py(), async move {
            let stats = inner.index_stats(&index_name).await.infer_error()?;
            if let Some(stats) = stats {
-                Python::with_gil(|py| {
+                Python::attach(|py| {
                    let dict = PyDict::new(py);
                    dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
                    dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
@@ -467,7 +467,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let stats = inner.stats().await.infer_error()?;
-            Python::with_gil(|py| {
+            Python::attach(|py| {
                let dict = PyDict::new(py);
                dict.set_item("total_bytes", stats.total_bytes)?;
                dict.set_item("num_rows", stats.num_rows)?;
@@ -521,7 +521,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let versions = inner.list_versions().await.infer_error()?;
-            let versions_as_dict = Python::with_gil(|py| {
+            let versions_as_dict = Python::attach(|py| {
                versions
                    .iter()
                    .map(|v| {
@@ -872,7 +872,7 @@ impl Tags {
            let tags = inner.tags().await.infer_error()?;
            let res = tags.list().await.infer_error()?;

-            Python::with_gil(|py| {
+            Python::attach(|py| {
                let py_dict = PyDict::new(py);
                for (key, contents) in res {
                    let value_dict = PyDict::new(py);
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -892,7 +892,6 @@ pub struct ConnectBuilder {
    embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
 }

-#[cfg(feature = "remote")]
 const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
    [("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];

--- a/rust/lancedb/src/dataloader/permutation/shuffle.rs
+++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs
@@ -171,7 +171,7 @@ impl Shuffler {
            // This is kind of an annoying limitation but if we allow runt clumps from batches then
            // clumps will get unaligned and we will mess up the clumps when we do the in-memory
            // shuffle step.  If this is a problem we can probably figure out a better way to do this.
-            if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
+            if !is_last && batch.num_rows() as u64 % clump_size != 0 {
                return Err(Error::Runtime {
                    message: format!(
                        "Expected batch size ({}) to be divisible by clump size ({})",
--- a/rust/lancedb/src/dataloader/permutation/split.rs
+++ b/rust/lancedb/src/dataloader/permutation/split.rs
@@ -1,9 +1,12 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::sync::{
-    atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
-    Arc,
+use std::{
+    iter,
+    sync::{
+        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+        Arc,
+    },
 };

 use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -155,7 +158,7 @@ impl Splitter {
                remaining_in_split
            };

-            split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
+            split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
            if done {
                // Quit early if we've run out of splits
                break;
@@ -659,7 +662,7 @@ mod tests {
        assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
        let mut expected = Vec::with_capacity(total_split_sizes as usize);
        for (i, size) in expected_split_sizes.iter().enumerate() {
-            expected.extend(std::iter::repeat_n(i as u64, *size as usize));
+            expected.extend(iter::repeat(i as u64).take(*size as usize));
        }
        let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;

--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
 }

 pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
-    if dim.is_multiple_of(16) {
+    if dim % 16 == 0 {
        // Should be more aggressive than this default.
        dim / 16
-    } else if dim.is_multiple_of(8) {
+    } else if dim % 8 == 0 {
        dim / 8
    } else {
        log::warn!(
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -468,7 +468,9 @@ impl<S: HttpSend> RemoteTable<S> {
        self.apply_query_params(&mut body, &query.base)?;

        // Apply general parameters, before we dispatch based on number of query vectors.
-        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
+        if let Some(distance_type) = query.distance_type {
+            body["distance_type"] = serde_json::json!(distance_type);
+        }
        // In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
        // Old client / new server: since minimum_nprobes is missing, fallback to nprobes
        // New client / old server: old server will only see nprobes, make sure to set both
@@ -2230,7 +2232,6 @@ mod tests {
            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
            let mut expected_body = serde_json::json!({
                "prefilter": true,
-                "distance_type": "l2",
                "nprobes": 20,
                "minimum_nprobes": 20,
                "maximum_nprobes": 20,
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -1425,7 +1425,9 @@ impl Table {
            })
            .collect::<Vec<_>>();

-        let unioned = Arc::new(UnionExec::new(projected_plans));
+        let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
+            message: err.to_string(),
+        })?;
        // We require 1 partition in the final output
        let repartitioned = RepartitionExec::try_new(
            unioned,
@@ -2059,7 +2061,7 @@ impl NativeTable {
            return provided;
        }
        let suggested = suggested_num_sub_vectors(dim);
-        if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
+        if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
            // num_sub_vectors must be even when 4 bits are used
            suggested + 1
        } else {
@@ -3400,6 +3402,7 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
+    use std::iter;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::time::Duration;
@@ -4016,7 +4019,7 @@ mod tests {
                schema.clone(),
                vec![
                    Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
-                    Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
+                    Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
                ],
            )],
            schema,
--- a/rust/lancedb/src/table/dataset.rs
+++ b/rust/lancedb/src/table/dataset.rs
@@ -100,7 +100,8 @@ impl DatasetRef {
                let should_checkout = match &target_ref {
                    refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
                    refs::Ref::Version(_, None) => true, // No specific version, always checkout
-                    refs::Ref::Tag(_) => true,           // Always checkout for tags
+                    refs::Ref::VersionNumber(target_ver) => version != target_ver,
+                    refs::Ref::Tag(_) => true, // Always checkout for tags
                };

                if should_checkout {
--- a/rust/lancedb/tests/embedding_registry_test.rs
+++ b/rust/lancedb/tests/embedding_registry_test.rs
@@ -4,6 +4,7 @@
 use std::{
    borrow::Cow,
    collections::{HashMap, HashSet},
+    iter::repeat,
    sync::Arc,
 };

@@ -267,10 +268,9 @@ fn create_some_records() -> Result<impl IntoArrow> {
            schema.clone(),
            vec![
                Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
-                Arc::new(StringArray::from_iter(std::iter::repeat_n(
-                    Some("hello world".to_string()),
-                    TOTAL,
-                ))),
+                Arc::new(StringArray::from_iter(
+                    repeat(Some("hello world".to_string())).take(TOTAL),
+                )),
            ],
        )
        .unwrap()]