Compare commits


7 Commits

Author SHA1 Message Date
Lance Release
5e1e9add07 Bump version: 0.25.1-beta.1 → 0.25.1-beta.2 2025-09-18 20:21:33 +00:00
Jack Ye
97e9938dfe fix: add missing validations to namespace operations (#2659) 2025-09-17 23:27:04 -07:00
Weston Pace
1d4b92e01e refactor: remove catalog implementation now that we have namespaces in database (#2662)
We had previously prototyped a `Catalog` trait anticipating a
three-tiered Catalog-Database-Table structure. Now that we have
namespaces in the `Database` we can support any tiering scheme and the
`Catalog` trait is no longer needed.
2025-09-17 08:40:20 -07:00
Le Duc Manh
4c9fc3044b fix: use create to resolve variables (#2640)
# What
- Use `create` to resolve variable values

# Reference
Fixes #2181

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2025-09-12 13:07:32 -07:00
Jack Ye
0ebc8d45a8 chore: fix no lock build warnings and CI timeouts (#2650)
Example CI failures:
- publish build timeout:
https://github.com/lancedb/lancedb/actions/runs/17626482881/job/50084552906
- doc test build timeout:
https://github.com/lancedb/lancedb/actions/runs/17627058590/job/50086456818
2025-09-11 15:30:35 -07:00
BubbleCal
f7d78c3420 feat: add 'target_partition_size' param (#2642)
This exposes the `target_partition_size` param from `lance`.

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-09-11 22:56:16 +08:00
Lance Release
6ea6884260 Bump version: 0.22.1-beta.0 → 0.22.1-beta.1 2025-09-10 20:49:43 +00:00
42 changed files with 330 additions and 896 deletions

View File

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.0"
+current_version = "0.22.1-beta.1"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -24,7 +24,8 @@ env:
 jobs:
   test-python:
     name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
+    timeout-minutes: 60
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -48,7 +49,6 @@ jobs:
         uses: swatinem/rust-cache@v2
       - name: Build Python
         working-directory: docs/test
-        timeout-minutes: 60
         run:
           python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
       - name: Create test files

Cargo.lock generated
View File

@@ -713,9 +713,9 @@ dependencies = [
 [[package]]
 name = "aws-sdk-s3"
-version = "1.104.0"
+version = "1.105.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38c488cd6abb0ec9811c401894191932e941c5f91dc226043edacd0afa1634bc"
+checksum = "c99789e929b5e1d9a5aa3fa1d81317f3a789afc796141d11b0eaafd9d9f47e38"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -963,9 +963,9 @@ dependencies = [
 [[package]]
 name = "aws-smithy-runtime"
-version = "1.9.1"
+version = "1.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3946acbe1ead1301ba6862e712c7903ca9bb230bdf1fbd1b5ac54158ef2ab1f"
+checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-http",
@@ -1153,7 +1153,7 @@ dependencies = [
  "bitflags 2.9.4",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools 0.11.0",
  "lazy_static",
  "lazycell",
  "log",
@@ -4628,7 +4628,7 @@ dependencies = [
 [[package]]
 name = "lancedb"
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4715,7 +4715,7 @@ dependencies = [
 [[package]]
 name = "lancedb-nodejs"
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 dependencies = [
  "arrow-array",
  "arrow-ipc",
@@ -4735,7 +4735,7 @@ dependencies = [
 [[package]]
 name = "lancedb-python"
-version = "0.25.1-beta.0"
+version = "0.25.1-beta.1"
 dependencies = [
  "arrow",
  "async-trait",
View File

@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
     fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
     /// Get strings from Java List<String> object.
+    #[allow(dead_code)]
     fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
     /// Get strings from Java String[] object.
View File

@@ -6,6 +6,7 @@ use jni::JNIEnv;
 use crate::Result;

+#[allow(dead_code)]
 pub trait FromJObject<T> {
     fn extract(&self) -> Result<T>;
 }
@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
     }
 }

+#[allow(dead_code)]
 pub trait FromJString {
     fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -66,6 +68,7 @@ pub trait JMapExt {
     fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }

+#[allow(dead_code)]
 fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
 where
     for<'a> JObject<'a>: FromJObject<T>,

View File

@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.22.1-beta.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

View File

@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.22.1-beta.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

View File

@@ -6,7 +6,7 @@
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.22.1-beta.0</version>
+  <version>0.22.1-beta.1</version>
   <packaging>pom</packaging>
   <name>${project.artifactId}</name>
   <description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": [
     "win32"
   ],

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.1-beta.1",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.22.1-beta.0",
+      "version": "0.22.1-beta.1",
       "cpu": [
         "x64",
         "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.22.1-beta.0", "version": "0.22.1-beta.1",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1-beta.1"
+current_version = "0.25.1-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1-beta.1"
+version = "0.25.1-beta.2"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true

View File

@@ -122,7 +122,7 @@ class EmbeddingFunctionRegistry:
obj["vector_column"]: EmbeddingFunctionConfig( obj["vector_column"]: EmbeddingFunctionConfig(
vector_column=obj["vector_column"], vector_column=obj["vector_column"],
source_column=obj["source_column"], source_column=obj["source_column"],
function=self.get(obj["name"])(**obj["model"]), function=self.get(obj["name"]).create(**obj["model"]),
) )
for obj in raw_list for obj in raw_list
} }
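Note: `create` is the classmethod that resolves `$var:` placeholders (set via `registry.set_var`) before the function is instantiated; calling the class directly, as the old code did, stored the placeholder string verbatim. A minimal sketch of the flow this change enables, assuming the standard registry import path and using the built-in "openai" function name purely for illustration:

```python
from lancedb.embeddings import EmbeddingFunctionRegistry  # import path assumed

registry = EmbeddingFunctionRegistry.get_instance()
registry.set_var("my_api_key", "sk-real-key")

# `.create()` substitutes the variable's value at construction time, while
# safe_model_dump() keeps the "$var:..." form so the secret is never persisted.
func = registry.get("openai").create(api_key="$var:my_api_key")
```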

View File

@@ -251,6 +251,13 @@ class HnswPq:
         results. In most cases, there is no benefit to setting this higher than 500.
         This value should be set to a value that is not less than `ef` in the
         search phase.
+    target_partition_size, default is 1,048,576
+        The target size of each partition.
+        This value controls the tradeoff between search performance and accuracy:
+        the higher the value, the faster the search, but the less accurate the results.
     """

     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -261,6 +268,7 @@ class HnswPq:
     sample_rate: int = 256
     m: int = 20
     ef_construction: int = 300
+    target_partition_size: Optional[int] = None


 @dataclass
@@ -351,6 +359,12 @@ class HnswSq:
         This value should be set to a value that is not less than `ef` in the search
         phase.
+    target_partition_size, default is 1,048,576
+        The target size of each partition.
+        This value controls the tradeoff between search performance and accuracy:
+        the higher the value, the faster the search, but the less accurate the results.
     """

     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -359,6 +373,7 @@ class HnswSq:
     sample_rate: int = 256
     m: int = 20
     ef_construction: int = 300
+    target_partition_size: Optional[int] = None


 @dataclass
@@ -444,12 +459,20 @@ class IvfFlat:
         cases the default should be sufficient.
         The default value is 256.
+    target_partition_size, default is 8192
+        The target size of each partition.
+        This value controls the tradeoff between search performance and accuracy:
+        the higher the value, the faster the search, but the less accurate the results.
     """

     distance_type: Literal["l2", "cosine", "dot", "hamming"] = "l2"
     num_partitions: Optional[int] = None
     max_iterations: int = 50
     sample_rate: int = 256
+    target_partition_size: Optional[int] = None


 @dataclass
@@ -564,6 +587,13 @@ class IvfPq:
         cases the default should be sufficient.
         The default value is 256.
+    target_partition_size, default is 8192
+        The target size of each partition.
+        This value controls the tradeoff between search performance and accuracy:
+        the higher the value, the faster the search, but the less accurate the results.
     """

     distance_type: Literal["l2", "cosine", "dot"] = "l2"
@@ -572,6 +602,7 @@ class IvfPq:
     num_bits: int = 8
     max_iterations: int = 50
     sample_rate: int = 256
+    target_partition_size: Optional[int] = None


 __all__ = [
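Taken together, each IVF-family config dataclass gains an optional `target_partition_size`. A brief usage sketch (the `lancedb.index` import path is assumed; values mirror the documented defaults):

```python
from lancedb.index import HnswSq, IvfPq  # import path assumed

# Larger partitions mean fewer partitions to probe: faster search,
# less accurate results, per the docstrings above.
pq = IvfPq(distance_type="cosine", target_partition_size=8192)

# The HNSW variants default to a much larger target (1,048,576).
hnsw = HnswSq(distance_type="l2", target_partition_size=1_048_576)
```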

View File

@@ -691,6 +691,7 @@ class Table(ABC):
         ef_construction: int = 300,
         name: Optional[str] = None,
         train: bool = True,
+        target_partition_size: Optional[int] = None,
     ):
         """Create an index on the table.
@@ -2002,6 +2003,7 @@ class LanceTable(Table):
         *,
         name: Optional[str] = None,
         train: bool = True,
+        target_partition_size: Optional[int] = None,
     ):
         """Create an index on the table."""
         if accelerator is not None:
@@ -2018,6 +2020,7 @@ class LanceTable(Table):
                 num_bits=num_bits,
                 m=m,
                 ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
             )
             self.checkout_latest()
             return
@@ -2027,6 +2030,7 @@ class LanceTable(Table):
                 num_partitions=num_partitions,
                 max_iterations=max_iterations,
                 sample_rate=sample_rate,
+                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_PQ":
             config = IvfPq(
@@ -2036,6 +2040,7 @@ class LanceTable(Table):
                 num_bits=num_bits,
                 max_iterations=max_iterations,
                 sample_rate=sample_rate,
+                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_HNSW_PQ":
             config = HnswPq(
@@ -2047,6 +2052,7 @@ class LanceTable(Table):
                 sample_rate=sample_rate,
                 m=m,
                 ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
             )
         elif index_type == "IVF_HNSW_SQ":
             config = HnswSq(
@@ -2056,6 +2062,7 @@ class LanceTable(Table):
                 sample_rate=sample_rate,
                 m=m,
                 ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
             )
         else:
             raise ValueError(f"Unknown index type {index_type}")
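On the synchronous `LanceTable` path above, the new keyword is simply forwarded into whichever config class matches `index_type`. A sketch, assuming `table` is an open `LanceTable` with a "vector" column:

```python
# target_partition_size is forwarded into HnswSq(...) for this index_type.
table.create_index(
    vector_column_name="vector",
    index_type="IVF_HNSW_SQ",
    m=20,
    ef_construction=300,
    target_partition_size=1_048_576,
)
```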

View File

@@ -114,6 +114,63 @@ def test_embedding_function_variables():
     assert func.safe_model_dump()["secret_key"] == "$var:secret"

+
+def test_parse_functions_with_variables():
+    @register("variable-parsing-test")
+    class VariableParsingFunction(TextEmbeddingFunction):
+        api_key: str
+        base_url: Optional[str] = None
+
+        @staticmethod
+        def sensitive_keys():
+            return ["api_key"]
+
+        def ndims(self):
+            return 10
+
+        def generate_embeddings(self, texts):
+            # Mock implementation that just returns random embeddings
+            # In real usage, this would use the api_key to call an API
+            return [np.random.rand(self.ndims()).tolist() for _ in texts]
+
+    registry = EmbeddingFunctionRegistry.get_instance()
+    registry.set_var("test_api_key", "sk-test-key-12345")
+    registry.set_var("test_base_url", "https://api.example.com")
+
+    conf = EmbeddingFunctionConfig(
+        source_column="text",
+        vector_column="vector",
+        function=registry.get("variable-parsing-test").create(
+            api_key="$var:test_api_key", base_url="$var:test_base_url"
+        ),
+    )
+    metadata = registry.get_table_metadata([conf])
+
+    # Create a mock arrow table with the metadata
+    schema = pa.schema(
+        [pa.field("text", pa.string()), pa.field("vector", pa.list_(pa.float32(), 10))]
+    )
+    table = pa.table({"text": [], "vector": []}, schema=schema)
+    table = table.replace_schema_metadata(metadata)
+    ds = lance.write_dataset(table, "memory://")
+
+    configs = registry.parse_functions(ds.schema.metadata)
+    assert "vector" in configs
+    parsed_func = configs["vector"].function
+    assert parsed_func.api_key == "sk-test-key-12345"
+    assert parsed_func.base_url == "https://api.example.com"
+
+    embeddings = parsed_func.generate_embeddings(["test text"])
+    assert len(embeddings) == 1
+    assert len(embeddings[0]) == 10
+
+    assert parsed_func.safe_model_dump()["api_key"] == "$var:test_api_key"
+
+
 def test_embedding_with_bad_results(tmp_path):
     @register("null-embedding")
     class NullEmbeddingFunction(TextEmbeddingFunction):

View File

@@ -674,6 +674,45 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
"vector", replace=True, config=expected_config, name=None, train=True "vector", replace=True, config=expected_config, name=None, train=True
) )
# Test with target_partition_size
table.create_index(
metric="l2",
num_sub_vectors=96,
vector_column_name="vector",
replace=True,
index_cache_size=256,
num_bits=4,
target_partition_size=8192,
)
expected_config = IvfPq(
distance_type="l2",
num_sub_vectors=96,
num_bits=4,
target_partition_size=8192,
)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name=None, train=True
)
# target_partition_size has a default value,
# so `num_partitions` and `target_partition_size` are not required
table.create_index(
metric="l2",
num_sub_vectors=96,
vector_column_name="vector",
replace=True,
index_cache_size=256,
num_bits=4,
)
expected_config = IvfPq(
distance_type="l2",
num_sub_vectors=96,
num_bits=4,
)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name=None, train=True
)
table.create_index( table.create_index(
vector_column_name="my_vector", vector_column_name="my_vector",
metric="dot", metric="dot",

View File

@@ -255,7 +255,7 @@ impl Connection {
     #[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
     #[allow(clippy::too_many_arguments)]
     pub fn connect(
-        py: Python,
+        py: Python<'_>,
         uri: String,
         api_key: Option<String>,
         region: Option<String>,

View File

@@ -63,6 +63,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<LanceDbIndex> {
             if let Some(num_partitions) = params.num_partitions {
                 ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
+            }
             Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
         },
         "IvfPq" => {
@@ -76,6 +79,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<LanceDbIndex> {
             if let Some(num_partitions) = params.num_partitions {
                 ivf_pq_builder = ivf_pq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_pq_builder = ivf_pq_builder.target_partition_size(target_partition_size);
+            }
             if let Some(num_sub_vectors) = params.num_sub_vectors {
                 ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
             }
@@ -94,6 +100,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<LanceDbIndex> {
             if let Some(num_partitions) = params.num_partitions {
                 hnsw_pq_builder = hnsw_pq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                hnsw_pq_builder = hnsw_pq_builder.target_partition_size(target_partition_size);
+            }
             if let Some(num_sub_vectors) = params.num_sub_vectors {
                 hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
             }
@@ -111,6 +120,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<LanceDbIndex> {
             if let Some(num_partitions) = params.num_partitions {
                 hnsw_sq_builder = hnsw_sq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
+            }
             Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
         },
         not_supported => Err(PyValueError::new_err(format!(
@@ -144,6 +156,7 @@ struct IvfFlatParams {
     num_partitions: Option<u32>,
     max_iterations: u32,
     sample_rate: u32,
+    target_partition_size: Option<u32>,
 }

 #[derive(FromPyObject)]
@@ -154,6 +167,7 @@ struct IvfPqParams {
     num_bits: u32,
     max_iterations: u32,
     sample_rate: u32,
+    target_partition_size: Option<u32>,
 }

 #[derive(FromPyObject)]
@@ -166,6+180,7 @@ struct IvfHnswPqParams {
     sample_rate: u32,
     m: u32,
     ef_construction: u32,
+    target_partition_size: Option<u32>,
 }

 #[derive(FromPyObject)]
@@ -176,6 +191,7 @@ struct IvfHnswSqParams {
     sample_rate: u32,
     m: u32,
     ef_construction: u32,
+    target_partition_size: Option<u32>,
 }

 #[pyclass(get_all)]
#[pyclass(get_all)] #[pyclass(get_all)]

View File

@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.1-beta.0"
+version = "0.22.1-beta.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -86,11 +86,11 @@ rand = { version = "0.9", features = ["small_rng"] }
 random_word = { version = "0.4.3", features = ["en"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
-aws-sdk-dynamodb = { version = "1.38.0" }
-aws-sdk-s3 = { version = "1.38.0" }
-aws-sdk-kms = { version = "1.37" }
-aws-config = { version = "1.0" }
-aws-smithy-runtime = { version = "1.3" }
+aws-sdk-dynamodb = { version = "1.55.0" }
+aws-sdk-s3 = { version = "1.55.0" }
+aws-sdk-kms = { version = "1.48.0" }
+aws-config = { version = "1.5.10" }
+aws-smithy-runtime = { version = "1.9.1" }
 datafusion.workspace = true
 http-body = "1" # Matching reqwest
 rstest = "0.23.0"

View File

@@ -1,86 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

//! Catalog implementation for managing databases

pub mod listing;

use std::collections::HashMap;
use std::sync::Arc;

use crate::database::Database;
use crate::error::Result;
use async_trait::async_trait;

pub trait CatalogOptions {
    fn serialize_into_map(&self, map: &mut HashMap<String, String>);
}

/// Request parameters for listing databases
#[derive(Clone, Debug, Default)]
pub struct DatabaseNamesRequest {
    /// Start listing after this name (exclusive)
    pub start_after: Option<String>,
    /// Maximum number of names to return
    pub limit: Option<u32>,
}

/// Request to open an existing database
#[derive(Clone, Debug)]
pub struct OpenDatabaseRequest {
    /// The name of the database to open
    pub name: String,
    /// A map of database-specific options
    ///
    /// Consult the catalog / database implementation to determine which options are available
    pub database_options: HashMap<String, String>,
}

/// Database creation mode
///
/// The default behavior is Create
pub enum CreateDatabaseMode {
    /// Create new database, error if exists
    Create,
    /// Open existing database if present
    ExistOk,
    /// Overwrite existing database
    Overwrite,
}

impl Default for CreateDatabaseMode {
    fn default() -> Self {
        Self::Create
    }
}

/// Request to create a new database
pub struct CreateDatabaseRequest {
    /// The name of the database to create
    pub name: String,
    /// The creation mode
    pub mode: CreateDatabaseMode,
    /// A map of catalog-specific options, consult your catalog implementation to determine what's available
    pub options: HashMap<String, String>,
}

#[async_trait]
pub trait Catalog: Send + Sync + std::fmt::Debug + 'static {
    /// List database names with pagination
    async fn database_names(&self, request: DatabaseNamesRequest) -> Result<Vec<String>>;
    /// Create a new database
    async fn create_database(&self, request: CreateDatabaseRequest) -> Result<Arc<dyn Database>>;
    /// Open existing database
    async fn open_database(&self, request: OpenDatabaseRequest) -> Result<Arc<dyn Database>>;
    /// Rename database
    async fn rename_database(&self, old_name: &str, new_name: &str) -> Result<()>;
    /// Delete database
    async fn drop_database(&self, name: &str) -> Result<()>;
    /// Delete all databases
    async fn drop_all_databases(&self) -> Result<()>;
}

View File

@@ -1,624 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

//! Catalog implementation based on a local file system.

use std::collections::HashMap;
use std::fs::create_dir_all;
use std::path::Path;
use std::sync::Arc;

use super::{
    Catalog, CatalogOptions, CreateDatabaseMode, CreateDatabaseRequest, DatabaseNamesRequest,
    OpenDatabaseRequest,
};
use crate::connection::ConnectRequest;
use crate::database::listing::{ListingDatabase, ListingDatabaseOptions};
use crate::database::{Database, DatabaseOptions};
use crate::error::{CreateDirSnafu, Error, Result};
use async_trait::async_trait;
use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry};
use lance_io::local::to_local_path;
use object_store::path::Path as ObjectStorePath;
use snafu::ResultExt;

/// Options for the listing catalog
///
/// Note: the catalog will use the `storage_options` configured on
/// db_options to configure storage for listing / creating / deleting
/// databases.
#[derive(Clone, Debug, Default)]
pub struct ListingCatalogOptions {
    /// The options to use for databases opened by this catalog
    ///
    /// This also contains the storage options used by the catalog
    pub db_options: ListingDatabaseOptions,
}

impl CatalogOptions for ListingCatalogOptions {
    fn serialize_into_map(&self, map: &mut HashMap<String, String>) {
        self.db_options.serialize_into_map(map);
    }
}

impl ListingCatalogOptions {
    pub fn builder() -> ListingCatalogOptionsBuilder {
        ListingCatalogOptionsBuilder::new()
    }

    pub(crate) fn parse_from_map(map: &HashMap<String, String>) -> Result<Self> {
        let db_options = ListingDatabaseOptions::parse_from_map(map)?;
        Ok(Self { db_options })
    }
}

#[derive(Clone, Debug, Default)]
pub struct ListingCatalogOptionsBuilder {
    options: ListingCatalogOptions,
}

impl ListingCatalogOptionsBuilder {
    pub fn new() -> Self {
        Self {
            options: ListingCatalogOptions::default(),
        }
    }

    pub fn db_options(mut self, db_options: ListingDatabaseOptions) -> Self {
        self.options.db_options = db_options;
        self
    }

    pub fn build(self) -> ListingCatalogOptions {
        self.options
    }
}

/// A catalog implementation that works by listing subfolders in a directory
///
/// The listing catalog will be created with a base folder specified by the URI. Every subfolder
/// in this base folder will be considered a database. These will be opened as a
/// [`crate::database::listing::ListingDatabase`]
#[derive(Debug)]
pub struct ListingCatalog {
    object_store: Arc<ObjectStore>,
    uri: String,
    base_path: ObjectStorePath,
    options: ListingCatalogOptions,
}

impl ListingCatalog {
    /// Try to create a local directory to store the lancedb dataset
    pub fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
        let path = Path::new(path);
        if !path.try_exists()? {
            create_dir_all(path)?;
        }
        Ok(())
    }

    pub fn uri(&self) -> &str {
        &self.uri
    }

    async fn open_path(path: &str) -> Result<Self> {
        let (object_store, base_path) = ObjectStore::from_uri(path).await?;
        if object_store.is_local() {
            Self::try_create_dir(path).context(CreateDirSnafu { path })?;
        }
        Ok(Self {
            uri: path.to_string(),
            base_path,
            object_store,
            options: ListingCatalogOptions::default(),
        })
    }

    pub async fn connect(request: &ConnectRequest) -> Result<Self> {
        let uri = &request.uri;
        let parse_res = url::Url::parse(uri);
        let options = ListingCatalogOptions::parse_from_map(&request.options)?;
        match parse_res {
            Ok(url) if url.scheme().len() == 1 && cfg!(windows) => Self::open_path(uri).await,
            Ok(url) => {
                let plain_uri = url.to_string();
                let registry = Arc::new(ObjectStoreRegistry::default());
                let storage_options = options.db_options.storage_options.clone();
                let os_params = ObjectStoreParams {
                    storage_options: Some(storage_options.clone()),
                    ..Default::default()
                };
                let (object_store, base_path) =
                    ObjectStore::from_uri_and_params(registry, &plain_uri, &os_params).await?;
                if object_store.is_local() {
                    Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
                }
                Ok(Self {
                    uri: String::from(url.clone()),
                    base_path,
                    object_store,
                    options,
                })
            }
            Err(_) => Self::open_path(uri).await,
        }
    }

    fn database_path(&self, name: &str) -> ObjectStorePath {
        self.base_path.child(name.replace('\\', "/"))
    }
}

#[async_trait]
impl Catalog for ListingCatalog {
    async fn database_names(&self, request: DatabaseNamesRequest) -> Result<Vec<String>> {
        let mut f = self
            .object_store
            .read_dir(self.base_path.clone())
            .await?
            .iter()
            .map(Path::new)
            .filter_map(|p| p.file_name().and_then(|s| s.to_str().map(String::from)))
            .collect::<Vec<String>>();
        f.sort();
        if let Some(start_after) = request.start_after {
            let index = f
                .iter()
                .position(|name| name.as_str() > start_after.as_str())
                .unwrap_or(f.len());
            f.drain(0..index);
        }
        if let Some(limit) = request.limit {
            f.truncate(limit as usize);
        }
        Ok(f)
    }

    async fn create_database(&self, request: CreateDatabaseRequest) -> Result<Arc<dyn Database>> {
        let db_path = self.database_path(&request.name);
        let db_path_str = to_local_path(&db_path);
        let exists = Path::new(&db_path_str).exists();
        match request.mode {
            CreateDatabaseMode::Create if exists => {
                return Err(Error::DatabaseAlreadyExists { name: request.name })
            }
            CreateDatabaseMode::Create => {
                create_dir_all(db_path.to_string()).unwrap();
            }
            CreateDatabaseMode::ExistOk => {
                if !exists {
                    create_dir_all(db_path.to_string()).unwrap();
                }
            }
            CreateDatabaseMode::Overwrite => {
                if exists {
                    self.drop_database(&request.name).await?;
                }
                create_dir_all(db_path.to_string()).unwrap();
            }
        }
        let db_uri = format!("/{}/{}", self.base_path, request.name);
        let mut connect_request = ConnectRequest {
            uri: db_uri,
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            read_consistency_interval: None,
            options: Default::default(),
            session: None,
        };
        // Add the db options to the connect request
        self.options
            .db_options
            .serialize_into_map(&mut connect_request.options);
        Ok(Arc::new(
            ListingDatabase::connect_with_options(&connect_request).await?,
        ))
    }

    async fn open_database(&self, request: OpenDatabaseRequest) -> Result<Arc<dyn Database>> {
        let db_path = self.database_path(&request.name);
        let db_path_str = to_local_path(&db_path);
        let exists = Path::new(&db_path_str).exists();
        if !exists {
            return Err(Error::DatabaseNotFound { name: request.name });
        }
        let mut connect_request = ConnectRequest {
            uri: db_path.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            read_consistency_interval: None,
            options: Default::default(),
            session: None,
        };
        // Add the db options to the connect request
        self.options
            .db_options
            .serialize_into_map(&mut connect_request.options);
        Ok(Arc::new(
            ListingDatabase::connect_with_options(&connect_request).await?,
        ))
    }

    async fn rename_database(&self, _old_name: &str, _new_name: &str) -> Result<()> {
        Err(Error::NotSupported {
            message: "rename_database is not supported in LanceDB OSS yet".to_string(),
        })
    }

    async fn drop_database(&self, name: &str) -> Result<()> {
        let db_path = self.database_path(name);
        self.object_store
            .remove_dir_all(db_path.clone())
            .await
            .map_err(|err| match err {
                lance::Error::NotFound { .. } => Error::DatabaseNotFound {
                    name: name.to_owned(),
                },
                _ => Error::from(err),
            })?;
        Ok(())
    }

    async fn drop_all_databases(&self) -> Result<()> {
        self.object_store
            .remove_dir_all(self.base_path.clone())
            .await?;
        Ok(())
    }
}

#[cfg(all(test, not(windows)))]
mod tests {
    use super::*;

    /// file:/// URIs with drive letters do not work correctly on Windows
    #[cfg(windows)]
    fn path_to_uri(path: PathBuf) -> String {
        path.to_str().unwrap().to_string()
    }

    #[cfg(not(windows))]
    fn path_to_uri(path: PathBuf) -> String {
        Url::from_file_path(path).unwrap().to_string()
    }

    async fn setup_catalog() -> (TempDir, ListingCatalog) {
        let tempdir = tempfile::tempdir().unwrap();
        let catalog_path = tempdir.path().join("catalog");
        std::fs::create_dir_all(&catalog_path).unwrap();
        let uri = path_to_uri(catalog_path);
        let request = ConnectRequest {
            uri: uri.clone(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: Default::default(),
            read_consistency_interval: None,
            session: None,
        };
        let catalog = ListingCatalog::connect(&request).await.unwrap();
        (tempdir, catalog)
    }

    use crate::database::{CreateTableData, CreateTableRequest, TableNamesRequest};
    use crate::table::TableDefinition;
    use arrow_schema::Field;
    use std::path::PathBuf;
    use std::sync::Arc;
    use tempfile::{tempdir, TempDir};
    use url::Url;

    #[tokio::test]
    async fn test_database_names() {
        let (_tempdir, catalog) = setup_catalog().await;
        let names = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert!(names.is_empty());
    }

    #[tokio::test]
    async fn test_create_database() {
        let (_tempdir, catalog) = setup_catalog().await;
        catalog
            .create_database(CreateDatabaseRequest {
                name: "db1".into(),
                mode: CreateDatabaseMode::Create,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let names = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert_eq!(names, vec!["db1"]);
    }

    #[tokio::test]
    async fn test_create_database_exist_ok() {
        let (_tempdir, catalog) = setup_catalog().await;
        let db1 = catalog
            .create_database(CreateDatabaseRequest {
                name: "db_exist_ok".into(),
                mode: CreateDatabaseMode::ExistOk,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
        db1.create_table(CreateTableRequest {
            name: "test_table".parse().unwrap(),
            data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
            mode: Default::default(),
            write_options: Default::default(),
            namespace: vec![],
        })
        .await
        .unwrap();
        let db2 = catalog
            .create_database(CreateDatabaseRequest {
                name: "db_exist_ok".into(),
                mode: CreateDatabaseMode::ExistOk,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let tables = db2.table_names(TableNamesRequest::default()).await.unwrap();
        assert_eq!(tables, vec!["test_table".to_string()]);
    }

    #[tokio::test]
    async fn test_create_database_overwrite() {
        let (_tempdir, catalog) = setup_catalog().await;
        let db = catalog
            .create_database(CreateDatabaseRequest {
                name: "db_overwrite".into(),
                mode: CreateDatabaseMode::Create,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
        db.create_table(CreateTableRequest {
            name: "old_table".parse().unwrap(),
            data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
            mode: Default::default(),
            write_options: Default::default(),
            namespace: vec![],
        })
        .await
        .unwrap();
        let tables = db.table_names(TableNamesRequest::default()).await.unwrap();
        assert!(!tables.is_empty());
        let new_db = catalog
            .create_database(CreateDatabaseRequest {
                name: "db_overwrite".into(),
                mode: CreateDatabaseMode::Overwrite,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let tables = new_db
            .table_names(TableNamesRequest::default())
            .await
            .unwrap();
        assert!(tables.is_empty());
    }

    #[tokio::test]
    async fn test_create_database_overwrite_non_existing() {
        let (_tempdir, catalog) = setup_catalog().await;
        catalog
            .create_database(CreateDatabaseRequest {
                name: "new_db".into(),
                mode: CreateDatabaseMode::Overwrite,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let names = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert!(names.contains(&"new_db".to_string()));
    }

    #[tokio::test]
    async fn test_open_database() {
        let (_tempdir, catalog) = setup_catalog().await;
        // Test open non-existent
        let result = catalog
            .open_database(OpenDatabaseRequest {
                name: "missing".into(),
                database_options: HashMap::new(),
            })
            .await;
        assert!(matches!(
            result.unwrap_err(),
            Error::DatabaseNotFound { name } if name == "missing"
        ));
        // Create and open
        catalog
            .create_database(CreateDatabaseRequest {
                name: "valid_db".into(),
                mode: CreateDatabaseMode::Create,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let db = catalog
            .open_database(OpenDatabaseRequest {
                name: "valid_db".into(),
                database_options: HashMap::new(),
            })
            .await
            .unwrap();
        assert_eq!(
            db.table_names(TableNamesRequest::default()).await.unwrap(),
            Vec::<String>::new()
        );
    }

    #[tokio::test]
    async fn test_drop_database() {
        let (_tempdir, catalog) = setup_catalog().await;
        // Create test database
        catalog
            .create_database(CreateDatabaseRequest {
                name: "to_drop".into(),
                mode: CreateDatabaseMode::Create,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        let names = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert!(!names.is_empty());
        // Drop database
        catalog.drop_database("to_drop").await.unwrap();
        let names = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert!(names.is_empty());
    }

    #[tokio::test]
    async fn test_drop_all_databases() {
        let (_tempdir, catalog) = setup_catalog().await;
        catalog
            .create_database(CreateDatabaseRequest {
                name: "db1".into(),
                mode: CreateDatabaseMode::Create,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        catalog
            .create_database(CreateDatabaseRequest {
                name: "db2".into(),
                mode: CreateDatabaseMode::Create,
                options: HashMap::new(),
            })
            .await
            .unwrap();
        catalog.drop_all_databases().await.unwrap();
        let names = catalog
            .database_names(DatabaseNamesRequest::default())
            .await
            .unwrap();
        assert!(names.is_empty());
    }

    #[tokio::test]
    async fn test_rename_database_unsupported() {
        let (_tempdir, catalog) = setup_catalog().await;
        let result = catalog.rename_database("old", "new").await;
        assert!(matches!(
            result.unwrap_err(),
            Error::NotSupported { message } if message.contains("rename_database")
        ));
    }

    #[tokio::test]
    async fn test_connect_local_path() {
        let tmp_dir = tempdir().unwrap();
        let path = tmp_dir.path().to_str().unwrap();
        let request = ConnectRequest {
            uri: path.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: Default::default(),
            read_consistency_interval: None,
            session: None,
        };
        let catalog = ListingCatalog::connect(&request).await.unwrap();
        assert!(catalog.object_store.is_local());
        assert_eq!(catalog.uri, path);
    }

    #[tokio::test]
    async fn test_connect_file_scheme() {
        let tmp_dir = tempdir().unwrap();
        let path = tmp_dir.path();
        let uri = path_to_uri(path.to_path_buf());
        let request = ConnectRequest {
            uri: uri.clone(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: Default::default(),
            read_consistency_interval: None,
            session: None,
        };
        let catalog = ListingCatalog::connect(&request).await.unwrap();
        assert!(catalog.object_store.is_local());
        assert_eq!(catalog.uri, uri);
    }

    #[tokio::test]
    async fn test_connect_invalid_uri_fallback() {
        let invalid_uri = "invalid:///path";
        let request = ConnectRequest {
            uri: invalid_uri.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: Default::default(),
            read_consistency_interval: None,
            session: None,
        };
        let result = ListingCatalog::connect(&request).await;
        assert!(result.is_err());
    }
}

View File

@@ -13,8 +13,6 @@ use lance::dataset::ReadParams;
 use object_store::aws::AwsCredential;

 use crate::arrow::{IntoArrow, IntoArrowStream, SendableRecordBatchStream};
-use crate::catalog::listing::ListingCatalog;
-use crate::catalog::CatalogOptions;
 use crate::database::listing::{
     ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
 };
@@ -660,7 +658,7 @@ pub struct ConnectRequest {
     #[cfg(feature = "remote")]
     pub client_config: ClientConfig,

-    /// Database/Catalog specific options
+    /// Database specific options
     pub options: HashMap<String, String>,

     /// The interval at which to check for updates from other processes.
@@ -937,50 +935,6 @@ pub fn connect(uri: &str) -> ConnectBuilder {
     ConnectBuilder::new(uri)
 }

-/// A builder for configuring a connection to a LanceDB catalog
-#[derive(Debug)]
-pub struct CatalogConnectBuilder {
-    request: ConnectRequest,
-}
-
-impl CatalogConnectBuilder {
-    /// Create a new [`CatalogConnectBuilder`] with the given catalog URI.
-    pub fn new(uri: &str) -> Self {
-        Self {
-            request: ConnectRequest {
-                uri: uri.to_string(),
-                #[cfg(feature = "remote")]
-                client_config: Default::default(),
-                read_consistency_interval: None,
-                options: HashMap::new(),
-                session: None,
-            },
-        }
-    }
-
-    pub fn catalog_options(mut self, catalog_options: &dyn CatalogOptions) -> Self {
-        catalog_options.serialize_into_map(&mut self.request.options);
-        self
-    }
-
-    /// Establishes a connection to the catalog
-    pub async fn execute(self) -> Result<Arc<ListingCatalog>> {
-        let catalog = ListingCatalog::connect(&self.request).await?;
-        Ok(Arc::new(catalog))
-    }
-}
-
-/// Connect to a LanceDB catalog.
-///
-/// A catalog is a container for databases, which in turn are containers for tables.
-///
-/// # Arguments
-///
-/// * `uri` - URI where the catalog is located, can be a local directory or supported remote cloud storage.
-pub fn connect_catalog(uri: &str) -> CatalogConnectBuilder {
-    CatalogConnectBuilder::new(uri)
-}
-
 #[cfg(all(test, feature = "remote"))]
 mod test_utils {
     use super::*;
@@ -1022,7 +976,6 @@ mod test_utils {
 mod tests {
     use std::fs::create_dir_all;

-    use crate::catalog::{Catalog, DatabaseNamesRequest, OpenDatabaseRequest};
     use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
     use crate::query::QueryBase;
     use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1328,91 +1281,4 @@ mod tests {
             .unwrap();
         assert_eq!(other_schema, overwritten.schema().await.unwrap());
     }
-
-    #[tokio::test]
-    async fn test_connect_catalog() {
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let catalog = connect_catalog(uri).execute().await.unwrap();
-
-        // Verify that we can get the uri from the catalog
-        let catalog_uri = catalog.uri();
-        assert_eq!(catalog_uri, uri);
-
-        // Check that the catalog is initially empty
-        let dbs = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs.len(), 0);
-    }
-
-    #[tokio::test]
-    #[cfg(not(windows))]
-    async fn test_catalog_create_database() {
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let catalog = connect_catalog(uri).execute().await.unwrap();
-
-        let db_name = "test_db";
-        catalog
-            .create_database(crate::catalog::CreateDatabaseRequest {
-                name: db_name.to_string(),
-                mode: Default::default(),
-                options: Default::default(),
-            })
-            .await
-            .unwrap();
-
-        let dbs = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs.len(), 1);
-        assert_eq!(dbs[0], db_name);
-
-        let db = catalog
-            .open_database(OpenDatabaseRequest {
-                name: db_name.to_string(),
-                database_options: HashMap::new(),
-            })
-            .await
-            .unwrap();
-        let tables = db.table_names(Default::default()).await.unwrap();
-        assert_eq!(tables.len(), 0);
-    }
-
-    #[tokio::test]
-    #[cfg(not(windows))]
-    async fn test_catalog_drop_database() {
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let catalog = connect_catalog(uri).execute().await.unwrap();
-
-        // Create and then drop a database
-        let db_name = "test_db_to_drop";
-        catalog
-            .create_database(crate::catalog::CreateDatabaseRequest {
-                name: db_name.to_string(),
-                mode: Default::default(),
-                options: Default::default(),
-            })
-            .await
-            .unwrap();
-
-        let dbs = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs.len(), 1);
-
-        catalog.drop_database(db_name).await.unwrap();
-
-        let dbs_after = catalog
-            .database_names(DatabaseNamesRequest::default())
-            .await
-            .unwrap();
-        assert_eq!(dbs_after.len(), 0);
-    }
 }

View File

@@ -587,7 +587,13 @@ impl ListingDatabase {
 #[async_trait::async_trait]
 impl Database for ListingDatabase {
-    async fn list_namespaces(&self, _request: ListNamespacesRequest) -> Result<Vec<String>> {
+    async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
+        if !request.namespace.is_empty() {
+            return Err(Error::NotSupported {
+                message: "Namespace operations are not supported for listing database".into(),
+            });
+        }
         Ok(Vec::new())
     }

View File

@@ -45,10 +45,10 @@ use crate::{
 pub trait EmbeddingFunction: std::fmt::Debug + Send + Sync {
     fn name(&self) -> &str;
     /// The type of the input data
-    fn source_type(&self) -> Result<Cow<DataType>>;
+    fn source_type(&self) -> Result<Cow<'_, DataType>>;
     /// The type of the output data
     /// This should **always** match the output of the `embed` function
-    fn dest_type(&self) -> Result<Cow<DataType>>;
+    fn dest_type(&self) -> Result<Cow<'_, DataType>>;
     /// Compute the embeddings for the source column in the database
     fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>>;
     /// Compute the embeddings for a given user query
/// Compute the embeddings for a given user query /// Compute the embeddings for a given user query

View File

@@ -75,11 +75,11 @@ impl EmbeddingFunction for BedrockEmbeddingFunction {
"bedrock" "bedrock"
} }
fn source_type(&self) -> Result<Cow<DataType>> { fn source_type(&self) -> Result<Cow<'_, DataType>> {
Ok(Cow::Owned(DataType::Utf8)) Ok(Cow::Owned(DataType::Utf8))
} }
fn dest_type(&self) -> Result<Cow<DataType>> { fn dest_type(&self) -> Result<Cow<'_, DataType>> {
let n_dims = self.model.ndims(); let n_dims = self.model.ndims();
Ok(Cow::Owned(DataType::new_fixed_size_list( Ok(Cow::Owned(DataType::new_fixed_size_list(
DataType::Float32, DataType::Float32,

View File

@@ -144,11 +144,11 @@ impl EmbeddingFunction for OpenAIEmbeddingFunction {
"openai" "openai"
} }
fn source_type(&self) -> Result<Cow<DataType>> { fn source_type(&self) -> Result<Cow<'_, DataType>> {
Ok(Cow::Owned(DataType::Utf8)) Ok(Cow::Owned(DataType::Utf8))
} }
fn dest_type(&self) -> Result<Cow<DataType>> { fn dest_type(&self) -> Result<Cow<'_, DataType>> {
let n_dims = self.model.ndims(); let n_dims = self.model.ndims();
Ok(Cow::Owned(DataType::new_fixed_size_list( Ok(Cow::Owned(DataType::new_fixed_size_list(
DataType::Float32, DataType::Float32,

View File

@@ -407,11 +407,11 @@ impl EmbeddingFunction for SentenceTransformersEmbeddings {
"sentence-transformers" "sentence-transformers"
} }
fn source_type(&self) -> crate::Result<std::borrow::Cow<arrow_schema::DataType>> { fn source_type(&self) -> crate::Result<std::borrow::Cow<'_, arrow_schema::DataType>> {
Ok(Cow::Owned(DataType::Utf8)) Ok(Cow::Owned(DataType::Utf8))
} }
fn dest_type(&self) -> crate::Result<std::borrow::Cow<arrow_schema::DataType>> { fn dest_type(&self) -> crate::Result<std::borrow::Cow<'_, arrow_schema::DataType>> {
let (n_dims, dtype) = self.compute_ndims_and_dtype()?; let (n_dims, dtype) = self.compute_ndims_and_dtype()?;
Ok(Cow::Owned(DataType::new_fixed_size_list( Ok(Cow::Owned(DataType::new_fixed_size_list(
dtype, dtype,

View File

@@ -112,6 +112,15 @@ macro_rules! impl_ivf_params_setter {
             self.max_iterations = max_iterations;
             self
         }

+        /// The target size of each partition.
+        ///
+        /// This value controls the tradeoff between search performance and accuracy.
+        /// The higher the value the faster the search but the less accurate the results will be.
+        pub fn target_partition_size(mut self, target_partition_size: u32) -> Self {
+            self.target_partition_size = Some(target_partition_size);
+            self
+        }
     };
 }
@@ -182,6 +191,7 @@ pub struct IvfFlatIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,
 }

 impl Default for IvfFlatIndexBuilder {
@@ -191,6 +201,7 @@ impl Default for IvfFlatIndexBuilder {
             num_partitions: None,
             sample_rate: 256,
             max_iterations: 50,
+            target_partition_size: None,
         }
     }
 }
@@ -228,6 +239,7 @@ pub struct IvfPqIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,

     // PQ
     pub(crate) num_sub_vectors: Option<u32>,
@@ -243,6 +255,7 @@ impl Default for IvfPqIndexBuilder {
             num_bits: None,
             sample_rate: 256,
             max_iterations: 50,
+            target_partition_size: None,
         }
     }
 }
@@ -293,6 +306,7 @@ pub struct IvfHnswPqIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,

     // HNSW
     pub(crate) m: u32,
@@ -314,6 +328,7 @@ impl Default for IvfHnswPqIndexBuilder {
             max_iterations: 50,
             m: 20,
             ef_construction: 300,
+            target_partition_size: None,
         }
     }
 }
@@ -341,6 +356,7 @@ pub struct IvfHnswSqIndexBuilder {
     pub(crate) num_partitions: Option<u32>,
     pub(crate) sample_rate: u32,
     pub(crate) max_iterations: u32,
+    pub(crate) target_partition_size: Option<u32>,

     // HNSW
     pub(crate) m: u32,
@@ -358,6 +374,7 @@ impl Default for IvfHnswSqIndexBuilder {
             max_iterations: 50,
             m: 20,
             ef_construction: 300,
+            target_partition_size: None,
         }
     }
 }

View File

@@ -191,7 +191,6 @@
 //! ```

 pub mod arrow;
-pub mod catalog;
 pub mod connection;
 pub mod data;
 pub mod database;

View File

@@ -242,17 +242,15 @@ pub struct OptimizeStats {
 /// Describes what happens when a vector either contains NaN or
 /// does not have enough values
 #[derive(Clone, Debug, Default)]
+#[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
 enum BadVectorHandling {
     /// An error is returned
     #[default]
     Error,
-    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The offending row is dropped
     Drop,
-    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The invalid/missing items are replaced by fill_value
     Fill(f32),
-    #[allow(dead_code)] // https://github.com/lancedb/lancedb/issues/992
     /// The invalid items are replaced by NULL
     None,
 }

View File

@@ -20,6 +20,8 @@ use datafusion_physical_plan::SendableRecordBatchStream;
 lazy_static! {
     static ref TABLE_NAME_REGEX: regex::Regex = regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
+    static ref NAMESPACE_NAME_REGEX: regex::Regex =
+        regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
 }

 pub trait PatchStoreParam {
@@ -98,6 +100,53 @@ pub fn validate_table_name(name: &str) -> Result<()> {
     Ok(())
 }

+/// Validate a namespace name component
+///
+/// Namespace names must:
+/// - Not be empty
+/// - Only contain alphanumeric characters, underscores, hyphens, and periods
+///
+/// # Arguments
+/// * `name` - A single namespace component (not the full path)
+///
+/// # Returns
+/// * `Ok(())` if the namespace name is valid
+/// * `Err(Error)` if the namespace name is invalid
+pub fn validate_namespace_name(name: &str) -> Result<()> {
+    if name.is_empty() {
+        return Err(Error::InvalidInput {
+            message: "Namespace names cannot be empty strings".to_string(),
+        });
+    }
+    if !NAMESPACE_NAME_REGEX.is_match(name) {
+        return Err(Error::InvalidInput {
+            message: format!(
+                "Invalid namespace name '{}': Namespace names can only contain alphanumeric characters, underscores, hyphens, and periods",
+                name
+            ),
+        });
+    }
+    Ok(())
+}
+
+/// Validate all components of a namespace
+///
+/// Iterates through all namespace components and validates each one.
+/// Returns an error if any component is invalid.
+///
+/// # Arguments
+/// * `namespace` - The namespace components to validate
+///
+/// # Returns
+/// * `Ok(())` if all namespace components are valid
+/// * `Err(Error)` if any component is invalid
+pub fn validate_namespace(namespace: &[String]) -> Result<()> {
+    for component in namespace {
+        validate_namespace_name(component)?;
+    }
+    Ok(())
+}
+
 /// Find one default column to create index or perform vector query.
 pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
     // Try to find a vector column.
@@ -345,6 +394,61 @@ mod tests {
         assert!(validate_table_name("name with space").is_err());
     }

+    #[test]
+    fn test_validate_namespace_name() {
+        // Valid namespace names
+        assert!(validate_namespace_name("ns1").is_ok());
+        assert!(validate_namespace_name("namespace_123").is_ok());
+        assert!(validate_namespace_name("my-namespace").is_ok());
+        assert!(validate_namespace_name("my.namespace").is_ok());
+        assert!(validate_namespace_name("NS_1.2.3").is_ok());
+        assert!(validate_namespace_name("a").is_ok());
+        assert!(validate_namespace_name("123").is_ok());
+        assert!(validate_namespace_name("_underscore").is_ok());
+        assert!(validate_namespace_name("-hyphen").is_ok());
+        assert!(validate_namespace_name(".period").is_ok());
+
+        // Invalid namespace names
+        assert!(validate_namespace_name("").is_err());
+        assert!(validate_namespace_name("namespace with spaces").is_err());
+        assert!(validate_namespace_name("namespace/with/slashes").is_err());
+        assert!(validate_namespace_name("namespace\\with\\backslashes").is_err());
+        assert!(validate_namespace_name("namespace$with$delimiter").is_err());
+        assert!(validate_namespace_name("namespace@special").is_err());
+        assert!(validate_namespace_name("namespace#hash").is_err());
+    }
+
+    #[test]
+    fn test_validate_namespace() {
+        // Valid namespace with single component
+        assert!(validate_namespace(&["ns1".to_string()]).is_ok());
+
+        // Valid namespace with multiple components
+        assert!(
+            validate_namespace(&["ns1".to_string(), "ns2".to_string(), "ns3".to_string()]).is_ok()
+        );
+
+        // Empty namespace (root) is valid
+        assert!(validate_namespace(&[]).is_ok());
+
+        // Invalid: contains empty component
+        assert!(validate_namespace(&["ns1".to_string(), "".to_string()]).is_err());
+
+        // Invalid: contains component with spaces
+        assert!(validate_namespace(&["ns1".to_string(), "ns 2".to_string()]).is_err());
+
+        // Invalid: contains component with special characters
+        assert!(validate_namespace(&["ns1".to_string(), "ns@2".to_string()]).is_err());
+        assert!(validate_namespace(&["ns1".to_string(), "ns/2".to_string()]).is_err());
+        assert!(validate_namespace(&["ns1".to_string(), "ns$2".to_string()]).is_err());
+
+        // Valid: underscores, hyphens, and periods are allowed
+        assert!(
+            validate_namespace(&["ns_1".to_string(), "ns-2".to_string(), "ns.3".to_string()])
+                .is_ok()
+        );
+    }
+
     #[test]
     fn test_string_to_datatype() {
         let string = "int32";
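For readers following the validation logic outside Rust: the rule is just the table-name pattern applied per component. An equivalent Python sketch, purely illustrative and not part of the change:

```python
import re

# Same pattern as NAMESPACE_NAME_REGEX above; "+" already rejects the
# empty string, so one check covers both rules.
_NAMESPACE_NAME = re.compile(r"^[a-zA-Z0-9_\-\.]+$")

def validate_namespace(namespace: list[str]) -> None:
    # An empty list (the root namespace) is valid.
    for component in namespace:
        if not _NAMESPACE_NAME.fullmatch(component):
            raise ValueError(f"Invalid namespace name '{component}'")
```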

View File

@@ -341,10 +341,10 @@ impl EmbeddingFunction for MockEmbed {
     fn name(&self) -> &str {
         &self.name
     }

-    fn source_type(&self) -> Result<Cow<DataType>> {
+    fn source_type(&self) -> Result<Cow<'_, DataType>> {
         Ok(Cow::Borrowed(&self.source_type))
     }

-    fn dest_type(&self) -> Result<Cow<DataType>> {
+    fn dest_type(&self) -> Result<Cow<'_, DataType>> {
         Ok(Cow::Borrowed(&self.dest_type))
     }

     fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {