From 504bdc471cabc74681f296e6375fffd1c5393bf7 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 24 Sep 2025 15:33:31 -0700 Subject: [PATCH] feat(rust): support namespace backed database (#2664) This PR adds support for namespace-backed databases through lance-namespace integration, enabling centralized table management through namespace APIs. --------- Co-authored-by: Claude --- Cargo.lock | 1466 ++++++++++++++++++++---- Cargo.toml | 1 + rust/lancedb/Cargo.toml | 1 + rust/lancedb/src/connection.rs | 111 ++ rust/lancedb/src/database.rs | 1 + rust/lancedb/src/database/namespace.rs | 840 ++++++++++++++ rust/lancedb/src/lib.rs | 4 +- 7 files changed, 2196 insertions(+), 228 deletions(-) create mode 100644 rust/lancedb/src/database/namespace.rs diff --git a/Cargo.lock b/Cargo.lock index 817b041e..169fe756 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1964,6 +1964,54 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "datafusion" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog 48.0.1", + "datafusion-catalog-listing 48.0.1", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + "datafusion-datasource 48.0.1", + "datafusion-datasource-csv 48.0.1", + "datafusion-datasource-json 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-expr-common 48.0.1", + "datafusion-functions 48.0.1", + "datafusion-functions-aggregate 48.0.1", + "datafusion-functions-nested 48.0.1", + "datafusion-functions-table 48.0.1", + "datafusion-functions-window 48.0.1", + "datafusion-optimizer 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "datafusion-physical-optimizer 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-session 48.0.1", + "datafusion-sql 
48.0.1", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "regex", + "sqlparser 0.55.0", + "tempfile", + "tokio", + "url", + "uuid", +] + [[package]] name = "datafusion" version = "49.0.2" @@ -1976,28 +2024,28 @@ dependencies = [ "async-trait", "bytes", "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "datafusion-catalog 49.0.2", + "datafusion-catalog-listing 49.0.2", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-datasource 49.0.2", + "datafusion-datasource-csv 49.0.2", + "datafusion-datasource-json 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-functions 49.0.2", + "datafusion-functions-aggregate 49.0.2", + "datafusion-functions-nested 49.0.2", + "datafusion-functions-table 49.0.2", + "datafusion-functions-window 49.0.2", + "datafusion-optimizer 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-optimizer 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-session 49.0.2", + "datafusion-sql 49.0.2", "futures", "itertools 0.14.0", "log", @@ -2012,6 +2060,32 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-catalog" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + "datafusion-datasource 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-session 48.0.1", + "datafusion-sql 48.0.1", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-catalog" version = "49.0.2" @@ -2021,15 +2095,15 @@ dependencies = [ "arrow", "async-trait", "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-datasource 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-session 49.0.2", + "datafusion-sql 49.0.2", "futures", "itertools 0.14.0", "log", @@ -2038,6 +2112,29 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-catalog-listing" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog 48.0.1", + "datafusion-common 48.0.1", + "datafusion-datasource 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-session 48.0.1", + "futures", + "log", + "object_store", + "tokio", +] + [[package]] name = "datafusion-catalog-listing" version = "49.0.2" @@ -2046,21 +2143,43 @@ checksum = 
"051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b" dependencies = [ "arrow", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 49.0.2", + "datafusion-common 49.0.2", + "datafusion-datasource 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-session 49.0.2", "futures", "log", "object_store", "tokio", ] +[[package]] +name = "datafusion-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "base64 0.22.1", + "half", + "hashbrown 0.14.5", + "indexmap 2.11.0", + "libc", + "log", + "object_store", + "paste", + "sqlparser 0.55.0", + "tokio", + "web-time", +] + [[package]] name = "datafusion-common" version = "49.0.2" @@ -2084,6 +2203,17 @@ dependencies = [ "web-time", ] +[[package]] +name = "datafusion-common-runtime" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2239f964e95c3a5d6b4a8cde07e646de8995c1396a7fd62c6e784f5341db499" +dependencies = [ + "futures", + "log", + "tokio", +] + [[package]] name = "datafusion-common-runtime" version = "49.0.2" @@ -2095,6 +2225,34 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + 
"datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-session 48.0.1", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand 0.9.2", + "tokio", + "url", +] + [[package]] name = "datafusion-datasource" version = "49.0.2" @@ -2105,14 +2263,14 @@ dependencies = [ "async-trait", "bytes", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-session 49.0.2", "futures", "glob", "itertools 0.14.0", @@ -2123,6 +2281,31 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-datasource-csv" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog 48.0.1", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + "datafusion-datasource 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-session 48.0.1", + "futures", + "object_store", + "regex", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" version = "49.0.2" @@ -2132,22 +2315,47 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - 
"datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 49.0.2", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-datasource 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-session 49.0.2", "futures", "object_store", "regex", "tokio", ] +[[package]] +name = "datafusion-datasource-json" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog 48.0.1", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + "datafusion-datasource 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-session 48.0.1", + "futures", + "object_store", + "serde_json", + "tokio", +] + [[package]] name = "datafusion-datasource-json" version = "49.0.2" @@ -2157,28 +2365,53 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 49.0.2", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-datasource 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-session 49.0.2", "futures", "object_store", 
"serde_json", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0e7b648387b0c1937b83cb328533c06c923799e73a9e3750b762667f32662c0" + [[package]] name = "datafusion-doc" version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8" +[[package]] +name = "datafusion-execution" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common 48.0.1", + "datafusion-expr 48.0.1", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + [[package]] name = "datafusion-execution" version = "49.0.2" @@ -2187,8 +2420,8 @@ checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f" dependencies = [ "arrow", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 49.0.2", + "datafusion-expr 49.0.2", "futures", "log", "object_store", @@ -2198,6 +2431,26 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-expr" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63" +dependencies = [ + "arrow", + "chrono", + "datafusion-common 48.0.1", + "datafusion-doc 48.0.1", + "datafusion-expr-common 48.0.1", + "datafusion-functions-aggregate-common 48.0.1", + "datafusion-functions-window-common 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "indexmap 2.11.0", + "paste", + "serde_json", + "sqlparser 0.55.0", +] + [[package]] name = "datafusion-expr" version = "49.0.2" @@ -2207,18 +2460,31 @@ dependencies = [ "arrow", "async-trait", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - 
"datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-doc 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-functions-aggregate-common 49.0.2", + "datafusion-functions-window-common 49.0.2", + "datafusion-physical-expr-common 49.0.2", "indexmap 2.11.0", "paste", "serde_json", "sqlparser 0.55.0", ] +[[package]] +name = "datafusion-expr-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6" +dependencies = [ + "arrow", + "datafusion-common 48.0.1", + "indexmap 2.11.0", + "itertools 0.14.0", + "paste", +] + [[package]] name = "datafusion-expr-common" version = "49.0.2" @@ -2226,12 +2492,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 49.0.2", "indexmap 2.11.0", "itertools 0.14.0", "paste", ] +[[package]] +name = "datafusion-functions" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common 48.0.1", + "datafusion-doc 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-expr-common 48.0.1", + "datafusion-macros 48.0.1", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + [[package]] name = "datafusion-functions" version = "49.0.2" @@ -2244,12 +2539,12 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + 
"datafusion-common 49.0.2", + "datafusion-doc 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-macros 49.0.2", "hex", "itertools 0.14.0", "log", @@ -2261,6 +2556,27 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-functions-aggregate" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1", + "datafusion-doc 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-functions-aggregate-common 48.0.1", + "datafusion-macros 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "half", + "log", + "paste", +] + [[package]] name = "datafusion-functions-aggregate" version = "49.0.2" @@ -2269,19 +2585,32 @@ checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-doc 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-functions-aggregate-common 49.0.2", + "datafusion-macros 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", "half", "log", "paste", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1", + "datafusion-expr-common 48.0.1", + "datafusion-physical-expr-common 48.0.1", +] + [[package]] name = 
"datafusion-functions-aggregate-common" version = "49.0.2" @@ -2290,9 +2619,30 @@ checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-physical-expr-common 49.0.2", +] + +[[package]] +name = "datafusion-functions-nested" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common 48.0.1", + "datafusion-doc 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-functions 48.0.1", + "datafusion-functions-aggregate 48.0.1", + "datafusion-macros 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "itertools 0.14.0", + "log", + "paste", ] [[package]] @@ -2303,20 +2653,36 @@ checksum = "9187678af567d7c9e004b72a0b6dc5b0a00ebf4901cb3511ed2db4effe092e66" dependencies = [ "arrow", "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-doc 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-functions 49.0.2", + "datafusion-functions-aggregate 49.0.2", + "datafusion-functions-aggregate-common 49.0.2", + "datafusion-macros 49.0.2", + "datafusion-physical-expr-common 49.0.2", "itertools 0.14.0", "log", "paste", ] +[[package]] +name = "datafusion-functions-table" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37" +dependencies = [ + "arrow", + "async-trait", + 
"datafusion-catalog 48.0.1", + "datafusion-common 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-plan 48.0.1", + "parking_lot", + "paste", +] + [[package]] name = "datafusion-functions-table" version = "49.0.2" @@ -2325,14 +2691,32 @@ checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5" dependencies = [ "arrow", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion-catalog 49.0.2", + "datafusion-common 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-plan 49.0.2", "parking_lot", "paste", ] +[[package]] +name = "datafusion-functions-window" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53" +dependencies = [ + "arrow", + "datafusion-common 48.0.1", + "datafusion-doc 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-functions-window-common 48.0.1", + "datafusion-macros 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "log", + "paste", +] + [[package]] name = "datafusion-functions-window" version = "49.0.2" @@ -2340,25 +2724,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e" dependencies = [ "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-doc 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-functions-window-common 49.0.2", + "datafusion-macros 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", "log", "paste", ] +[[package]] +name = "datafusion-functions-window-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "c679f8bf0971704ec8fd4249fcbb2eb49d6a12cc3e7a840ac047b4928d3541b5" +dependencies = [ + "datafusion-common 48.0.1", + "datafusion-physical-expr-common 48.0.1", +] + [[package]] name = "datafusion-functions-window-common" version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-physical-expr-common 49.0.2", +] + +[[package]] +name = "datafusion-macros" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2821de7cb0362d12e75a5196b636a59ea3584ec1e1cc7dc6f5e34b9e8389d251" +dependencies = [ + "datafusion-expr 48.0.1", + "quote", + "syn 2.0.106", ] [[package]] @@ -2367,11 +2772,29 @@ version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a" dependencies = [ - "datafusion-expr", + "datafusion-expr 49.0.2", "quote", "syn 2.0.106", ] +[[package]] +name = "datafusion-optimizer" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53" +dependencies = [ + "arrow", + "chrono", + "datafusion-common 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "indexmap 2.11.0", + "itertools 0.14.0", + "log", + "regex", + "regex-syntax", +] + [[package]] name = "datafusion-optimizer" version = "49.0.2" @@ -2380,10 +2803,10 @@ checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d" dependencies = [ "arrow", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", + "datafusion-common 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-physical-expr 49.0.2", "indexmap 2.11.0", 
"itertools 0.14.0", "log", @@ -2391,6 +2814,28 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "datafusion-physical-expr" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-expr-common 48.0.1", + "datafusion-functions-aggregate-common 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "half", + "hashbrown 0.14.5", + "indexmap 2.11.0", + "itertools 0.14.0", + "log", + "paste", + "petgraph 0.8.2", +] + [[package]] name = "datafusion-physical-expr" version = "49.0.2" @@ -2399,11 +2844,11 @@ checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-functions-aggregate-common 49.0.2", + "datafusion-physical-expr-common 49.0.2", "half", "hashbrown 0.14.5", "indexmap 2.11.0", @@ -2413,6 +2858,20 @@ dependencies = [ "petgraph 0.8.2", ] +[[package]] +name = "datafusion-physical-expr-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1", + "datafusion-expr-common 48.0.1", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + [[package]] name = "datafusion-physical-expr-common" version = "49.0.2" @@ -2421,12 +2880,30 @@ checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", + "datafusion-common 49.0.2", + "datafusion-expr-common 49.0.2", 
"hashbrown 0.14.5", "itertools 0.14.0", ] +[[package]] +name = "datafusion-physical-optimizer" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a" +dependencies = [ + "arrow", + "datafusion-common 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-expr-common 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "datafusion-physical-plan 48.0.1", + "itertools 0.14.0", + "log", +] + [[package]] name = "datafusion-physical-optimizer" version = "49.0.2" @@ -2434,18 +2911,48 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a" dependencies = [ "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", + "datafusion-common 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-plan 49.0.2", "datafusion-pruning", "itertools 0.14.0", "log", ] +[[package]] +name = "datafusion-physical-plan" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08" +dependencies = [ + "ahash", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-functions-window-common 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-expr-common 48.0.1", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.11.0", + "itertools 0.14.0", + "log", + 
"parking_lot", + "pin-project-lite", + "tokio", +] + [[package]] name = "datafusion-physical-plan" version = "49.0.2" @@ -2458,13 +2965,13 @@ dependencies = [ "arrow-schema", "async-trait", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-functions-window-common 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", "futures", "half", "hashbrown 0.14.5", @@ -2484,16 +2991,40 @@ checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" dependencies = [ "arrow", "arrow-schema", - "datafusion-common", - "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", + "datafusion-common 49.0.2", + "datafusion-datasource 49.0.2", + "datafusion-expr-common 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-expr-common 49.0.2", + "datafusion-physical-plan 49.0.2", "itertools 0.14.0", "log", ] +[[package]] +name = "datafusion-session" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common 48.0.1", + "datafusion-common-runtime 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-plan 48.0.1", + "datafusion-sql 48.0.1", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-session" version = "49.0.2" @@ -2503,13 +3034,13 @@ dependencies = [ "arrow", "async-trait", "dashmap", - 
"datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", + "datafusion-common 49.0.2", + "datafusion-common-runtime 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-plan 49.0.2", + "datafusion-sql 49.0.2", "futures", "itertools 0.14.0", "log", @@ -2518,6 +3049,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-sql" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common 48.0.1", + "datafusion-expr 48.0.1", + "indexmap 2.11.0", + "log", + "regex", + "sqlparser 0.55.0", +] + [[package]] name = "datafusion-sql" version = "49.0.2" @@ -2526,8 +3073,8 @@ checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25" dependencies = [ "arrow", "bigdecimal", - "datafusion-common", - "datafusion-expr", + "datafusion-common 49.0.2", + "datafusion-expr 49.0.2", "indexmap 2.11.0", "log", "regex", @@ -3038,6 +3585,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fsst" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2afa8e97cfde0f44698a13dc5afc0a29840eb0d252a918986b34dc5baa166d9a" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "fsst" version = "0.37.0" @@ -4217,6 +4773,69 @@ dependencies = [ "simple_asn1", ] +[[package]] +name = "lance" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8258e6fe0283f6a9764ffe520e3e2e7727b00a7f14f03852b167692608242f0e" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + 
"arrow-ipc", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "async_cell", + "aws-credential-types", + "byteorder", + "bytes", + "chrono", + "dashmap", + "datafusion 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-functions 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-physical-plan 48.0.1", + "deepsize", + "either", + "futures", + "half", + "humantime", + "itertools 0.13.0", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "lance-datafusion 0.31.1", + "lance-encoding 0.31.1", + "lance-file 0.31.1", + "lance-index 0.31.1", + "lance-io 0.31.1", + "lance-linalg 0.31.1", + "lance-table 0.31.1", + "log", + "moka", + "object_store", + "permutation", + "pin-project", + "prost", + "prost-types", + "rand 0.8.5", + "roaring", + "serde", + "serde_json", + "snafu", + "tantivy", + "tempfile", + "tokio", + "tracing", + "url", + "uuid", +] + [[package]] name = "lance" version = "0.37.0" @@ -4241,26 +4860,26 @@ dependencies = [ "bytes", "chrono", "dashmap", - "datafusion", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-plan", + "datafusion 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-functions 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-physical-plan 49.0.2", "deepsize", "either", "futures", "half", "humantime", "itertools 0.13.0", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-encoding", - "lance-file", - "lance-index", - "lance-io", - "lance-linalg", - "lance-table", + "lance-arrow 0.37.0", + "lance-core 0.37.0", + "lance-datafusion 0.37.0", + "lance-encoding 0.37.0", + "lance-file 0.37.0", + "lance-index 0.37.0", + "lance-io 0.37.0", + "lance-linalg 0.37.0", + "lance-table 0.37.0", "log", "moka", "object_store", @@ -4282,6 +4901,25 @@ dependencies = [ "uuid", ] +[[package]] +name = "lance-arrow" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"135b0fdadc4ada6f8c382379a97f09f6a6186f05b33bd23743a959151a5cf233" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "bytes", + "getrandom 0.2.16", + "half", + "num-traits", + "rand 0.8.5", +] + [[package]] name = "lance-arrow" version = "0.37.0" @@ -4313,6 +4951,43 @@ dependencies = [ "seq-macro", ] +[[package]] +name = "lance-core" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83eb8468fcdfe4b4eacbcb1c151a72ac032d9143f4203a7f3bfbde64214a40b3" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "byteorder", + "bytes", + "chrono", + "datafusion-common 48.0.1", + "datafusion-sql 48.0.1", + "deepsize", + "futures", + "lance-arrow 0.31.1", + "libc", + "log", + "mock_instant", + "moka", + "num_cpus", + "object_store", + "pin-project", + "prost", + "rand 0.8.5", + "roaring", + "serde_json", + "snafu", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", +] + [[package]] name = "lance-core" version = "0.37.0" @@ -4326,11 +5001,11 @@ dependencies = [ "byteorder", "bytes", "chrono", - "datafusion-common", - "datafusion-sql", + "datafusion-common 49.0.2", + "datafusion-sql 49.0.2", "deepsize", "futures", - "lance-arrow", + "lance-arrow 0.37.0", "libc", "log", "mock_instant", @@ -4350,6 +5025,36 @@ dependencies = [ "url", ] +[[package]] +name = "lance-datafusion" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a266374d53d37ee4378fbc3e4827931e1119e609bc41b64eb5a5ca93c8e8eb" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-trait", + "datafusion 48.0.1", + "datafusion-common 48.0.1", + "datafusion-functions 48.0.1", + "datafusion-physical-expr 48.0.1", + "futures", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "lance-datagen 0.31.1", + "log", + "pin-project", + "prost", + 
"snafu", + "tempfile", + "tokio", + "tracing", +] + [[package]] name = "lance-datafusion" version = "0.37.0" @@ -4363,15 +5068,15 @@ dependencies = [ "arrow-schema", "arrow-select", "async-trait", - "datafusion", - "datafusion-common", - "datafusion-functions", - "datafusion-physical-expr", + "datafusion 49.0.2", + "datafusion-common 49.0.2", + "datafusion-functions 49.0.2", + "datafusion-physical-expr 49.0.2", "futures", "jsonb", - "lance-arrow", - "lance-core", - "lance-datagen", + "lance-arrow 0.37.0", + "lance-core 0.37.0", + "lance-datagen 0.37.0", "log", "pin-project", "prost", @@ -4381,6 +5086,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-datagen" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3890c9f35c318969735387f154c7633ccd5039e6a9879b9372a4005e43182073" +dependencies = [ + "arrow", + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "futures", + "hex", + "rand 0.8.5", + "rand_xoshiro 0.6.0", + "random_word 0.5.2", +] + [[package]] name = "lance-datagen" version = "0.37.0" @@ -4396,10 +5119,51 @@ dependencies = [ "half", "hex", "rand 0.9.2", - "rand_xoshiro", + "rand_xoshiro 0.7.0", "random_word 0.5.2", ] +[[package]] +name = "lance-encoding" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b68ab7867165826bf893dcff9e2ce565e6a0299862b7315a1c0ef1470a6c9144" +dependencies = [ + "arrayref", + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "bytemuck", + "byteorder", + "bytes", + "fsst 0.31.1", + "futures", + "hex", + "hyperloglogplus", + "itertools 0.13.0", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "log", + "lz4", + "num-traits", + "paste", + "prost", + "prost-build", + "prost-types", + "rand 0.8.5", + "seq-macro", + "snafu", + "tokio", + "tracing", + "xxhash-rust", + "zstd", +] + [[package]] name = "lance-encoding" version = "0.37.0" @@ 
-4416,14 +5180,14 @@ dependencies = [ "bytemuck", "byteorder", "bytes", - "fsst", + "fsst 0.37.0", "futures", "hex", "hyperloglogplus", "itertools 0.13.0", - "lance-arrow", + "lance-arrow 0.37.0", "lance-bitpacking", - "lance-core", + "lance-core 0.37.0", "log", "lz4", "num-traits", @@ -4439,6 +5203,42 @@ dependencies = [ "zstd", ] +[[package]] +name = "lance-file" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc5c8e5b19054c74e0bf7fcfa6038bfec0f881209d66e04e41f4a4f2e0272317" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "byteorder", + "bytes", + "datafusion-common 48.0.1", + "deepsize", + "futures", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "lance-encoding 0.31.1", + "lance-io 0.31.1", + "log", + "num-traits", + "object_store", + "prost", + "prost-build", + "prost-types", + "roaring", + "snafu", + "tempfile", + "tokio", + "tracing", +] + [[package]] name = "lance-file" version = "0.37.0" @@ -4455,13 +5255,13 @@ dependencies = [ "async-trait", "byteorder", "bytes", - "datafusion-common", + "datafusion-common 49.0.2", "deepsize", "futures", - "lance-arrow", - "lance-core", - "lance-encoding", - "lance-io", + "lance-arrow 0.37.0", + "lance-core 0.37.0", + "lance-encoding 0.37.0", + "lance-io 0.37.0", "log", "num-traits", "object_store", @@ -4475,6 +5275,62 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-index" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa06deb03ad01fb42790f525e60b297ff9011a6590f769f698626a8e5ea53350" +dependencies = [ + "arrow", + "arrow-array", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-channel", + "async-recursion", + "async-trait", + "bitpacking", + "bitvec", + "bytes", + "crossbeam-queue", + "datafusion 48.0.1", + "datafusion-common 48.0.1", + "datafusion-expr 48.0.1", + 
"datafusion-physical-expr 48.0.1", + "datafusion-sql 48.0.1", + "deepsize", + "dirs", + "fst", + "futures", + "half", + "itertools 0.13.0", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "lance-datafusion 0.31.1", + "lance-encoding 0.31.1", + "lance-file 0.31.1", + "lance-io 0.31.1", + "lance-linalg 0.31.1", + "lance-table 0.31.1", + "log", + "moka", + "num-traits", + "object_store", + "prost", + "prost-build", + "rand 0.8.5", + "rayon", + "roaring", + "serde", + "serde_json", + "snafu", + "tantivy", + "tempfile", + "tokio", + "tracing", + "uuid", +] + [[package]] name = "lance-index" version = "0.37.0" @@ -4493,11 +5349,11 @@ dependencies = [ "bitvec", "bytes", "crossbeam-queue", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-sql", + "datafusion 49.0.2", + "datafusion-common 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-expr 49.0.2", + "datafusion-sql 49.0.2", "deepsize", "dirs", "fastbloom", @@ -4506,15 +5362,15 @@ dependencies = [ "half", "itertools 0.13.0", "jsonb", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-datagen", - "lance-encoding", - "lance-file", - "lance-io", - "lance-linalg", - "lance-table", + "lance-arrow 0.37.0", + "lance-core 0.37.0", + "lance-datafusion 0.37.0", + "lance-datagen 0.37.0", + "lance-encoding 0.37.0", + "lance-file 0.37.0", + "lance-io 0.37.0", + "lance-linalg 0.37.0", + "lance-table 0.37.0", "libm", "log", "num-traits", @@ -4536,6 +5392,46 @@ dependencies = [ "uuid", ] +[[package]] +name = "lance-io" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac1794d9fe428e5a75fdef80e259b5d16af7ee5c95b866f1270c78368f206ed" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-priority-channel", + "async-recursion", + "async-trait", + "aws-config", + "aws-credential-types", + "byteorder", + "bytes", + 
"chrono", + "deepsize", + "futures", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "log", + "object_store", + "path_abs", + "pin-project", + "prost", + "rand 0.8.5", + "serde", + "shellexpand", + "snafu", + "tokio", + "tracing", + "url", +] + [[package]] name = "lance-io" version = "0.37.0" @@ -4560,8 +5456,8 @@ dependencies = [ "chrono", "deepsize", "futures", - "lance-arrow", - "lance-core", + "lance-arrow 0.37.0", + "lance-core 0.37.0", "log", "object_store", "object_store_opendal", @@ -4578,6 +5474,30 @@ dependencies = [ "url", ] +[[package]] +name = "lance-linalg" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42bfda62395e6123de712a7fbd1a5db0c678e53a6f46f1c33d9729814106e7f1" +dependencies = [ + "arrow-array", + "arrow-ord", + "arrow-schema", + "bitvec", + "cc", + "deepsize", + "futures", + "half", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "log", + "num-traits", + "rand 0.8.5", + "rayon", + "tokio", + "tracing", +] + [[package]] name = "lance-linalg" version = "0.37.0" @@ -4593,8 +5513,8 @@ dependencies = [ "deepsize", "futures", "half", - "lance-arrow", - "lance-core", + "lance-arrow 0.37.0", + "lance-core 0.37.0", "log", "num-traits", "rand 0.9.2", @@ -4603,6 +5523,76 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-namespace" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8255a9cb9f71a476555f1cf016c6e8eadc86efa7071179b05533dd82998a83ac" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "lance 0.31.1", + "lance-namespace-reqwest-client", + "opendal", + "reqwest", + "serde", + "serde_json", + "thiserror 1.0.69", + "url", +] + +[[package]] +name = "lance-namespace-reqwest-client" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3b181528a13c6ba089901fd9298654a78cde4b2d8101861deaaecbcdb4b165" +dependencies = [ + "reqwest", + "serde", + "serde_json", + "serde_repr", + "url", +] + 
+[[package]] +name = "lance-table" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6919d1e0f24741a01b3ee65ee57f05e89472b75b6086874936e1f01e456f6f6f" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", + "async-trait", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "lance-arrow 0.31.1", + "lance-core 0.31.1", + "lance-file 0.31.1", + "lance-io 0.31.1", + "log", + "object_store", + "prost", + "prost-build", + "prost-types", + "rand 0.8.5", + "rangemap", + "roaring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "uuid", +] + [[package]] name = "lance-table" version = "0.37.0" @@ -4622,10 +5612,10 @@ dependencies = [ "chrono", "deepsize", "futures", - "lance-arrow", - "lance-core", - "lance-file", - "lance-io", + "lance-arrow 0.37.0", + "lance-core 0.37.0", + "lance-file 0.37.0", + "lance-io 0.37.0", "log", "object_store", "prost", @@ -4651,7 +5641,7 @@ checksum = "384acc1dd13379a2ae24f3e3635d9c1f4fb4dc1534f7ffd2740c268f2eb73455" dependencies = [ "arrow-array", "arrow-schema", - "lance-arrow", + "lance-arrow 0.37.0", "num-traits", "rand 0.9.2", ] @@ -4682,24 +5672,25 @@ dependencies = [ "candle-transformers", "chrono", "crunchy", - "datafusion", - "datafusion-catalog", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion 49.0.2", + "datafusion-catalog 49.0.2", + "datafusion-common 49.0.2", + "datafusion-execution 49.0.2", + "datafusion-expr 49.0.2", + "datafusion-physical-plan 49.0.2", "futures", "half", "hf-hub", "http 1.3.1", "http-body 1.0.1", - "lance", - "lance-datafusion", - "lance-encoding", - "lance-index", - "lance-io", - "lance-linalg", - "lance-table", + "lance 0.37.0", + "lance-datafusion 0.37.0", + "lance-encoding 0.37.0", + "lance-index 0.37.0", + "lance-io 0.37.0", + "lance-linalg 0.37.0", + "lance-namespace", + "lance-table 0.37.0", "lance-testing", 
"lazy_static", "log", @@ -4734,7 +5725,7 @@ dependencies = [ "arrow", "arrow-schema", "jni", - "lance", + "lance 0.37.0", "lancedb", "lazy_static", "serde", @@ -6679,6 +7670,15 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "rand_xoshiro" version = "0.7.0" @@ -7452,6 +8452,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -7733,6 +8744,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", + "recursive", "sqlparser_derive", ] diff --git a/Cargo.toml b/Cargo.toml index bf72f0fb..12472f09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ lance-table = "=0.37.0" lance-testing = "=0.37.0" lance-datafusion = "=0.37.0" lance-encoding = "=0.37.0" +lance-namespace = "0.0.15" # Note that this one does not include pyarrow arrow = { version = "55.1", optional = false } arrow-array = "55.1" diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index 3c344cfe..1cb83a69 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -36,6 +36,7 @@ lance-table = { workspace = true } lance-linalg = { workspace = true } lance-testing = { workspace = true } lance-encoding = { workspace = true } +lance-namespace = { workspace = true } moka = { workspace = true } pin-project = { workspace = true } tokio = { version = "1.23", features = ["rt-multi-thread"] } diff --git 
a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 41ddad43..08a10444 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -1015,6 +1015,117 @@ pub fn connect(uri: &str) -> ConnectBuilder { ConnectBuilder::new(uri) } +pub struct ConnectNamespaceBuilder { + ns_impl: String, + properties: HashMap, + storage_options: HashMap, + read_consistency_interval: Option, + embedding_registry: Option>, + session: Option>, +} + +impl ConnectNamespaceBuilder { + fn new(ns_impl: &str, properties: HashMap) -> Self { + Self { + ns_impl: ns_impl.to_string(), + properties, + storage_options: HashMap::new(), + read_consistency_interval: None, + embedding_registry: None, + session: None, + } + } + + /// Set an option for the storage layer. + /// + /// See available options at <https://lancedb.github.io/lancedb/guides/storage/> + pub fn storage_option(mut self, key: impl Into, value: impl Into) -> Self { + self.storage_options.insert(key.into(), value.into()); + self + } + + /// Set multiple options for the storage layer. + /// + /// See available options at <https://lancedb.github.io/lancedb/guides/storage/> + pub fn storage_options( + mut self, + pairs: impl IntoIterator, impl Into)>, + ) -> Self { + for (key, value) in pairs { + self.storage_options.insert(key.into(), value.into()); + } + self + } + + /// The interval at which to check for updates from other processes. + /// + /// If left unset, consistency is not checked. For maximum read + /// performance, this is the default. For strong consistency, set this to + /// zero seconds. Then every read will check for updates from other processes. + /// As a compromise, set this to a non-zero duration for eventual consistency. + pub fn read_consistency_interval( + mut self, + read_consistency_interval: std::time::Duration, + ) -> Self { + self.read_consistency_interval = Some(read_consistency_interval); + self + } + + /// Provide a custom [`EmbeddingRegistry`] to use for this connection.
+ pub fn embedding_registry(mut self, registry: Arc) -> Self { + self.embedding_registry = Some(registry); + self + } + + /// Set a custom session for object stores and caching. + /// + /// By default, a new session with default configuration will be created. + /// This method allows you to provide a custom session with your own + /// configuration for object store registries, caching, etc. + pub fn session(mut self, session: Arc) -> Self { + self.session = Some(session); + self + } + + /// Execute the connection + pub async fn execute(self) -> Result { + use crate::database::namespace::LanceNamespaceDatabase; + + let internal = Arc::new( + LanceNamespaceDatabase::connect( + &self.ns_impl, + self.properties, + self.storage_options, + self.read_consistency_interval, + self.session, + ) + .await?, + ); + + Ok(Connection { + internal, + uri: format!("namespace://{}", self.ns_impl), + embedding_registry: self + .embedding_registry + .unwrap_or_else(|| Arc::new(MemoryRegistry::new())), + }) + } +} + +/// Connect to a LanceDB database through a namespace. 
+/// +/// # Arguments +/// +/// * `ns_impl` - The namespace implementation to use (e.g., "dir" for directory-based, "rest" for REST API) +/// * `properties` - Configuration properties for the namespace implementation +/// +pub fn connect_namespace( + ns_impl: &str, + properties: HashMap, +) -> ConnectNamespaceBuilder { + ConnectNamespaceBuilder::new(ns_impl, properties) +} + #[cfg(all(test, feature = "remote"))] mod test_utils { use super::*; diff --git a/rust/lancedb/src/database.rs b/rust/lancedb/src/database.rs index 2f22ea10..e5233920 100644 --- a/rust/lancedb/src/database.rs +++ b/rust/lancedb/src/database.rs @@ -29,6 +29,7 @@ use crate::error::Result; use crate::table::{BaseTable, TableDefinition, WriteOptions}; pub mod listing; +pub mod namespace; pub trait DatabaseOptions { fn serialize_into_map(&self, map: &mut HashMap); diff --git a/rust/lancedb/src/database/namespace.rs b/rust/lancedb/src/database/namespace.rs new file mode 100644 index 00000000..e7a6b41f --- /dev/null +++ b/rust/lancedb/src/database/namespace.rs @@ -0,0 +1,840 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The LanceDB Authors + +//!
Namespace-based database implementation that delegates table management to lance-namespace + +use std::collections::HashMap; +use std::sync::Arc; + +use async_trait::async_trait; +use lance_namespace::{ + connect as connect_namespace, + models::{ + CreateEmptyTableRequest, CreateNamespaceRequest, DescribeTableRequest, + DropNamespaceRequest, DropTableRequest, ListNamespacesRequest, ListTablesRequest, + }, + LanceNamespace, +}; + +use crate::connection::ConnectRequest; +use crate::database::listing::ListingDatabase; +use crate::error::{Error, Result}; + +use super::{ + BaseTable, CloneTableRequest, CreateNamespaceRequest as DbCreateNamespaceRequest, + CreateTableMode, CreateTableRequest as DbCreateTableRequest, Database, + DropNamespaceRequest as DbDropNamespaceRequest, + ListNamespacesRequest as DbListNamespacesRequest, OpenTableRequest, TableNamesRequest, +}; + +/// A database implementation that uses lance-namespace for table management +pub struct LanceNamespaceDatabase { + namespace: Arc, + // Storage options to be inherited by tables + storage_options: HashMap, + // Read consistency interval for tables + read_consistency_interval: Option, + // Optional session for object stores and caching + session: Option>, +} + +impl LanceNamespaceDatabase { + pub async fn connect( + ns_impl: &str, + ns_properties: HashMap, + storage_options: HashMap, + read_consistency_interval: Option, + session: Option>, + ) -> Result { + let namespace = connect_namespace(ns_impl, ns_properties.clone()) + .await + .map_err(|e| Error::InvalidInput { + message: format!("Failed to connect to namespace: {:?}", e), + })?; + + Ok(Self { + namespace, + storage_options, + read_consistency_interval, + session, + }) + } + + /// Helper method to create a ListingDatabase from a table location + /// + /// This method: + /// 1. Validates that the location ends with `{table_name}.lance` + /// 2. Extracts the parent directory from the location + /// 3.
Creates a ListingDatabase at that parent directory + async fn create_listing_database( + &self, + table_name: &str, + location: &str, + additional_storage_options: Option>, + ) -> Result> { + let expected_suffix = format!("{}.lance", table_name); + if !location.ends_with(&expected_suffix) { + return Err(Error::Runtime { + message: format!( + "Invalid table location '{}': expected to end with '{}'", + location, expected_suffix + ), + }); + } + + let parent_dir = location + .rsplit_once('/') + .map(|(parent, _)| parent.to_string()) + .ok_or_else(|| Error::Runtime { + message: format!("Invalid table location '{}': no parent directory", location), + })?; + + let mut merged_storage_options = self.storage_options.clone(); + if let Some(opts) = additional_storage_options { + merged_storage_options.extend(opts); + } + + let connect_request = ConnectRequest { + uri: parent_dir, + options: merged_storage_options, + read_consistency_interval: self.read_consistency_interval, + session: self.session.clone(), + #[cfg(feature = "remote")] + client_config: Default::default(), + }; + + let listing_db = ListingDatabase::connect_with_options(&connect_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to create listing database: {}", e), + })?; + + Ok(Arc::new(listing_db)) + } +} + +impl std::fmt::Debug for LanceNamespaceDatabase { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LanceNamespaceDatabase") + .field("storage_options", &self.storage_options) + .field("read_consistency_interval", &self.read_consistency_interval) + .finish() + } +} + +impl std::fmt::Display for LanceNamespaceDatabase { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "LanceNamespaceDatabase") + } +} + +#[async_trait] +impl Database for LanceNamespaceDatabase { + async fn list_namespaces(&self, request: DbListNamespacesRequest) -> Result> { + let ns_request = ListNamespacesRequest { + id: if 
request.namespace.is_empty() { + None + } else { + Some(request.namespace) + }, + page_token: request.page_token, + limit: request.limit.map(|l| l as i32), + }; + + let response = self + .namespace + .list_namespaces(ns_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to list namespaces: {}", e), + })?; + + Ok(response.namespaces) + } + + async fn create_namespace(&self, request: DbCreateNamespaceRequest) -> Result<()> { + let ns_request = CreateNamespaceRequest { + id: if request.namespace.is_empty() { + None + } else { + Some(request.namespace) + }, + mode: None, + properties: None, + }; + + self.namespace + .create_namespace(ns_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to create namespace: {}", e), + })?; + + Ok(()) + } + + async fn drop_namespace(&self, request: DbDropNamespaceRequest) -> Result<()> { + let ns_request = DropNamespaceRequest { + id: if request.namespace.is_empty() { + None + } else { + Some(request.namespace) + }, + mode: None, + behavior: None, + }; + + self.namespace + .drop_namespace(ns_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to drop namespace: {}", e), + })?; + + Ok(()) + } + + async fn table_names(&self, request: TableNamesRequest) -> Result> { + let ns_request = ListTablesRequest { + id: if request.namespace.is_empty() { + None + } else { + Some(request.namespace) + }, + page_token: request.start_after, + limit: request.limit.map(|l| l as i32), + }; + + let response = + self.namespace + .list_tables(ns_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to list tables: {}", e), + })?; + + Ok(response.tables) + } + + async fn create_table(&self, request: DbCreateTableRequest) -> Result> { + let mut table_id = request.namespace.clone(); + table_id.push(request.name.clone()); + let describe_request = DescribeTableRequest { + id: Some(table_id.clone()), + version: None, + }; + + let describe_result = 
self.namespace.describe_table(describe_request).await; + + match request.mode { + CreateTableMode::Create => { + if describe_result.is_ok() { + return Err(Error::TableAlreadyExists { + name: request.name.clone(), + }); + } + } + CreateTableMode::Overwrite => { + if describe_result.is_ok() { + // Drop the existing table - must succeed + let drop_request = DropTableRequest { + id: Some(table_id.clone()), + }; + self.namespace + .drop_table(drop_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to drop existing table for overwrite: {}", e), + })?; + } + } + CreateTableMode::ExistOk(_) => { + if let Ok(response) = describe_result { + let location = response.location.ok_or_else(|| Error::Runtime { + message: "Table location is missing from namespace response".to_string(), + })?; + + let listing_db = self + .create_listing_database(&request.name, &location, response.storage_options) + .await?; + + return listing_db + .open_table(OpenTableRequest { + name: request.name.clone(), + namespace: request.namespace.clone(), + index_cache_size: None, + lance_read_params: None, + }) + .await; + } + } + } + + let mut table_id = request.namespace.clone(); + table_id.push(request.name.clone()); + + let create_empty_request = CreateEmptyTableRequest { + id: Some(table_id), + location: None, + properties: if self.storage_options.is_empty() { + None + } else { + Some(self.storage_options.clone()) + }, + }; + + let create_empty_response = self + .namespace + .create_empty_table(create_empty_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to create empty table: {}", e), + })?; + + let location = create_empty_response + .location + .ok_or_else(|| Error::Runtime { + message: "Table location is missing from create_empty_table response".to_string(), + })?; + + let listing_db = self + .create_listing_database( + &request.name, + &location, + create_empty_response.storage_options, + ) + .await?; + + listing_db.create_table(request).await + 
} + + async fn open_table(&self, request: OpenTableRequest) -> Result> { + let mut table_id = request.namespace.clone(); + table_id.push(request.name.clone()); + + let describe_request = DescribeTableRequest { + id: Some(table_id), + version: None, + }; + let response = self + .namespace + .describe_table(describe_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to describe table: {}", e), + })?; + + let location = response.location.ok_or_else(|| Error::Runtime { + message: "Table location is missing from namespace response".to_string(), + })?; + + let listing_db = self + .create_listing_database(&request.name, &location, response.storage_options) + .await?; + + listing_db.open_table(request).await + } + + async fn clone_table(&self, _request: CloneTableRequest) -> Result> { + Err(Error::NotSupported { + message: "clone_table is not supported for namespace connections".to_string(), + }) + } + + async fn rename_table( + &self, + _cur_name: &str, + _new_name: &str, + _cur_namespace: &[String], + _new_namespace: &[String], + ) -> Result<()> { + Err(Error::NotSupported { + message: "rename_table is not supported for namespace connections".to_string(), + }) + } + + async fn drop_table(&self, name: &str, namespace: &[String]) -> Result<()> { + let mut table_id = namespace.to_vec(); + table_id.push(name.to_string()); + + let drop_request = DropTableRequest { id: Some(table_id) }; + self.namespace + .drop_table(drop_request) + .await + .map_err(|e| Error::Runtime { + message: format!("Failed to drop table: {}", e), + })?; + + Ok(()) + } + + async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> { + let tables = self + .table_names(TableNamesRequest { + namespace: namespace.to_vec(), + start_after: None, + limit: None, + }) + .await?; + + for table in tables { + self.drop_table(&table, namespace).await?; + } + + Ok(()) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +#[cfg(test)] +#[cfg(not(windows))] // TODO: 
support windows for lance-namespace +mod tests { + use super::*; + use crate::connect_namespace; + use crate::query::ExecutableQuery; + use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray}; + use arrow_schema::{DataType, Field, Schema}; + use futures::TryStreamExt; + use tempfile::tempdir; + + /// Helper function to create test data + fn create_test_data() -> RecordBatchIterator< + std::vec::IntoIter>, + > { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]); + let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie", "David", "Eve"]); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(id_array), Arc::new(name_array)], + ) + .unwrap(); + RecordBatchIterator::new(vec![std::result::Result::Ok(batch)].into_iter(), schema) + } + + #[tokio::test] + async fn test_namespace_connection_simple() { + // Test that namespace connections work with simple connect_namespace(impl_type, properties) + let tmp_dir = tempdir().unwrap(); + let root_path = tmp_dir.path().to_str().unwrap().to_string(); + + let mut properties = HashMap::new(); + properties.insert("root".to_string(), root_path); + + // This should succeed with directory-based namespace + let result = connect_namespace("dir", properties).execute().await; + + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_namespace_connection_with_storage_options() { + // Test namespace connections with storage options + let tmp_dir = tempdir().unwrap(); + let root_path = tmp_dir.path().to_str().unwrap().to_string(); + + let mut properties = HashMap::new(); + properties.insert("root".to_string(), root_path); + + // This should succeed with directory-based namespace and storage options + let result = connect_namespace("dir", properties) + .storage_option("timeout", "30s") + .execute() + .await; + + assert!(result.is_ok()); + } + + 
#[tokio::test]
+    async fn test_namespace_connection_with_all_options() {
+        use crate::embeddings::MemoryRegistry;
+        use std::time::Duration;
+
+        // Test namespace connections with all configuration options
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let embedding_registry = Arc::new(MemoryRegistry::new());
+        let session = Arc::new(lance::session::Session::default());
+
+        // Test with all options set
+        let result = connect_namespace("dir", properties)
+            .storage_option("timeout", "30s")
+            .storage_options([("cache_size", "1gb"), ("region", "us-east-1")])
+            .read_consistency_interval(Duration::from_secs(5))
+            .embedding_registry(embedding_registry.clone())
+            .session(session.clone())
+            .execute()
+            .await;
+
+        assert!(result.is_ok());
+
+        let conn = result.unwrap();
+
+        // Verify embedding registry is set correctly
+        assert!(std::ptr::eq(
+            conn.embedding_registry() as *const _,
+            embedding_registry.as_ref() as *const _
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_table_basic() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Connect to namespace using DirectoryNamespace
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Test: Create a table
+        let test_data = create_test_data();
+        let table = conn
+            .create_table("test_table", test_data)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Verify: Table was created and can be queried
+        let results = table
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 5);
+
+        // Verify: Table appears in table_names
+        let table_names = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+        assert!(table_names.contains(&"test_table".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_namespace_describe_table() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Connect to namespace
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create a table first
+        let test_data = create_test_data();
+        let _table = conn
+            .create_table("describe_test", test_data)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Test: Open the table (which internally uses describe_table)
+        let opened_table = conn
+            .open_table("describe_test")
+            .execute()
+            .await
+            .expect("Failed to open table");
+
+        // Verify: Can query the opened table
+        let results = opened_table
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 5);
+
+        // Verify schema matches
+        let schema = opened_table.schema().await.expect("Failed to get schema");
+        assert_eq!(schema.fields.len(), 2);
+        assert_eq!(schema.field(0).name(), "id");
+        assert_eq!(schema.field(1).name(), "name");
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_table_overwrite_mode() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create initial table with 5 rows
+        let test_data1 = create_test_data();
+        let _table1 = conn
+            .create_table("overwrite_test", test_data1)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Create new data with 3 rows
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, false),
+        ]));
+        let id_array = Int32Array::from(vec![10, 20, 30]);
+        let name_array = StringArray::from(vec!["New1", "New2", "New3"]);
+        let test_data2 = RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(id_array), Arc::new(name_array)],
+        )
+        .unwrap();
+
+        // Test: Overwrite the table
+        let table2 = conn
+            .create_table(
+                "overwrite_test",
+                RecordBatchIterator::new(
+                    vec![std::result::Result::Ok(test_data2)].into_iter(),
+                    schema,
+                ),
+            )
+            .mode(CreateTableMode::Overwrite)
+            .execute()
+            .await
+            .expect("Failed to overwrite table");
+
+        // Verify: Table has new data (3 rows instead of 5)
+        let results = table2
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 3);
+
+        // Verify the data is actually the new data
+        let id_col = results[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id_col.value(0), 10);
+        assert_eq!(id_col.value(1), 20);
+        assert_eq!(id_col.value(2), 30);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_table_exist_ok_mode() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create initial table with test data
+        let test_data1 = create_test_data();
+        let _table1 = conn
+            .create_table("exist_ok_test", test_data1)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Try to create again with exist_ok mode
+        let test_data2 = create_test_data();
+        let table2 = conn
+            .create_table("exist_ok_test", test_data2)
+            .mode(CreateTableMode::exist_ok(|req| req))
+            .execute()
+            .await
+            .expect("Failed with exist_ok mode");
+
+        // Verify: Table still has original data (5 rows)
+        let results = table2
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 5);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_multiple_tables() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create first table
+        let test_data1 = create_test_data();
+        let _table1 = conn
+            .create_table("table1", test_data1)
+            .execute()
+            .await
+            .expect("Failed to create first table");
+
+        // Create second table
+        let test_data2 = create_test_data();
+        let _table2 = conn
+            .create_table("table2", test_data2)
+            .execute()
+            .await
+            .expect("Failed to create second table");
+
+        // Verify: Both tables appear in table list
+        let table_names = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+
+        assert!(table_names.contains(&"table1".to_string()));
+        assert!(table_names.contains(&"table2".to_string()));
+
+        // Verify: Can open both tables
+        let opened_table1 = conn
+            .open_table("table1")
+            .execute()
+            .await
+            .expect("Failed to open table1");
+
+        let opened_table2 = conn
+            .open_table("table2")
+            .execute()
+            .await
+            .expect("Failed to open table2");
+
+        // Verify both tables work
+        let count1 = opened_table1
+            .count_rows(None)
+            .await
+            .expect("Failed to count rows in table1");
+        assert_eq!(count1, 5);
+
+        let count2 = opened_table2
+            .count_rows(None)
+            .await
+            .expect("Failed to count rows in table2");
+        assert_eq!(count2, 5);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_table_not_found() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Test: Try to open a non-existent table
+        let result = conn.open_table("non_existent_table").execute().await;
+
+        // Verify: Should return an error
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_namespace_drop_table() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create a table first
+        let test_data = create_test_data();
+        let _table = conn
+            .create_table("drop_test", test_data)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Verify table exists
+        let table_names_before = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+        assert!(table_names_before.contains(&"drop_test".to_string()));
+
+        // Test: Drop the table
+        conn.drop_table("drop_test", &[])
+            .await
+            .expect("Failed to drop table");
+
+        // Verify: Table no longer exists
+        let table_names_after = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+        assert!(!table_names_after.contains(&"drop_test".to_string()));
+
+        // Verify: Cannot open dropped table
+        let open_result = conn.open_table("drop_test").execute().await;
+        assert!(open_result.is_err());
+    }
+}
diff --git a/rust/lancedb/src/lib.rs b/rust/lancedb/src/lib.rs
index 5f9f7ac6..e30ca714 100644
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -212,7 +212,7 @@ use std::fmt::Display;
 
 use serde::{Deserialize, Serialize};
 
-pub use connection::Connection;
+pub use connection::{ConnectNamespaceBuilder, Connection};
 pub use error::{Error, Result};
 use lance_linalg::distance::DistanceType as LanceDistanceType;
 pub use table::Table;
@@ -289,6 +289,8 @@ impl Display for DistanceType {
 
 /// Connect to a database
 pub use connection::connect;
+/// Connect to a namespace-backed database
+pub use connection::connect_namespace;
 
 /// Re-export Lance Session and ObjectStoreRegistry for custom session creation
 pub use lance::session::Session;