diff --git a/Cargo.lock b/Cargo.lock index 29367189..6b063413 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1816,27 +1816,30 @@ dependencies = [ [[package]] name = "datafusion" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" dependencies = [ "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", "async-trait", "bytes", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1844,14 +1847,13 @@ dependencies = [ "datafusion-physical-plan", "datafusion-sql", "futures", - "glob", "itertools 0.14.0", "log", "object_store", "parking_lot", "rand 0.8.5", "regex", - "sqlparser 0.53.0", + "sqlparser 0.54.0", "tempfile", "tokio", "url", @@ -1860,9 +1862,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" dependencies = [ "arrow", "async-trait", @@ -1876,21 +1878,39 @@ dependencies = [ "itertools 0.14.0", "log", "parking_lot", - "sqlparser 0.53.0", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64 0.22.1", "half", "hashbrown 0.14.5", @@ -1899,32 +1919,60 @@ dependencies = [ "log", "object_store", "paste", - "sqlparser 0.53.0", + "sqlparser 0.54.0", "tokio", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" dependencies = [ "log", "tokio", ] [[package]] -name = "datafusion-doc" -version = "45.0.0" +name = "datafusion-datasource" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand 0.8.5", + "tokio", + "url", +] + +[[package]] +name = "datafusion-doc" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" [[package]] name = "datafusion-execution" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" dependencies = [ "arrow", "dashmap", @@ -1941,9 +1989,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" dependencies = [ "arrow", "chrono", @@ -1956,26 +2004,27 @@ dependencies = [ "indexmap 2.8.0", "paste", "serde_json", - "sqlparser 0.53.0", + "sqlparser 0.54.0", ] [[package]] name = "datafusion-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" dependencies = [ "arrow", "datafusion-common", + "indexmap 2.8.0", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" dependencies = [ "arrow", "arrow-buffer", @@ -1989,7 +2038,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -2003,14 +2051,12 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2026,9 +2072,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" dependencies = [ "ahash", "arrow", @@ -2039,15 +2085,12 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2063,9 +2106,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" dependencies = [ "arrow", "async-trait", @@ -2079,9 +2122,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" dependencies = [ "datafusion-common", "datafusion-doc", @@ -2096,9 +2139,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2106,9 +2149,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" dependencies = [ "datafusion-expr", "quote", @@ -2117,9 +2160,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" dependencies = [ "arrow", "chrono", @@ -2135,15 +2178,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -2160,13 +2200,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -2175,12 +2214,11 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -2188,22 +2226,18 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", - "url", ] [[package]] name = "datafusion-physical-plan" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait", @@ -2228,20 +2262,18 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", "indexmap 2.8.0", "log", "regex", - "sqlparser 0.53.0", + "sqlparser 0.54.0", ] [[package]] @@ -2687,12 +2719,21 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "rand 0.8.5", ] +[[package]] +name = "fst" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" +dependencies = [ + "utf8-ranges", +] + [[package]] name = "funty" version = "2.0.0" @@ -3666,8 +3707,8 @@ dependencies = [ [[package]] name = "lance" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow", "arrow-arith", @@ -3726,8 +3767,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow-array", "arrow-buffer", @@ -3744,8 +3785,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow-array", "arrow-buffer", @@ -3781,8 +3822,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow", "arrow-array", @@ -3798,6 +3839,7 @@ dependencies = [ "futures", "lance-arrow", "lance-core", + "lance-datagen", "lazy_static", "log", "prost", @@ -3806,10 +3848,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-datagen" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" +dependencies = [ + "arrow", + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "futures", + "hex", + "rand 0.8.5", + "rand_xoshiro", +] + [[package]] name = "lance-encoding" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrayref", "arrow", @@ -3832,6 +3890,7 @@ dependencies = [ "lance-core", "lazy_static", "log", + "lz4", "num-traits", "paste", "prost", @@ -3847,8 +3906,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow-arith", "arrow-array", @@ -3882,8 +3941,8 @@ dependencies = [ [[package]] name = "lance-index" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow", "arrow-array", @@ -3902,6 +3961,7 @@ dependencies = [ "datafusion-sql", "deepsize", "dirs", + "fst", "futures", "half", "itertools 0.13.0", @@ -3935,8 +3995,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow", "arrow-arith", @@ -3974,8 +4034,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow-array", "arrow-ord", @@ -3998,8 +4058,8 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow", "arrow-array", @@ -4038,8 +4098,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "0.25.1" -source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a" +version = "0.25.3" +source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c" dependencies = [ "arrow-array", "arrow-schema", @@ -4050,7 +4110,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.18.3-beta.0" +version = "0.19.0-beta.0" dependencies = [ "arrow", "arrow-array", @@ -4137,7 +4197,7 @@ dependencies = [ [[package]] name = "lancedb-node" -version = "0.18.3-beta.0" +version = "0.19.0-beta.0" dependencies = [ "arrow-array", "arrow-ipc", @@ -4162,7 +4222,7 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.18.3-beta.0" +version = "0.19.0-beta.0" dependencies = [ "arrow-array", "arrow-ipc", @@ -4180,7 +4240,7 @@ dependencies = [ [[package]] name = "lancedb-python" -version = "0.21.3-beta.0" +version = "0.22.0-beta.0" dependencies = [ "arrow", "env_logger", @@ -5895,6 +5955,15 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "random_word" version = "0.4.3" @@ -6781,11 +6850,12 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -7636,7 +7706,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "getrandom 0.3.2", + "js-sys", "serde", + "wasm-bindgen", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bb2cf7a3..f752ea45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,16 +21,16 @@ categories = ["database-implementations"] rust-version = "1.78.0" [workspace.dependencies] -lance = { "version" = "=0.25.1", "features" = [ +lance = { "version" = "=0.25.3", "features" = [ "dynamodb", -], tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-io = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-index = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-linalg = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-table = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-testing = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-datafusion = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } -lance-encoding = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" } +], tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-io = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-index = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-linalg = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-table = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-testing = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-datafusion = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } +lance-encoding = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" } # Note that this one does not include pyarrow arrow = { version = "54.1", optional = false } arrow-array = "54.1" @@ -41,12 +41,12 @@ arrow-schema = "54.1" arrow-arith = "54.1" arrow-cast = "54.1" async-trait = "0" -datafusion = { version = "45.0", default-features = false } -datafusion-catalog = "45.0" -datafusion-common = { version = "45.0", default-features = false } -datafusion-execution = "45.0" -datafusion-expr = "45.0" -datafusion-physical-plan = "45.0" +datafusion = { version = "46.0", default-features = false } +datafusion-catalog = "46.0" +datafusion-common = { version = "46.0", default-features = false } +datafusion-execution = "46.0" +datafusion-expr = "46.0" +datafusion-physical-plan = "46.0" env_logger = "0.11" half = { "version" = "=2.4.1", default-features = false, features = [ "num-traits", diff --git a/docs/src/js/classes/BoostQuery.md b/docs/src/js/classes/BoostQuery.md new file mode 100644 index 00000000..450c3cc3 --- /dev/null +++ b/docs/src/js/classes/BoostQuery.md @@ -0,0 +1,75 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / BoostQuery + +# Class: BoostQuery + +Represents a full-text query interface. +This interface defines the structure and behavior for full-text queries, +including methods to retrieve the query type and convert the query to a dictionary format. + +## Implements + +- [`FullTextQuery`](../interfaces/FullTextQuery.md) + +## Constructors + +### new BoostQuery() + +```ts +new BoostQuery( + positive, + negative, + negativeBoost): BoostQuery +``` + +Creates an instance of BoostQuery. + +#### Parameters + +* **positive**: [`FullTextQuery`](../interfaces/FullTextQuery.md) + The positive query that boosts the relevance score. + +* **negative**: [`FullTextQuery`](../interfaces/FullTextQuery.md) + The negative query that reduces the relevance score. + +* **negativeBoost**: `number` + The factor by which the negative query reduces the score. + +#### Returns + +[`BoostQuery`](BoostQuery.md) + +## Methods + +### queryType() + +```ts +queryType(): FullTextQueryType +``` + +#### Returns + +[`FullTextQueryType`](../enumerations/FullTextQueryType.md) + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype) + +*** + +### toDict() + +```ts +toDict(): Record +``` + +#### Returns + +`Record`<`string`, `unknown`> + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict) diff --git a/docs/src/js/classes/MatchQuery.md b/docs/src/js/classes/MatchQuery.md new file mode 100644 index 00000000..9b0e6a95 --- /dev/null +++ b/docs/src/js/classes/MatchQuery.md @@ -0,0 +1,83 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / MatchQuery + +# Class: MatchQuery + +Represents a full-text query interface. +This interface defines the structure and behavior for full-text queries, +including methods to retrieve the query type and convert the query to a dictionary format. + +## Implements + +- [`FullTextQuery`](../interfaces/FullTextQuery.md) + +## Constructors + +### new MatchQuery() + +```ts +new MatchQuery( + query, + column, + boost, + fuzziness, + maxExpansions): MatchQuery +``` + +Creates an instance of MatchQuery. + +#### Parameters + +* **query**: `string` + The text query to search for. + +* **column**: `string` + The name of the column to search within. + +* **boost**: `number` = `1.0` + (Optional) The boost factor to influence the relevance score of this query. Default is `1.0`. + +* **fuzziness**: `number` = `0` + (Optional) The allowed edit distance for fuzzy matching. Default is `0`. + +* **maxExpansions**: `number` = `50` + (Optional) The maximum number of terms to consider for fuzzy matching. Default is `50`. + +#### Returns + +[`MatchQuery`](MatchQuery.md) + +## Methods + +### queryType() + +```ts +queryType(): FullTextQueryType +``` + +#### Returns + +[`FullTextQueryType`](../enumerations/FullTextQueryType.md) + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype) + +*** + +### toDict() + +```ts +toDict(): Record +``` + +#### Returns + +`Record`<`string`, `unknown`> + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict) diff --git a/docs/src/js/classes/MultiMatchQuery.md b/docs/src/js/classes/MultiMatchQuery.md new file mode 100644 index 00000000..f1e5673f --- /dev/null +++ b/docs/src/js/classes/MultiMatchQuery.md @@ -0,0 +1,77 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / MultiMatchQuery + +# Class: MultiMatchQuery + +Represents a full-text query interface. +This interface defines the structure and behavior for full-text queries, +including methods to retrieve the query type and convert the query to a dictionary format. + +## Implements + +- [`FullTextQuery`](../interfaces/FullTextQuery.md) + +## Constructors + +### new MultiMatchQuery() + +```ts +new MultiMatchQuery( + query, + columns, + boosts): MultiMatchQuery +``` + +Creates an instance of MultiMatchQuery. + +#### Parameters + +* **query**: `string` + The text query to search for across multiple columns. + +* **columns**: `string`[] + An array of column names to search within. + +* **boosts**: `number`[] = `...` + (Optional) An array of boost factors corresponding to each column. Default is an array of 1.0 for each column. + The `boosts` array should have the same length as `columns`. If not provided, all columns will have a default boost of 1.0. + If the length of `boosts` is less than `columns`, it will be padded with 1.0s. + +#### Returns + +[`MultiMatchQuery`](MultiMatchQuery.md) + +## Methods + +### queryType() + +```ts +queryType(): FullTextQueryType +``` + +#### Returns + +[`FullTextQueryType`](../enumerations/FullTextQueryType.md) + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype) + +*** + +### toDict() + +```ts +toDict(): Record +``` + +#### Returns + +`Record`<`string`, `unknown`> + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict) diff --git a/docs/src/js/classes/PhraseQuery.md b/docs/src/js/classes/PhraseQuery.md new file mode 100644 index 00000000..66b62dd4 --- /dev/null +++ b/docs/src/js/classes/PhraseQuery.md @@ -0,0 +1,69 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / PhraseQuery + +# Class: PhraseQuery + +Represents a full-text query interface. +This interface defines the structure and behavior for full-text queries, +including methods to retrieve the query type and convert the query to a dictionary format. + +## Implements + +- [`FullTextQuery`](../interfaces/FullTextQuery.md) + +## Constructors + +### new PhraseQuery() + +```ts +new PhraseQuery(query, column): PhraseQuery +``` + +Creates an instance of `PhraseQuery`. + +#### Parameters + +* **query**: `string` + The phrase to search for in the specified column. + +* **column**: `string` + The name of the column to search within. + +#### Returns + +[`PhraseQuery`](PhraseQuery.md) + +## Methods + +### queryType() + +```ts +queryType(): FullTextQueryType +``` + +#### Returns + +[`FullTextQueryType`](../enumerations/FullTextQueryType.md) + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype) + +*** + +### toDict() + +```ts +toDict(): Record +``` + +#### Returns + +`Record`<`string`, `unknown`> + +#### Implementation of + +[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict) diff --git a/docs/src/js/classes/Query.md b/docs/src/js/classes/Query.md index 3d35251d..ae1ff574 100644 --- a/docs/src/js/classes/Query.md +++ b/docs/src/js/classes/Query.md @@ -206,7 +206,7 @@ fullTextSearch(query, options?): this #### Parameters -* **query**: `string` +* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md) * **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)> @@ -309,7 +309,7 @@ nearestToText(query, columns?): Query #### Parameters -* **query**: `string` +* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md) * **columns?**: `string`[] diff --git a/docs/src/js/classes/QueryBase.md b/docs/src/js/classes/QueryBase.md index 77bb1690..b58173ed 100644 --- a/docs/src/js/classes/QueryBase.md +++ b/docs/src/js/classes/QueryBase.md @@ -192,7 +192,7 @@ fullTextSearch(query, options?): this #### Parameters -* **query**: `string` +* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md) * **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)> diff --git a/docs/src/js/classes/VectorQuery.md b/docs/src/js/classes/VectorQuery.md index c5d86c70..568ded42 100644 --- a/docs/src/js/classes/VectorQuery.md +++ b/docs/src/js/classes/VectorQuery.md @@ -347,7 +347,7 @@ fullTextSearch(query, options?): this #### Parameters -* **query**: `string` +* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md) * **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)> diff --git a/docs/src/js/enumerations/FullTextQueryType.md b/docs/src/js/enumerations/FullTextQueryType.md new file mode 100644 index 00000000..baec0d51 --- /dev/null +++ b/docs/src/js/enumerations/FullTextQueryType.md @@ -0,0 +1,46 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / FullTextQueryType + +# Enumeration: FullTextQueryType + +Enum representing the types of full-text queries supported. + +- `Match`: Performs a full-text search for terms in the query string. +- `MatchPhrase`: Searches for an exact phrase match in the text. +- `Boost`: Boosts the relevance score of specific terms in the query. +- `MultiMatch`: Searches across multiple fields for the query terms. + +## Enumeration Members + +### Boost + +```ts +Boost: "boost"; +``` + +*** + +### Match + +```ts +Match: "match"; +``` + +*** + +### MatchPhrase + +```ts +MatchPhrase: "match_phrase"; +``` + +*** + +### MultiMatch + +```ts +MultiMatch: "multi_match"; +``` diff --git a/docs/src/js/globals.md b/docs/src/js/globals.md index f57acf68..2b1d546c 100644 --- a/docs/src/js/globals.md +++ b/docs/src/js/globals.md @@ -9,12 +9,20 @@ - [embedding](namespaces/embedding/README.md) - [rerankers](namespaces/rerankers/README.md) +## Enumerations + +- [FullTextQueryType](enumerations/FullTextQueryType.md) + ## Classes +- [BoostQuery](classes/BoostQuery.md) - [Connection](classes/Connection.md) - [Index](classes/Index.md) - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md) +- [MatchQuery](classes/MatchQuery.md) - [MergeInsertBuilder](classes/MergeInsertBuilder.md) +- [MultiMatchQuery](classes/MultiMatchQuery.md) +- [PhraseQuery](classes/PhraseQuery.md) - [Query](classes/Query.md) - [QueryBase](classes/QueryBase.md) - [RecordBatchIterator](classes/RecordBatchIterator.md) @@ -33,6 +41,7 @@ - [CreateTableOptions](interfaces/CreateTableOptions.md) - [ExecutableQuery](interfaces/ExecutableQuery.md) - [FtsOptions](interfaces/FtsOptions.md) +- [FullTextQuery](interfaces/FullTextQuery.md) - [FullTextSearchOptions](interfaces/FullTextSearchOptions.md) - [HnswPqOptions](interfaces/HnswPqOptions.md) - [HnswSqOptions](interfaces/HnswSqOptions.md) diff --git a/docs/src/js/interfaces/FullTextQuery.md b/docs/src/js/interfaces/FullTextQuery.md new file mode 100644 index 00000000..bf63433e --- /dev/null +++ b/docs/src/js/interfaces/FullTextQuery.md @@ -0,0 +1,35 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / FullTextQuery + +# Interface: FullTextQuery + +Represents a full-text query interface. +This interface defines the structure and behavior for full-text queries, +including methods to retrieve the query type and convert the query to a dictionary format. + +## Methods + +### queryType() + +```ts +queryType(): FullTextQueryType +``` + +#### Returns + +[`FullTextQueryType`](../enumerations/FullTextQueryType.md) + +*** + +### toDict() + +```ts +toDict(): Record +``` + +#### Returns + +`Record`<`string`, `unknown`> diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 76eef118..969bb396 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -47,6 +47,12 @@ export { QueryExecutionOptions, FullTextSearchOptions, RecordBatchIterator, + FullTextQuery, + MatchQuery, + PhraseQuery, + BoostQuery, + MultiMatchQuery, + FullTextQueryType, } from "./query"; export { diff --git a/nodejs/lancedb/query.ts b/nodejs/lancedb/query.ts index 3badae26..0db81982 100644 --- a/nodejs/lancedb/query.ts +++ b/nodejs/lancedb/query.ts @@ -17,6 +17,7 @@ import { VectorQuery as NativeVectorQuery, } from "./native"; import { Reranker } from "./rerankers"; + export class RecordBatchIterator implements AsyncIterator { private promisedInner?: Promise; private inner?: NativeBatchIterator; @@ -152,7 +153,7 @@ export class QueryBase } fullTextSearch( - query: string, + query: string | FullTextQuery, options?: Partial, ): this { let columns: string[] | null = null; @@ -164,9 +165,18 @@ export class QueryBase } } - this.doCall((inner: NativeQueryType) => - inner.fullTextSearch(query, columns), - ); + this.doCall((inner: NativeQueryType) => { + if (typeof query === "string") { + inner.fullTextSearch({ + query: query, + columns: columns, + }); + } else { + // If query is a FullTextQuery object, convert it to a dict + const queryObj = query.toDict(); + inner.fullTextSearch(queryObj); + } + }); return this; } @@ -718,8 +728,167 @@ export class Query extends QueryBase { } } - nearestToText(query: string, columns?: string[]): Query { - this.doCall((inner) => inner.fullTextSearch(query, columns)); + nearestToText(query: string | FullTextQuery, columns?: string[]): Query { + this.doCall((inner) => { + if (typeof query === "string") { + inner.fullTextSearch({ + query: query, + columns: columns, + }); + } else { + const queryObj = query.toDict(); + inner.fullTextSearch(queryObj); + } + }); return this; } } + +/** + * Enum representing the types of full-text queries supported. + * + * - `Match`: Performs a full-text search for terms in the query string. + * - `MatchPhrase`: Searches for an exact phrase match in the text. + * - `Boost`: Boosts the relevance score of specific terms in the query. + * - `MultiMatch`: Searches across multiple fields for the query terms. + */ +export enum FullTextQueryType { + Match = "match", + MatchPhrase = "match_phrase", + Boost = "boost", + MultiMatch = "multi_match", +} + +/** + * Represents a full-text query interface. + * This interface defines the structure and behavior for full-text queries, + * including methods to retrieve the query type and convert the query to a dictionary format. + */ +export interface FullTextQuery { + queryType(): FullTextQueryType; + toDict(): Record; +} + +export class MatchQuery implements FullTextQuery { + /** + * Creates an instance of MatchQuery. + * + * @param query - The text query to search for. + * @param column - The name of the column to search within. + * @param boost - (Optional) The boost factor to influence the relevance score of this query. Default is `1.0`. + * @param fuzziness - (Optional) The allowed edit distance for fuzzy matching. Default is `0`. + * @param maxExpansions - (Optional) The maximum number of terms to consider for fuzzy matching. Default is `50`. + */ + constructor( + private query: string, + private column: string, + private boost: number = 1.0, + private fuzziness: number = 0, + private maxExpansions: number = 50, + ) {} + + queryType(): FullTextQueryType { + return FullTextQueryType.Match; + } + + toDict(): Record { + return { + [this.queryType()]: { + [this.column]: { + query: this.query, + boost: this.boost, + fuzziness: this.fuzziness, + // biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs + max_expansions: this.maxExpansions, + }, + }, + }; + } +} + +export class PhraseQuery implements FullTextQuery { + /** + * Creates an instance of `PhraseQuery`. + * + * @param query - The phrase to search for in the specified column. + * @param column - The name of the column to search within. + */ + constructor( + private query: string, + private column: string, + ) {} + + queryType(): FullTextQueryType { + return FullTextQueryType.MatchPhrase; + } + + toDict(): Record { + return { + [this.queryType()]: { + [this.column]: this.query, + }, + }; + } +} + +export class BoostQuery implements FullTextQuery { + /** + * Creates an instance of BoostQuery. + * + * @param positive - The positive query that boosts the relevance score. + * @param negative - The negative query that reduces the relevance score. + * @param negativeBoost - The factor by which the negative query reduces the score. + */ + constructor( + private positive: FullTextQuery, + private negative: FullTextQuery, + private negativeBoost: number, + ) {} + + queryType(): FullTextQueryType { + return FullTextQueryType.Boost; + } + + toDict(): Record { + return { + [this.queryType()]: { + positive: this.positive.toDict(), + negative: this.negative.toDict(), + // biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs + negative_boost: this.negativeBoost, + }, + }; + } +} + +export class MultiMatchQuery implements FullTextQuery { + /** + * Creates an instance of MultiMatchQuery. + * + * @param query - The text query to search for across multiple columns. + * @param columns - An array of column names to search within. + * @param boosts - (Optional) An array of boost factors corresponding to each column. Default is an array of 1.0 for each column. + * + * The `boosts` array should have the same length as `columns`. If not provided, all columns will have a default boost of 1.0. + * If the length of `boosts` is less than `columns`, it will be padded with 1.0s. + */ + constructor( + private query: string, + private columns: string[], + private boosts: number[] = columns.map(() => 1.0), + ) {} + + queryType(): FullTextQueryType { + return FullTextQueryType.MultiMatch; + } + + toDict(): Record { + return { + [this.queryType()]: { + query: this.query, + columns: this.columns, + boost: this.boosts, + }, + }; + } +} diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index 1b9badeb..d945d48e 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -3,7 +3,7 @@ use std::sync::Arc; -use lancedb::index::scalar::FullTextSearchQuery; +use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery}; use lancedb::query::ExecutableQuery; use lancedb::query::Query as LanceDbQuery; use lancedb::query::QueryBase; @@ -18,7 +18,7 @@ use crate::error::NapiErrorExt; use crate::iterator::RecordBatchIterator; use crate::rerankers::Reranker; use crate::rerankers::RerankerCallbacks; -use crate::util::parse_distance_type; +use crate::util::{parse_distance_type, parse_fts_query}; #[napi] pub struct Query { @@ -38,9 +38,53 @@ impl Query { } #[napi] - pub fn full_text_search(&mut self, query: String, columns: Option>) { - let query = FullTextSearchQuery::new(query).columns(columns); + pub fn full_text_search(&mut self, query: napi::JsUnknown) -> napi::Result<()> { + let query = unsafe { query.cast::() }; + let query = if let Some(query_text) = query.get::<_, String>("query").transpose() { + let mut query_text = query_text?; + let columns = query.get::<_, Option>>("columns")?.flatten(); + + let is_phrase = + query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"'); + let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false); + + if is_phrase { + // Remove the surrounding quotes for phrase queries + query_text = query_text[1..query_text.len() - 1].to_string(); + } + + let query: FtsQuery = match (is_phrase, is_multi_match) { + (false, _) => MatchQuery::new(query_text).into(), + (true, false) => PhraseQuery::new(query_text).into(), + (true, true) => { + return Err(napi::Error::from_reason( + "Phrase queries cannot be used with multiple columns.", + )); + } + }; + let mut query = FullTextSearchQuery::new_query(query); + if let Some(cols) = columns { + if !cols.is_empty() { + query = query.with_columns(&cols).map_err(|e| { + napi::Error::from_reason(format!( + "Failed to set full text search columns: {}", + e + )) + })?; + } + } + query + } else if let Some(query) = query.get::<_, napi::JsObject>("query")? { + let query = parse_fts_query(&query)?; + FullTextSearchQuery::new_query(query) + } else { + return Err(napi::Error::from_reason( + "Invalid full text search query object".to_string(), + )); + }; + self.inner = self.inner.clone().full_text_search(query); + Ok(()) } #[napi] @@ -195,9 +239,53 @@ impl VectorQuery { } #[napi] - pub fn full_text_search(&mut self, query: String, columns: Option>) { - let query = FullTextSearchQuery::new(query).columns(columns); + pub fn full_text_search(&mut self, query: napi::JsUnknown) -> napi::Result<()> { + let query = unsafe { query.cast::() }; + let query = if let Some(query_text) = query.get::<_, String>("query").transpose() { + let mut query_text = query_text?; + let columns = query.get::<_, Option>>("columns")?.flatten(); + + let is_phrase = + query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"'); + let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false); + + if is_phrase { + // Remove the surrounding quotes for phrase queries + query_text = query_text[1..query_text.len() - 1].to_string(); + } + + let query: FtsQuery = match (is_phrase, is_multi_match) { + (false, _) => MatchQuery::new(query_text).into(), + (true, false) => PhraseQuery::new(query_text).into(), + (true, true) => { + return Err(napi::Error::from_reason( + "Phrase queries cannot be used with multiple columns.", + )); + } + }; + let mut query = FullTextSearchQuery::new_query(query); + if let Some(cols) = columns { + if !cols.is_empty() { + query = query.with_columns(&cols).map_err(|e| { + napi::Error::from_reason(format!( + "Failed to set full text search columns: {}", + e + )) + })?; + } + } + query + } else if let Some(query) = query.get::<_, napi::JsObject>("query")? { + let query = parse_fts_query(&query)?; + FullTextSearchQuery::new_query(query) + } else { + return Err(napi::Error::from_reason( + "Invalid full text search query object".to_string(), + )); + }; + self.inner = self.inner.clone().full_text_search(query); + Ok(()) } #[napi] diff --git a/nodejs/src/util.rs b/nodejs/src/util.rs index a29a67f9..18959136 100644 --- a/nodejs/src/util.rs +++ b/nodejs/src/util.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors +use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery}; use lancedb::DistanceType; pub fn parse_distance_type(distance_type: impl AsRef) -> napi::Result { @@ -15,3 +16,144 @@ pub fn parse_distance_type(distance_type: impl AsRef) -> napi::Result napi::Result { + let query_type = query + .get_property_names()? + .get_element::(0)?; + let query_type = query_type.into_utf8()?.into_owned()?; + let query_value = + query + .get::<_, napi::JsObject>(&query_type)? + .ok_or(napi::Error::from_reason(format!( + "query value {} not found", + query_type + )))?; + + match query_type.as_str() { + "match" => { + let column = query_value + .get_property_names()? + .get_element::(0)? + .into_utf8()? + .into_owned()?; + let params = + query_value + .get::<_, napi::JsObject>(&column)? + .ok_or(napi::Error::from_reason(format!( + "column {} not found", + column + )))?; + + let query = params + .get::<_, napi::JsString>("query")? + .ok_or(napi::Error::from_reason("query not found"))? + .into_utf8()? + .into_owned()?; + let boost = params + .get::<_, napi::JsNumber>("boost")? + .ok_or(napi::Error::from_reason("boost not found"))? + .get_double()? as f32; + let fuzziness = params + .get::<_, napi::JsNumber>("fuzziness")? + .map(|f| f.get_uint32()) + .transpose()?; + let max_expansions = params + .get::<_, napi::JsNumber>("max_expansions")? + .ok_or(napi::Error::from_reason("max_expansions not found"))? + .get_uint32()? as usize; + + let query = MatchQuery::new(query) + .with_column(Some(column)) + .with_boost(boost) + .with_fuzziness(fuzziness) + .with_max_expansions(max_expansions); + Ok(query.into()) + } + + "match_phrase" => { + let column = query_value + .get_property_names()? + .get_element::(0)? + .into_utf8()? + .into_owned()?; + let query = query_value + .get::<_, napi::JsString>(&column)? + .ok_or(napi::Error::from_reason(format!( + "column {} not found", + column + )))? + .into_utf8()? + .into_owned()?; + + let query = PhraseQuery::new(query).with_column(Some(column)); + Ok(query.into()) + } + + "boost" => { + let positive = query_value + .get::<_, napi::JsObject>("positive")? + .ok_or(napi::Error::from_reason("positive not found"))?; + + let negative = query_value + .get::<_, napi::JsObject>("negative")? + .ok_or(napi::Error::from_reason("negative not found"))?; + let negative_boost = query_value + .get::<_, napi::JsNumber>("negative_boost")? + .ok_or(napi::Error::from_reason("negative_boost not found"))? + .get_double()? as f32; + + let positive = parse_fts_query(&positive)?; + let negative = parse_fts_query(&negative)?; + let query = BoostQuery::new(positive, negative, Some(negative_boost)); + Ok(query.into()) + } + + "multi_match" => { + let query = query_value + .get::<_, napi::JsString>("query")? + .ok_or(napi::Error::from_reason("query not found"))? + .into_utf8()? + .into_owned()?; + let columns_array = query_value + .get::<_, napi::JsTypedArray>("columns")? + .ok_or(napi::Error::from_reason("columns not found"))?; + let columns_num = columns_array.get_array_length()?; + let mut columns = Vec::with_capacity(columns_num as usize); + for i in 0..columns_num { + let column = columns_array + .get_element::(i)? + .into_utf8()? + .into_owned()?; + columns.push(column); + } + let boost_array = query_value + .get::<_, napi::JsTypedArray>("boost")? + .ok_or(napi::Error::from_reason("boost not found"))?; + if boost_array.get_array_length()? != columns_num { + return Err(napi::Error::from_reason(format!( + "boost array length ({}) does not match columns length ({})", + boost_array.get_array_length()?, + columns_num + ))); + } + let mut boost = Vec::with_capacity(columns_num as usize); + for i in 0..columns_num { + let b = boost_array.get_element::(i)?.get_double()? as f32; + boost.push(b); + } + + let query = + MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| { + napi::Error::from_reason(format!("Error creating MultiMatchQuery: {}", e)) + })?; + + Ok(query.into()) + } + + _ => Err(napi::Error::from_reason(format!( + "Unsupported query type: {}", + query_type + ))), + } +} diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 45f2ce1e..095f3aa7 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -4,7 +4,9 @@ from __future__ import annotations from abc import ABC, abstractmethod +import abc from concurrent.futures import ThreadPoolExecutor +from enum import Enum from typing import ( TYPE_CHECKING, Dict, @@ -83,6 +85,196 @@ def ensure_vector_query( return val +class FullTextQueryType(Enum): + MATCH = "match" + MATCH_PHRASE = "match_phrase" + BOOST = "boost" + MULTI_MATCH = "multi_match" + + +class FullTextQuery(abc.ABC, pydantic.BaseModel): + @abc.abstractmethod + def query_type(self) -> FullTextQueryType: + """ + Get the query type of the query. + + Returns + ------- + str + The type of the query. + """ + + @abc.abstractmethod + def to_dict(self) -> dict: + """ + Convert the query to a dictionary. + + Returns + ------- + dict + The query as a dictionary. + """ + + +class MatchQuery(FullTextQuery): + def __init__( + self, + query: str, + column: str, + *, + boost: float = 1.0, + fuzziness: int = 0, + max_expansions: int = 50, + ): + """ + Match query for full-text search. + + Parameters + ---------- + query : str + The query string to match against. + column : str + The name of the column to match against. + boost : float, default 1.0 + The boost factor for the query. + The score of each matching document is multiplied by this value. + fuzziness : int, optional + The maximum edit distance for each term in the match query. + Defaults to 0 (exact match). + If None, fuzziness is applied automatically by the rules: + - 0 for terms with length <= 2 + - 1 for terms with length <= 5 + - 2 for terms with length > 5 + max_expansions : int, optional + The maximum number of terms to consider for fuzzy matching. + Defaults to 50. + """ + self.column = column + self.query = query + self.boost = boost + self.fuzziness = fuzziness + self.max_expansions = max_expansions + + def query_type(self) -> FullTextQueryType: + return FullTextQueryType.MATCH + + def to_dict(self) -> dict: + return { + "match": { + self.column: { + "query": self.query, + "boost": self.boost, + "fuzziness": self.fuzziness, + "max_expansions": self.max_expansions, + } + } + } + + +class PhraseQuery(FullTextQuery): + def __init__(self, query: str, column: str): + """ + Phrase query for full-text search. + + Parameters + ---------- + query : str + The query string to match against. + column : str + The name of the column to match against. + """ + self.column = column + self.query = query + + def query_type(self) -> FullTextQueryType: + return FullTextQueryType.MATCH_PHRASE + + def to_dict(self) -> dict: + return { + "match_phrase": { + self.column: self.query, + } + } + + +class BoostQuery(FullTextQuery): + def __init__( + self, + positive: FullTextQuery, + negative: FullTextQuery, + negative_boost: float, + ): + """ + Boost query for full-text search. + + Parameters + ---------- + positive : dict + The positive query object. + negative : dict + The negative query object. + negative_boost : float + The boost factor for the negative query. + """ + self.positive = positive + self.negative = negative + self.negative_boost = negative_boost + + def query_type(self) -> FullTextQueryType: + return FullTextQueryType.BOOST + + def to_dict(self) -> dict: + return { + "boost": { + "positive": self.positive.to_dict(), + "negative": self.negative.to_dict(), + "negative_boost": self.negative_boost, + } + } + + +class MultiMatchQuery(FullTextQuery): + def __init__( + self, + query: str, + columns: list[str], + *, + boosts: Optional[list[float]] = None, + ): + """ + Multi-match query for full-text search. + + Parameters + ---------- + query : str | list[Query] + If a string, the query string to match against. + + columns : list[str] + The list of columns to match against. + + boosts : list[float], optional + The list of boost factors for each column. If not provided, + all columns will have the same boost factor. + """ + self.query = query + self.columns = columns + if boosts is None: + boosts = [1.0] * len(columns) + self.boosts = boosts + + def query_type(self) -> FullTextQueryType: + return FullTextQueryType.MULTI_MATCH + + def to_dict(self) -> dict: + return { + "multi_match": { + "query": self.query, + "columns": self.columns, + "boost": self.boosts, + } + } + + class FullTextSearchQuery(pydantic.BaseModel): """A LanceDB Full Text Search Query @@ -92,18 +284,13 @@ class FullTextSearchQuery(pydantic.BaseModel): The columns to search If None, then the table should select the column automatically. - query: str - The query to search for - limit: Optional[int] = None - The limit on the number of results to return - wand_factor: Optional[float] = None - The wand factor to use for the search + query: str | FullTextQuery + If a string, it is treated as a MatchQuery. + If a FullTextQuery object, it is used directly. """ columns: Optional[List[str]] = None - query: str - limit: Optional[int] = None - wand_factor: Optional[float] = None + query: Union[str, FullTextQuery] class Query(pydantic.BaseModel): @@ -712,13 +899,14 @@ class LanceQueryBuilder(ABC): """ raise NotImplementedError - def text(self, text: str) -> Self: + def text(self, text: str | FullTextQuery) -> Self: """Set the text to search for. Parameters ---------- - text: str - The text to search for. + text: str | FullTextQuery + If a string, it is treated as a MatchQuery. + If a FullTextQuery object, it is used directly. Returns ------- @@ -1084,7 +1272,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): def __init__( self, table: "Table", - query: str, + query: str | FullTextQuery, ordering_field_name: Optional[str] = None, fts_columns: Optional[Union[str, List[str]]] = None, ): @@ -1691,7 +1879,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): self._vector = vector return self - def text(self, text: str) -> LanceHybridQueryBuilder: + def text(self, text: str | FullTextQuery) -> LanceHybridQueryBuilder: self._text = text return self @@ -2088,7 +2276,7 @@ class AsyncQuery(AsyncQueryBase): ) def nearest_to_text( - self, query: str, columns: Union[str, List[str], None] = None + self, query: str | FullTextQuery, columns: Union[str, List[str], None] = None ) -> AsyncFTSQuery: """ Find the documents that are most relevant to the given text query. @@ -2114,9 +2302,13 @@ class AsyncQuery(AsyncQueryBase): columns = [columns] if columns is None: columns = [] - return AsyncFTSQuery( - self._inner.nearest_to_text({"query": query, "columns": columns}) - ) + + if isinstance(query, str): + return AsyncFTSQuery( + self._inner.nearest_to_text({"query": query, "columns": columns}) + ) + # FullTextQuery object + return AsyncFTSQuery(self._inner.nearest_to_text(query.to_dict())) class AsyncFTSQuery(AsyncQueryBase): @@ -2399,7 +2591,7 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase): return self def nearest_to_text( - self, query: str, columns: Union[str, List[str], None] = None + self, query: str | FullTextQuery, columns: Union[str, List[str], None] = None ) -> AsyncHybridQuery: """ Find the documents that are most relevant to the given text query, @@ -2429,9 +2621,13 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase): columns = [columns] if columns is None: columns = [] - return AsyncHybridQuery( - self._inner.nearest_to_text({"query": query, "columns": columns}) - ) + + if isinstance(query, str): + return AsyncHybridQuery( + self._inner.nearest_to_text({"query": query, "columns": columns}) + ) + # FullTextQuery object + return AsyncHybridQuery(self._inner.nearest_to_text(query.to_dict())) async def to_batches( self, *, max_batch_length: Optional[int] = None diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 4cf615ed..46dc9467 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -3373,8 +3373,6 @@ class AsyncTable: async_query = async_query.nearest_to_text( query.full_text_query.query, query.full_text_query.columns ) - if query.full_text_query.limit is not None: - async_query = async_query.limit(query.full_text_query.limit) return async_query diff --git a/python/python/tests/test_remote_db.py b/python/python/tests/test_remote_db.py index 642e2443..c4975f97 100644 --- a/python/python/tests/test_remote_db.py +++ b/python/python/tests/test_remote_db.py @@ -444,6 +444,16 @@ def test_query_sync_fts(): "prefilter": True, "with_row_id": True, "version": None, + } or body == { + "full_text_query": { + "query": "puppy", + "columns": ["description", "name"], + }, + "k": 42, + "vector": [], + "prefilter": True, + "with_row_id": True, + "version": None, } return pa.table({"id": [1, 2, 3]}) diff --git a/python/src/query.rs b/python/src/query.rs index bf5b2756..d69f36ae 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -8,19 +8,19 @@ use arrow::array::Array; use arrow::array::ArrayData; use arrow::pyarrow::FromPyArrow; use arrow::pyarrow::IntoPyArrow; -use lancedb::index::scalar::FullTextSearchQuery; +use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery}; use lancedb::query::QueryExecutionOptions; use lancedb::query::QueryFilter; use lancedb::query::{ ExecutableQuery, Query as LanceDbQuery, QueryBase, Select, VectorQuery as LanceDbVectorQuery, }; use lancedb::table::AnyQuery; -use pyo3::exceptions::PyNotImplementedError; use pyo3::exceptions::PyRuntimeError; +use pyo3::exceptions::{PyNotImplementedError, PyValueError}; use pyo3::prelude::{PyAnyMethods, PyDictMethods}; use pyo3::pymethods; -use pyo3::types::PyDict; use pyo3::types::PyList; +use pyo3::types::{PyDict, PyString}; use pyo3::Bound; use pyo3::IntoPyObject; use pyo3::PyAny; @@ -31,7 +31,7 @@ use pyo3_async_runtimes::tokio::future_into_py; use crate::arrow::RecordBatchStream; use crate::error::PythonErrorExt; -use crate::util::parse_distance_type; +use crate::util::{parse_distance_type, parse_fts_query}; // Python representation of full text search parameters #[derive(Clone)] @@ -46,8 +46,8 @@ pub struct PyFullTextSearchQuery { impl From for PyFullTextSearchQuery { fn from(query: FullTextSearchQuery) -> Self { PyFullTextSearchQuery { - columns: query.columns, - query: query.query, + columns: query.columns().into_iter().collect(), + query: query.query.query().to_owned(), limit: query.limit, wand_factor: query.wand_factor, } @@ -236,22 +236,61 @@ impl Query { } pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult { - let query_text = query + let fts_query = query .get_item("query")? .ok_or(PyErr::new::( "Query text is required for nearest_to_text", - ))? - .extract::()?; - let columns = query - .get_item("columns")? - .map(|columns| columns.extract::>()) - .transpose()?; + ))?; - let fts_query = FullTextSearchQuery::new(query_text).columns(columns); + let query = if let Ok(query_text) = fts_query.downcast::() { + let mut query_text = query_text.to_string(); + let columns = query + .get_item("columns")? + .map(|columns| columns.extract::>()) + .transpose()?; + + let is_phrase = + query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"'); + let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false); + + if is_phrase { + // Remove the surrounding quotes for phrase queries + query_text = query_text[1..query_text.len() - 1].to_string(); + } + + let query: FtsQuery = match (is_phrase, is_multi_match) { + (false, _) => MatchQuery::new(query_text).into(), + (true, false) => PhraseQuery::new(query_text).into(), + (true, true) => { + return Err(PyValueError::new_err( + "Phrase queries cannot be used with multiple columns.", + )); + } + }; + let mut query = FullTextSearchQuery::new_query(query); + if let Some(cols) = columns { + if !cols.is_empty() { + query = query.with_columns(&cols).map_err(|e| { + PyValueError::new_err(format!( + "Failed to set full text search columns: {}", + e + )) + })?; + } + } + query + } else if let Ok(query) = query.downcast::() { + let query = parse_fts_query(query)?; + FullTextSearchQuery::new_query(query) + } else { + return Err(PyValueError::new_err( + "query must be a string or a Query object", + )); + }; Ok(FTSQuery { - fts_query, inner: self.inner.clone(), + fts_query: query, }) } @@ -386,7 +425,7 @@ impl FTSQuery { } pub fn get_query(&self) -> String { - self.fts_query.query.clone() + self.fts_query.query.query().to_owned() } pub fn to_query_request(&self) -> PyQueryRequest { diff --git a/python/src/util.rs b/python/src/util.rs index 84043217..225827b3 100644 --- a/python/src/util.rs +++ b/python/src/util.rs @@ -3,11 +3,15 @@ use std::sync::Mutex; +use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery}; use lancedb::DistanceType; +use pyo3::prelude::{PyAnyMethods, PyDictMethods, PyListMethods}; +use pyo3::types::PyDict; use pyo3::{ exceptions::{PyRuntimeError, PyValueError}, pyfunction, PyResult, }; +use pyo3::{Bound, PyAny}; /// A wrapper around a rust builder /// @@ -59,3 +63,116 @@ pub fn validate_table_name(table_name: &str) -> PyResult<()> { lancedb::utils::validate_table_name(table_name) .map_err(|e| PyValueError::new_err(e.to_string())) } + +pub fn parse_fts_query(query: &Bound<'_, PyDict>) -> PyResult { + let query_type = query.keys().get_item(0)?.extract::()?; + let query_value = query + .get_item(&query_type)? + .ok_or(PyValueError::new_err(format!( + "Query type {} not found", + query_type + )))?; + let query_value = query_value.downcast::()?; + + match query_type.as_str() { + "match" => { + let column = query_value.keys().get_item(0)?.extract::()?; + let params = query_value + .get_item(&column)? + .ok_or(PyValueError::new_err(format!( + "column {} not found", + column + )))?; + let params = params.downcast::()?; + + let query = params + .get_item("query")? + .ok_or(PyValueError::new_err("query not found"))? + .extract::()?; + let boost = params + .get_item("boost")? + .ok_or(PyValueError::new_err("boost not found"))? + .extract::()?; + let fuzziness = params + .get_item("fuzziness")? + .ok_or(PyValueError::new_err("fuzziness not found"))? + .extract::>()?; + let max_expansions = params + .get_item("max_expansions")? + .ok_or(PyValueError::new_err("max_expansions not found"))? + .extract::()?; + + let query = MatchQuery::new(query) + .with_column(Some(column)) + .with_boost(boost) + .with_fuzziness(fuzziness) + .with_max_expansions(max_expansions); + Ok(query.into()) + } + + "match_phrase" => { + let column = query_value.keys().get_item(0)?.extract::()?; + let query = query_value + .get_item(&column)? + .ok_or(PyValueError::new_err(format!( + "column {} not found", + column + )))? + .extract::()?; + + let query = PhraseQuery::new(query).with_column(Some(column)); + Ok(query.into()) + } + + "boost" => { + let positive: Bound<'_, PyAny> = query_value + .get_item("positive")? + .ok_or(PyValueError::new_err("positive not found"))?; + let positive = positive.downcast::()?; + + let negative = query_value + .get_item("negative")? + .ok_or(PyValueError::new_err("negative not found"))?; + let negative = negative.downcast::()?; + + let negative_boost = query_value + .get_item("negative_boost")? + .ok_or(PyValueError::new_err("negative_boost not found"))? + .extract::()?; + + let positive_query = parse_fts_query(positive)?; + let negative_query = parse_fts_query(negative)?; + let query = BoostQuery::new(positive_query, negative_query, Some(negative_boost)); + + Ok(query.into()) + } + + "multi_match" => { + let query = query_value + .get_item("query")? + .ok_or(PyValueError::new_err("query not found"))? + .extract::()?; + + let columns = query_value + .get_item("columns")? + .ok_or(PyValueError::new_err("columns not found"))? + .extract::>()?; + + let boost = query_value + .get_item("boost")? + .ok_or(PyValueError::new_err("boost not found"))? + .extract::>()?; + + let query = + MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| { + PyValueError::new_err(format!("Error creating MultiMatchQuery: {}", e)) + })?; + Ok(query.into()) + } + + _ => Err(PyValueError::new_err(format!( + "Unsupported query type: {}", + query_type + ))), + } +} diff --git a/rust/lancedb/src/index/scalar.rs b/rust/lancedb/src/index/scalar.rs index 2d5c0a72..7d381961 100644 --- a/rust/lancedb/src/index/scalar.rs +++ b/rust/lancedb/src/index/scalar.rs @@ -80,5 +80,6 @@ impl FtsIndexBuilder { } } +pub use lance_index::scalar::inverted::query::*; pub use lance_index::scalar::inverted::TokenizerConfig; pub use lance_index::scalar::FullTextSearchQuery; diff --git a/rust/lancedb/src/query.rs b/rust/lancedb/src/query.rs index 18a5d8ad..11b413a2 100644 --- a/rust/lancedb/src/query.rs +++ b/rust/lancedb/src/query.rs @@ -1056,7 +1056,7 @@ impl VectorQuery { })?; let mut results = reranker - .rerank_hybrid(&fts_query.query, vec_results, fts_results) + .rerank_hybrid(&fts_query.query.query(), vec_results, fts_results) .await?; check_reranker_result(&results)?; diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index 2d1f4ced..21703efb 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -52,6 +52,10 @@ impl ServerVersion { pub fn support_multivector(&self) -> bool { self.0 >= semver::Version::new(0, 2, 0) } + + pub fn support_structural_fts(&self) -> bool { + self.0 >= semver::Version::new(0, 3, 0) + } } pub const OPT_REMOTE_PREFIX: &str = "remote_database_"; diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 4991f968..07b62abf 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -155,7 +155,11 @@ impl RemoteTable { Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream))) } - fn apply_query_params(body: &mut serde_json::Value, params: &QueryRequest) -> Result<()> { + fn apply_query_params( + &self, + body: &mut serde_json::Value, + params: &QueryRequest, + ) -> Result<()> { body["prefilter"] = params.prefilter.into(); if let Some(offset) = params.offset { body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset)); @@ -209,10 +213,17 @@ impl RemoteTable { message: "Wand factor is not yet supported in LanceDB Cloud".into(), }); } - body["full_text_query"] = serde_json::json!({ - "columns": full_text_search.columns, - "query": full_text_search.query, - }) + + if self.server_version.support_structural_fts() { + body["full_text_query"] = serde_json::json!({ + "query": full_text_search.query.clone(), + }); + } else { + body["full_text_query"] = serde_json::json!({ + "columns": full_text_search.columns().into_iter().collect::>(), + "query": full_text_search.query.query(), + }) + } } Ok(()) @@ -223,7 +234,7 @@ impl RemoteTable { mut body: serde_json::Value, query: &VectorQueryRequest, ) -> Result> { - Self::apply_query_params(&mut body, &query.base)?; + self.apply_query_params(&mut body, &query.base)?; // Apply general parameters, before we dispatch based on number of query vectors. body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default()); @@ -346,7 +357,7 @@ impl RemoteTable { match query { AnyQuery::Query(query) => { let mut body = base_body.clone(); - Self::apply_query_params(&mut body, query)?; + self.apply_query_params(&mut body, query)?; // Empty vector can be passed if no vector search is performed. body["vector"] = serde_json::Value::Array(Vec::new()); Ok(vec![body]) @@ -1683,7 +1694,18 @@ mod tests { "prefilter": true, "version": null }); - assert_eq!(body, expected_body); + let expected_body_2 = serde_json::json!({ + "full_text_query": { + "columns": ["b","a"], + "query": "hello world", + }, + "k": 10, + "vector": [], + "with_row_id": true, + "prefilter": true, + "version": null + }); + assert!(body == expected_body || body == expected_body_2); let data = RecordBatch::try_new( Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])), @@ -1702,7 +1724,8 @@ mod tests { .query() .full_text_search( FullTextSearchQuery::new("hello world".into()) - .columns(Some(vec!["a".into(), "b".into()])), + .with_columns(&["a".into(), "b".into()]) + .unwrap(), ) .with_row_id() .limit(10)