Compare commits

..

12 Commits

Author SHA1 Message Date
Lance Release
c1738250a3 Bump version: 0.22.0-beta.1 → 0.22.0-beta.2 2025-04-01 17:27:57 +00:00
Weston Pace
1ee63984f5 feat: allow FSB to be used for btree indices (#2297)
We recently allowed this for lance, but there was a check in lancedb as
well that was preventing it.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
- Added support for indexing fixed-size binary data using B-tree
structures for efficient data storage and retrieval.
- **Tests**
- Implemented automated tests to ensure the new binary indexing works
correctly and meets the expected configuration.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-04-01 10:27:22 -07:00
Lance Release
2eb2c8862a Updating package-lock.json 2025-04-01 14:27:26 +00:00
Lance Release
4ea8e178d3 Updating package-lock.json 2025-04-01 14:27:07 +00:00
Lance Release
e4485a630e Bump version: 0.19.0-beta.0 → 0.19.0-beta.1 2025-04-01 14:26:47 +00:00
Lance Release
fb95f9b3bd Bump version: 0.22.0-beta.0 → 0.22.0-beta.1 2025-04-01 14:26:28 +00:00
Weston Pace
625bab3f21 feat: update to lance 0.25.3b1 (#2294)
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **Chores**
- Updated dependency versions for improved performance and
compatibility.

- **New Features**
- Added support for structured full-text search with expanded query
types (e.g., match, phrase, boost, multi-match) and flexible input
formats.
- Introduced a new method to check server support for structural
full-text search features.
- Enhanced the query system with new classes and interfaces for handling
various full-text queries.
- Expanded the functionality of existing methods to accept more complex
query structures, including updates to method signatures.

- **Bug Fixes**
  - Improved error handling and reporting for full-text search queries.

- **Refactor**
- Enhanced query processing with streamlined input handling and improved
error reporting, ensuring more robust and consistent search results
across platforms.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
Co-authored-by: BubbleCal <bubble-cal@outlook.com>
2025-04-01 06:36:42 -07:00
Will Jones
e59f9382a0 ci: deprecate vectordb each release (#2292)
I realized that each time we published, the new package was no longer
deprecated. This re-deprecates the package after each new publish.
2025-03-31 12:03:04 -07:00
Lance Release
fdee7ba477 Updating package-lock.json 2025-03-30 19:09:17 +00:00
Lance Release
c44fa3abc4 Updating package-lock.json 2025-03-30 18:05:07 +00:00
Lance Release
fc43aac0ed Updating package-lock.json 2025-03-30 18:04:51 +00:00
Lance Release
e67cd0baf9 Bump version: 0.18.3-beta.0 → 0.19.0-beta.0 2025-03-30 18:04:32 +00:00
48 changed files with 1490 additions and 280 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.18.3-beta.0"
current_version = "0.19.0-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -535,6 +535,10 @@ jobs:
for filename in *.tgz; do
npm publish $PUBLISH_ARGS $filename
done
- name: Deprecate
# We need to deprecate the old package to avoid confusion.
# Each time we publish a new version, it gets undeprecated.
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
- name: Notify Slack Action
uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() }}

276
Cargo.lock generated
View File

@@ -1816,27 +1816,30 @@ dependencies = [
[[package]]
name = "datafusion"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a"
checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3"
dependencies = [
"arrow",
"arrow-array",
"arrow-ipc",
"arrow-schema",
"async-trait",
"bytes",
"chrono",
"datafusion-catalog",
"datafusion-catalog-listing",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-datasource",
"datafusion-execution",
"datafusion-expr",
"datafusion-expr-common",
"datafusion-functions",
"datafusion-functions-aggregate",
"datafusion-functions-nested",
"datafusion-functions-table",
"datafusion-functions-window",
"datafusion-macros",
"datafusion-optimizer",
"datafusion-physical-expr",
"datafusion-physical-expr-common",
@@ -1844,14 +1847,13 @@ dependencies = [
"datafusion-physical-plan",
"datafusion-sql",
"futures",
"glob",
"itertools 0.14.0",
"log",
"object_store",
"parking_lot",
"rand 0.8.5",
"regex",
"sqlparser 0.53.0",
"sqlparser 0.54.0",
"tempfile",
"tokio",
"url",
@@ -1860,9 +1862,9 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1"
checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2"
dependencies = [
"arrow",
"async-trait",
@@ -1876,21 +1878,39 @@ dependencies = [
"itertools 0.14.0",
"log",
"parking_lot",
"sqlparser 0.53.0",
]
[[package]]
name = "datafusion-catalog-listing"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43"
dependencies = [
"arrow",
"async-trait",
"datafusion-catalog",
"datafusion-common",
"datafusion-datasource",
"datafusion-execution",
"datafusion-expr",
"datafusion-physical-expr",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"futures",
"log",
"object_store",
"tokio",
]
[[package]]
name = "datafusion-common"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602"
checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a"
dependencies = [
"ahash",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-ipc",
"arrow-schema",
"base64 0.22.1",
"half",
"hashbrown 0.14.5",
@@ -1899,32 +1919,60 @@ dependencies = [
"log",
"object_store",
"paste",
"sqlparser 0.53.0",
"sqlparser 0.54.0",
"tokio",
"web-time",
]
[[package]]
name = "datafusion-common-runtime"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48"
checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668"
dependencies = [
"log",
"tokio",
]
[[package]]
name = "datafusion-doc"
version = "45.0.0"
name = "datafusion-datasource"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66"
checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92"
dependencies = [
"arrow",
"async-trait",
"bytes",
"chrono",
"datafusion-catalog",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-execution",
"datafusion-expr",
"datafusion-physical-expr",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"futures",
"glob",
"itertools 0.14.0",
"log",
"object_store",
"rand 0.8.5",
"tokio",
"url",
]
[[package]]
name = "datafusion-doc"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256"
[[package]]
name = "datafusion-execution"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27"
checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1"
dependencies = [
"arrow",
"dashmap",
@@ -1941,9 +1989,9 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1"
checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f"
dependencies = [
"arrow",
"chrono",
@@ -1956,26 +2004,27 @@ dependencies = [
"indexmap 2.8.0",
"paste",
"serde_json",
"sqlparser 0.53.0",
"sqlparser 0.54.0",
]
[[package]]
name = "datafusion-expr-common"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885"
checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e"
dependencies = [
"arrow",
"datafusion-common",
"indexmap 2.8.0",
"itertools 0.14.0",
"paste",
]
[[package]]
name = "datafusion-functions"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b"
checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40"
dependencies = [
"arrow",
"arrow-buffer",
@@ -1989,7 +2038,6 @@ dependencies = [
"datafusion-expr",
"datafusion-expr-common",
"datafusion-macros",
"hashbrown 0.14.5",
"hex",
"itertools 0.14.0",
"log",
@@ -2003,14 +2051,12 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6"
checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05"
dependencies = [
"ahash",
"arrow",
"arrow-buffer",
"arrow-schema",
"datafusion-common",
"datafusion-doc",
"datafusion-execution",
@@ -2026,9 +2072,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c"
checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb"
dependencies = [
"ahash",
"arrow",
@@ -2039,15 +2085,12 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e"
checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a"
dependencies = [
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-ord",
"arrow-schema",
"datafusion-common",
"datafusion-doc",
"datafusion-execution",
@@ -2063,9 +2106,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36"
checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5"
dependencies = [
"arrow",
"async-trait",
@@ -2079,9 +2122,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5"
checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6"
dependencies = [
"datafusion-common",
"datafusion-doc",
@@ -2096,9 +2139,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4"
checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -2106,9 +2149,9 @@ dependencies = [
[[package]]
name = "datafusion-macros"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af"
checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7"
dependencies = [
"datafusion-expr",
"quote",
@@ -2117,9 +2160,9 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54"
checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b"
dependencies = [
"arrow",
"chrono",
@@ -2135,15 +2178,12 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c"
checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85"
dependencies = [
"ahash",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"datafusion-expr-common",
@@ -2160,13 +2200,12 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-common"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435"
checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb"
dependencies = [
"ahash",
"arrow",
"arrow-buffer",
"datafusion-common",
"datafusion-expr-common",
"hashbrown 0.14.5",
@@ -2175,12 +2214,11 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb"
checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763"
dependencies = [
"arrow",
"arrow-schema",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
@@ -2188,22 +2226,18 @@ dependencies = [
"datafusion-physical-expr",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"futures",
"itertools 0.14.0",
"log",
"url",
]
[[package]]
name = "datafusion-physical-plan"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f"
checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f"
dependencies = [
"ahash",
"arrow",
"arrow-array",
"arrow-buffer",
"arrow-ord",
"arrow-schema",
"async-trait",
@@ -2228,20 +2262,18 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "45.0.0"
version = "46.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43"
checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64"
dependencies = [
"arrow",
"arrow-array",
"arrow-schema",
"bigdecimal",
"datafusion-common",
"datafusion-expr",
"indexmap 2.8.0",
"log",
"regex",
"sqlparser 0.53.0",
"sqlparser 0.54.0",
]
[[package]]
@@ -2687,12 +2719,21 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"rand 0.8.5",
]
[[package]]
name = "fst"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a"
dependencies = [
"utf8-ranges",
]
[[package]]
name = "funty"
version = "2.0.0"
@@ -3666,8 +3707,8 @@ dependencies = [
[[package]]
name = "lance"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow",
"arrow-arith",
@@ -3726,8 +3767,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -3744,8 +3785,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -3781,8 +3822,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow",
"arrow-array",
@@ -3798,6 +3839,7 @@ dependencies = [
"futures",
"lance-arrow",
"lance-core",
"lance-datagen",
"lazy_static",
"log",
"prost",
@@ -3806,10 +3848,26 @@ dependencies = [
"tracing",
]
[[package]]
name = "lance-datagen"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow",
"arrow-array",
"arrow-cast",
"arrow-schema",
"chrono",
"futures",
"hex",
"rand 0.8.5",
"rand_xoshiro",
]
[[package]]
name = "lance-encoding"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrayref",
"arrow",
@@ -3832,6 +3890,7 @@ dependencies = [
"lance-core",
"lazy_static",
"log",
"lz4",
"num-traits",
"paste",
"prost",
@@ -3847,8 +3906,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -3882,8 +3941,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow",
"arrow-array",
@@ -3902,6 +3961,7 @@ dependencies = [
"datafusion-sql",
"deepsize",
"dirs",
"fst",
"futures",
"half",
"itertools 0.13.0",
@@ -3935,8 +3995,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow",
"arrow-arith",
@@ -3974,8 +4034,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow-array",
"arrow-ord",
@@ -3998,8 +4058,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow",
"arrow-array",
@@ -4038,8 +4098,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "0.25.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.25.1-beta.3#33634d3b2e8f6a54e63a97721c7fcd31206e999a"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.1#ca2e69c2be80b0714d5ef1db5265bae9fadf682c"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4050,7 +4110,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.18.3-beta.0"
version = "0.19.0-beta.1"
dependencies = [
"arrow",
"arrow-array",
@@ -4137,7 +4197,7 @@ dependencies = [
[[package]]
name = "lancedb-node"
version = "0.18.3-beta.0"
version = "0.19.0-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4162,7 +4222,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.18.3-beta.0"
version = "0.19.0-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4180,7 +4240,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.21.3-beta.0"
version = "0.22.0-beta.1"
dependencies = [
"arrow",
"env_logger",
@@ -5895,6 +5955,15 @@ dependencies = [
"rand 0.8.5",
]
[[package]]
name = "rand_xoshiro"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "random_word"
version = "0.4.3"
@@ -6781,11 +6850,12 @@ dependencies = [
[[package]]
name = "sqlparser"
version = "0.53.0"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8"
checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899"
dependencies = [
"log",
"recursive",
"sqlparser_derive",
]
@@ -7636,7 +7706,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
dependencies = [
"getrandom 0.3.2",
"js-sys",
"serde",
"wasm-bindgen",
]
[[package]]

View File

@@ -21,16 +21,16 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.25.1", "features" = [
lance = { "version" = "=0.25.3", "features" = [
"dynamodb",
], tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-io = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-index = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-linalg = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-table = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-testing = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-datafusion = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-encoding = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
], tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-io = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-index = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-linalg = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-table = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-testing = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-datafusion = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
lance-encoding = { version = "=0.25.3", tag = "v0.25.3-beta.1", git = "https://github.com/lancedb/lance" }
# Note that this one does not include pyarrow
arrow = { version = "54.1", optional = false }
arrow-array = "54.1"
@@ -41,12 +41,12 @@ arrow-schema = "54.1"
arrow-arith = "54.1"
arrow-cast = "54.1"
async-trait = "0"
datafusion = { version = "45.0", default-features = false }
datafusion-catalog = "45.0"
datafusion-common = { version = "45.0", default-features = false }
datafusion-execution = "45.0"
datafusion-expr = "45.0"
datafusion-physical-plan = "45.0"
datafusion = { version = "46.0", default-features = false }
datafusion-catalog = "46.0"
datafusion-common = { version = "46.0", default-features = false }
datafusion-execution = "46.0"
datafusion-expr = "46.0"
datafusion-physical-plan = "46.0"
env_logger = "0.11"
half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits",

View File

@@ -0,0 +1,75 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / BoostQuery
# Class: BoostQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Implements
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
## Constructors
### new BoostQuery()
```ts
new BoostQuery(
positive,
negative,
negativeBoost): BoostQuery
```
Creates an instance of BoostQuery.
#### Parameters
* **positive**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
The positive query that boosts the relevance score.
* **negative**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
The negative query that reduces the relevance score.
* **negativeBoost**: `number`
The factor by which the negative query reduces the score.
#### Returns
[`BoostQuery`](BoostQuery.md)
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
***
### toDict()
```ts
toDict(): Record<string, unknown>
```
#### Returns
`Record`&lt;`string`, `unknown`&gt;
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict)

View File

@@ -0,0 +1,83 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / MatchQuery
# Class: MatchQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Implements
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
## Constructors
### new MatchQuery()
```ts
new MatchQuery(
query,
column,
boost,
fuzziness,
maxExpansions): MatchQuery
```
Creates an instance of MatchQuery.
#### Parameters
* **query**: `string`
The text query to search for.
* **column**: `string`
The name of the column to search within.
* **boost**: `number` = `1.0`
(Optional) The boost factor to influence the relevance score of this query. Default is `1.0`.
* **fuzziness**: `number` = `0`
(Optional) The allowed edit distance for fuzzy matching. Default is `0`.
* **maxExpansions**: `number` = `50`
(Optional) The maximum number of terms to consider for fuzzy matching. Default is `50`.
#### Returns
[`MatchQuery`](MatchQuery.md)
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
***
### toDict()
```ts
toDict(): Record<string, unknown>
```
#### Returns
`Record`&lt;`string`, `unknown`&gt;
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict)

View File

@@ -0,0 +1,77 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / MultiMatchQuery
# Class: MultiMatchQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Implements
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
## Constructors
### new MultiMatchQuery()
```ts
new MultiMatchQuery(
query,
columns,
boosts): MultiMatchQuery
```
Creates an instance of MultiMatchQuery.
#### Parameters
* **query**: `string`
The text query to search for across multiple columns.
* **columns**: `string`[]
An array of column names to search within.
* **boosts**: `number`[] = `...`
(Optional) An array of boost factors corresponding to each column. Default is an array of 1.0 for each column.
The `boosts` array should have the same length as `columns`. If not provided, all columns will have a default boost of 1.0.
If the length of `boosts` is less than `columns`, it will be padded with 1.0s.
#### Returns
[`MultiMatchQuery`](MultiMatchQuery.md)
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
***
### toDict()
```ts
toDict(): Record<string, unknown>
```
#### Returns
`Record`&lt;`string`, `unknown`&gt;
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict)

View File

@@ -0,0 +1,69 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / PhraseQuery
# Class: PhraseQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Implements
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
## Constructors
### new PhraseQuery()
```ts
new PhraseQuery(query, column): PhraseQuery
```
Creates an instance of `PhraseQuery`.
#### Parameters
* **query**: `string`
The phrase to search for in the specified column.
* **column**: `string`
The name of the column to search within.
#### Returns
[`PhraseQuery`](PhraseQuery.md)
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
***
### toDict()
```ts
toDict(): Record<string, unknown>
```
#### Returns
`Record`&lt;`string`, `unknown`&gt;
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`toDict`](../interfaces/FullTextQuery.md#todict)

View File

@@ -206,7 +206,7 @@ fullTextSearch(query, options?): this
#### Parameters
* **query**: `string`
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;
@@ -309,7 +309,7 @@ nearestToText(query, columns?): Query
#### Parameters
* **query**: `string`
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **columns?**: `string`[]

View File

@@ -192,7 +192,7 @@ fullTextSearch(query, options?): this
#### Parameters
* **query**: `string`
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

View File

@@ -347,7 +347,7 @@ fullTextSearch(query, options?): this
#### Parameters
* **query**: `string`
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

View File

@@ -0,0 +1,46 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / FullTextQueryType
# Enumeration: FullTextQueryType
Enum representing the types of full-text queries supported.
- `Match`: Performs a full-text search for terms in the query string.
- `MatchPhrase`: Searches for an exact phrase match in the text.
- `Boost`: Boosts the relevance score of specific terms in the query.
- `MultiMatch`: Searches across multiple fields for the query terms.
## Enumeration Members
### Boost
```ts
Boost: "boost";
```
***
### Match
```ts
Match: "match";
```
***
### MatchPhrase
```ts
MatchPhrase: "match_phrase";
```
***
### MultiMatch
```ts
MultiMatch: "multi_match";
```

View File

@@ -9,12 +9,20 @@
- [embedding](namespaces/embedding/README.md)
- [rerankers](namespaces/rerankers/README.md)
## Enumerations
- [FullTextQueryType](enumerations/FullTextQueryType.md)
## Classes
- [BoostQuery](classes/BoostQuery.md)
- [Connection](classes/Connection.md)
- [Index](classes/Index.md)
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
- [MatchQuery](classes/MatchQuery.md)
- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
- [MultiMatchQuery](classes/MultiMatchQuery.md)
- [PhraseQuery](classes/PhraseQuery.md)
- [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
@@ -33,6 +41,7 @@
- [CreateTableOptions](interfaces/CreateTableOptions.md)
- [ExecutableQuery](interfaces/ExecutableQuery.md)
- [FtsOptions](interfaces/FtsOptions.md)
- [FullTextQuery](interfaces/FullTextQuery.md)
- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
- [HnswPqOptions](interfaces/HnswPqOptions.md)
- [HnswSqOptions](interfaces/HnswSqOptions.md)

View File

@@ -0,0 +1,35 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / FullTextQuery
# Interface: FullTextQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
***
### toDict()
```ts
toDict(): Record<string, unknown>
```
#### Returns
`Record`&lt;`string`, `unknown`&gt;

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.18.3-beta.0</version>
<version>0.19.0-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.18.3-beta.0</version>
<version>0.19.0-beta.1</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

79
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.18.3-beta.0",
"@lancedb/vectordb-darwin-x64": "0.18.3-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.18.3-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.18.3-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.18.3-beta.0"
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.1",
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.1"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -326,71 +326,6 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.18.3-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.18.3-beta.0.tgz",
"integrity": "sha512-dhJ5VlXV2N/L67mIpTSePhb8krX0FyQgpuz3I+4T4vYuU5JEF3cmedQ5TF5+3cGJhZim4PHRYLkfgCyTlxcqUg==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.18.3-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.18.3-beta.0.tgz",
"integrity": "sha512-SHqPkuyfe87d5skf9GERzdeu6AKvVIbXMUwl5N+dVrE7HH6qiuP2HvOmiyHS2lJFgo0Ph8jSBVzPDxxtjF36Dg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.18.3-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.18.3-beta.0.tgz",
"integrity": "sha512-ohnWsV1n9cxL5ik/GGL4FdQ04Ff9REELcNb1zgmJYyEfwyc6TH9m5HdySO/1ACPZJiLbML4gSvZ10J0Zyb+2SA==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.18.3-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.18.3-beta.0.tgz",
"integrity": "sha512-nhbW2CKaBSUesiYCPBd9fAsDYIJLadlGsrb2gfjODlFy+2Lpnbz6T9SuV7dNqj6KBw+KHhaRhLqta7tyMZm/EA==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.18.3-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.18.3-beta.0.tgz",
"integrity": "sha512-VE4TvMdZ7DIrTC8VYylGxEcH4h2UEejSwGX4PxRzrN9QsCQ4m4pOh3L/UguSO3g+Y1QEaGE20iWQoX6wgSEUhA==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -89,10 +89,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.18.3-beta.0",
"@lancedb/vectordb-darwin-arm64": "0.18.3-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.18.3-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.18.3-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.18.3-beta.0"
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.1",
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.1"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.18.3-beta.0"
version = "0.19.0-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -47,6 +47,12 @@ export {
QueryExecutionOptions,
FullTextSearchOptions,
RecordBatchIterator,
FullTextQuery,
MatchQuery,
PhraseQuery,
BoostQuery,
MultiMatchQuery,
FullTextQueryType,
} from "./query";
export {

View File

@@ -17,6 +17,7 @@ import {
VectorQuery as NativeVectorQuery,
} from "./native";
import { Reranker } from "./rerankers";
export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
private promisedInner?: Promise<NativeBatchIterator>;
private inner?: NativeBatchIterator;
@@ -152,7 +153,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
}
fullTextSearch(
query: string,
query: string | FullTextQuery,
options?: Partial<FullTextSearchOptions>,
): this {
let columns: string[] | null = null;
@@ -164,9 +165,18 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
}
}
this.doCall((inner: NativeQueryType) =>
inner.fullTextSearch(query, columns),
);
this.doCall((inner: NativeQueryType) => {
if (typeof query === "string") {
inner.fullTextSearch({
query: query,
columns: columns,
});
} else {
// If query is a FullTextQuery object, convert it to a dict
const queryObj = query.toDict();
inner.fullTextSearch(queryObj);
}
});
return this;
}
@@ -718,8 +728,167 @@ export class Query extends QueryBase<NativeQuery> {
}
}
nearestToText(query: string, columns?: string[]): Query {
this.doCall((inner) => inner.fullTextSearch(query, columns));
/**
 * Configure a full-text search for this query.
 *
 * @param query - Either a plain query string or a structured `FullTextQuery`.
 * @param columns - (Optional) Columns to search. Only forwarded to the native
 *   layer when `query` is a string.
 * @returns This query, to allow chaining.
 */
nearestToText(query: string | FullTextQuery, columns?: string[]): Query {
  this.doCall((inner) => {
    // A structured query is forwarded in its dictionary form; a plain string
    // is sent together with the requested columns.
    const isPlainText = typeof query === "string";
    if (!isPlainText) {
      inner.fullTextSearch(query.toDict());
      return;
    }
    inner.fullTextSearch({
      query: query,
      columns: columns,
    });
  });
  return this;
}
}
/**
* Enum representing the types of full-text queries supported.
*
* The string value of each member is the key under which the query is nested
* in the dictionary produced by `FullTextQuery.toDict()`.
*
* - `Match`: Performs a full-text search for terms in the query string.
* - `MatchPhrase`: Searches for an exact phrase match in the text.
* - `Boost`: Boosts the relevance score of specific terms in the query.
* - `MultiMatch`: Searches across multiple fields for the query terms.
*/
export enum FullTextQueryType {
/** Term match against a single column. */
Match = "match",
/** Exact phrase match against a single column. */
MatchPhrase = "match_phrase",
/** Combination of a positive and a negative query. */
Boost = "boost",
/** Term match against several columns at once. */
MultiMatch = "multi_match",
}
/**
* Represents a full-text query interface.
* This interface defines the structure and behavior for full-text queries,
* including methods to retrieve the query type and convert the query to a dictionary format.
*/
export interface FullTextQuery {
/** Returns the kind of full-text query this object represents. */
queryType(): FullTextQueryType;
/**
* Converts the query to a plain dictionary. The implementations in this module
* nest their parameters under the query type's string value.
*/
toDict(): Record<string, unknown>;
}
/**
 * A full-text query that matches the terms of a query string against one column.
 */
export class MatchQuery implements FullTextQuery {
  /**
   * Creates an instance of MatchQuery.
   *
   * @param query - The text query to search for.
   * @param column - The name of the column to search within.
   * @param boost - (Optional) The boost factor to influence the relevance score of this query. Default is `1.0`.
   * @param fuzziness - (Optional) The allowed edit distance for fuzzy matching. Default is `0`.
   * @param maxExpansions - (Optional) The maximum number of terms to consider for fuzzy matching. Default is `50`.
   */
  constructor(
    private query: string,
    private column: string,
    private boost: number = 1.0,
    private fuzziness: number = 0,
    private maxExpansions: number = 50,
  ) {}

  /** Identifies this query as a `match` query. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.Match;
  }

  /**
   * Serializes the query as
   * `{ match: { [column]: { query, boost, fuzziness, max_expansions } } }`.
   */
  toDict(): Record<string, unknown> {
    const params = {
      query: this.query,
      boost: this.boost,
      fuzziness: this.fuzziness,
      // biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs
      max_expansions: this.maxExpansions,
    };
    const perColumn = { [this.column]: params };
    return { [this.queryType()]: perColumn };
  }
}
/**
 * A full-text query that searches one column for a phrase.
 */
export class PhraseQuery implements FullTextQuery {
  /**
   * Creates an instance of `PhraseQuery`.
   *
   * @param query - The phrase to search for in the specified column.
   * @param column - The name of the column to search within.
   */
  constructor(
    private query: string,
    private column: string,
  ) {}

  /** Identifies this query as a `match_phrase` query. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.MatchPhrase;
  }

  /** Serializes the query as `{ match_phrase: { [column]: query } }`. */
  toDict(): Record<string, unknown> {
    const perColumn = { [this.column]: this.query };
    return { [this.queryType()]: perColumn };
  }
}
/**
 * A full-text query that combines a positive query with a negative query.
 */
export class BoostQuery implements FullTextQuery {
  /**
   * Creates an instance of BoostQuery.
   *
   * @param positive - The positive query that boosts the relevance score.
   * @param negative - The negative query that reduces the relevance score.
   * @param negativeBoost - The factor by which the negative query reduces the score.
   */
  constructor(
    private positive: FullTextQuery,
    private negative: FullTextQuery,
    private negativeBoost: number,
  ) {}

  /** Identifies this query as a `boost` query. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.Boost;
  }

  /**
   * Serializes the query as `{ boost: { positive, negative, negative_boost } }`,
   * where both sub-queries are serialized through their own `toDict()`.
   */
  toDict(): Record<string, unknown> {
    const body = {
      positive: this.positive.toDict(),
      negative: this.negative.toDict(),
      // biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs
      negative_boost: this.negativeBoost,
    };
    return { [this.queryType()]: body };
  }
}
/**
 * A full-text query that matches the terms of a query string against several columns.
 */
export class MultiMatchQuery implements FullTextQuery {
  /**
   * Creates an instance of MultiMatchQuery.
   *
   * @param query - The text query to search for across multiple columns.
   * @param columns - An array of column names to search within.
   * @param boosts - (Optional) An array of boost factors corresponding to each column. Default is an array of 1.0 for each column.
   *
   * The `boosts` array should have the same length as `columns`. If not provided, all columns will have a default boost of 1.0.
   * If the length of `boosts` is less than `columns`, it will be padded with 1.0s.
   */
  constructor(
    private query: string,
    private columns: string[],
    private boosts: number[] = columns.map(() => 1.0),
  ) {
    // Honour the documented contract: a short `boosts` array is padded with
    // 1.0 so that every column has a boost factor.
    const missing = columns.length - boosts.length;
    if (missing > 0) {
      // Pad on a copy so the caller's array is left untouched.
      this.boosts = [...boosts, ...new Array<number>(missing).fill(1.0)];
    }
  }

  /** Identifies this query as a `multi_match` query. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.MultiMatch;
  }

  /** Serializes the query as `{ multi_match: { query, columns, boost } }`. */
  toDict(): Record<string, unknown> {
    return {
      [this.queryType()]: {
        query: this.query,
        columns: this.columns,
        boost: this.boosts,
      },
    };
  }
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.18.3-beta.0",
"version": "0.19.0-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -3,7 +3,7 @@
use std::sync::Arc;
use lancedb::index::scalar::FullTextSearchQuery;
use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery};
use lancedb::query::ExecutableQuery;
use lancedb::query::Query as LanceDbQuery;
use lancedb::query::QueryBase;
@@ -18,7 +18,7 @@ use crate::error::NapiErrorExt;
use crate::iterator::RecordBatchIterator;
use crate::rerankers::Reranker;
use crate::rerankers::RerankerCallbacks;
use crate::util::parse_distance_type;
use crate::util::{parse_distance_type, parse_fts_query};
#[napi]
pub struct Query {
@@ -38,9 +38,53 @@ impl Query {
}
#[napi]
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
let query = FullTextSearchQuery::new(query).columns(columns);
pub fn full_text_search(&mut self, query: napi::JsUnknown) -> napi::Result<()> {
let query = unsafe { query.cast::<napi::JsObject>() };
let query = if let Some(query_text) = query.get::<_, String>("query").transpose() {
let mut query_text = query_text?;
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
let is_phrase =
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
if is_phrase {
// Remove the surrounding quotes for phrase queries
query_text = query_text[1..query_text.len() - 1].to_string();
}
let query: FtsQuery = match (is_phrase, is_multi_match) {
(false, _) => MatchQuery::new(query_text).into(),
(true, false) => PhraseQuery::new(query_text).into(),
(true, true) => {
return Err(napi::Error::from_reason(
"Phrase queries cannot be used with multiple columns.",
));
}
};
let mut query = FullTextSearchQuery::new_query(query);
if let Some(cols) = columns {
if !cols.is_empty() {
query = query.with_columns(&cols).map_err(|e| {
napi::Error::from_reason(format!(
"Failed to set full text search columns: {}",
e
))
})?;
}
}
query
} else if let Some(query) = query.get::<_, napi::JsObject>("query")? {
let query = parse_fts_query(&query)?;
FullTextSearchQuery::new_query(query)
} else {
return Err(napi::Error::from_reason(
"Invalid full text search query object".to_string(),
));
};
self.inner = self.inner.clone().full_text_search(query);
Ok(())
}
#[napi]
@@ -195,9 +239,53 @@ impl VectorQuery {
}
#[napi]
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
let query = FullTextSearchQuery::new(query).columns(columns);
pub fn full_text_search(&mut self, query: napi::JsUnknown) -> napi::Result<()> {
let query = unsafe { query.cast::<napi::JsObject>() };
let query = if let Some(query_text) = query.get::<_, String>("query").transpose() {
let mut query_text = query_text?;
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
let is_phrase =
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
if is_phrase {
// Remove the surrounding quotes for phrase queries
query_text = query_text[1..query_text.len() - 1].to_string();
}
let query: FtsQuery = match (is_phrase, is_multi_match) {
(false, _) => MatchQuery::new(query_text).into(),
(true, false) => PhraseQuery::new(query_text).into(),
(true, true) => {
return Err(napi::Error::from_reason(
"Phrase queries cannot be used with multiple columns.",
));
}
};
let mut query = FullTextSearchQuery::new_query(query);
if let Some(cols) = columns {
if !cols.is_empty() {
query = query.with_columns(&cols).map_err(|e| {
napi::Error::from_reason(format!(
"Failed to set full text search columns: {}",
e
))
})?;
}
}
query
} else if let Some(query) = query.get::<_, napi::JsObject>("query")? {
let query = parse_fts_query(&query)?;
FullTextSearchQuery::new_query(query)
} else {
return Err(napi::Error::from_reason(
"Invalid full text search query object".to_string(),
));
};
self.inner = self.inner.clone().full_text_search(query);
Ok(())
}
#[napi]

View File

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery};
use lancedb::DistanceType;
pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<DistanceType> {
@@ -15,3 +16,144 @@ pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<Dista
))),
}
}
pub fn parse_fts_query(query: &napi::JsObject) -> napi::Result<FtsQuery> {
let query_type = query
.get_property_names()?
.get_element::<napi::JsString>(0)?;
let query_type = query_type.into_utf8()?.into_owned()?;
let query_value =
query
.get::<_, napi::JsObject>(&query_type)?
.ok_or(napi::Error::from_reason(format!(
"query value {} not found",
query_type
)))?;
match query_type.as_str() {
"match" => {
let column = query_value
.get_property_names()?
.get_element::<napi::JsString>(0)?
.into_utf8()?
.into_owned()?;
let params =
query_value
.get::<_, napi::JsObject>(&column)?
.ok_or(napi::Error::from_reason(format!(
"column {} not found",
column
)))?;
let query = params
.get::<_, napi::JsString>("query")?
.ok_or(napi::Error::from_reason("query not found"))?
.into_utf8()?
.into_owned()?;
let boost = params
.get::<_, napi::JsNumber>("boost")?
.ok_or(napi::Error::from_reason("boost not found"))?
.get_double()? as f32;
let fuzziness = params
.get::<_, napi::JsNumber>("fuzziness")?
.map(|f| f.get_uint32())
.transpose()?;
let max_expansions = params
.get::<_, napi::JsNumber>("max_expansions")?
.ok_or(napi::Error::from_reason("max_expansions not found"))?
.get_uint32()? as usize;
let query = MatchQuery::new(query)
.with_column(Some(column))
.with_boost(boost)
.with_fuzziness(fuzziness)
.with_max_expansions(max_expansions);
Ok(query.into())
}
"match_phrase" => {
let column = query_value
.get_property_names()?
.get_element::<napi::JsString>(0)?
.into_utf8()?
.into_owned()?;
let query = query_value
.get::<_, napi::JsString>(&column)?
.ok_or(napi::Error::from_reason(format!(
"column {} not found",
column
)))?
.into_utf8()?
.into_owned()?;
let query = PhraseQuery::new(query).with_column(Some(column));
Ok(query.into())
}
"boost" => {
let positive = query_value
.get::<_, napi::JsObject>("positive")?
.ok_or(napi::Error::from_reason("positive not found"))?;
let negative = query_value
.get::<_, napi::JsObject>("negative")?
.ok_or(napi::Error::from_reason("negative not found"))?;
let negative_boost = query_value
.get::<_, napi::JsNumber>("negative_boost")?
.ok_or(napi::Error::from_reason("negative_boost not found"))?
.get_double()? as f32;
let positive = parse_fts_query(&positive)?;
let negative = parse_fts_query(&negative)?;
let query = BoostQuery::new(positive, negative, Some(negative_boost));
Ok(query.into())
}
"multi_match" => {
let query = query_value
.get::<_, napi::JsString>("query")?
.ok_or(napi::Error::from_reason("query not found"))?
.into_utf8()?
.into_owned()?;
let columns_array = query_value
.get::<_, napi::JsTypedArray>("columns")?
.ok_or(napi::Error::from_reason("columns not found"))?;
let columns_num = columns_array.get_array_length()?;
let mut columns = Vec::with_capacity(columns_num as usize);
for i in 0..columns_num {
let column = columns_array
.get_element::<napi::JsString>(i)?
.into_utf8()?
.into_owned()?;
columns.push(column);
}
let boost_array = query_value
.get::<_, napi::JsTypedArray>("boost")?
.ok_or(napi::Error::from_reason("boost not found"))?;
if boost_array.get_array_length()? != columns_num {
return Err(napi::Error::from_reason(format!(
"boost array length ({}) does not match columns length ({})",
boost_array.get_array_length()?,
columns_num
)));
}
let mut boost = Vec::with_capacity(columns_num as usize);
for i in 0..columns_num {
let b = boost_array.get_element::<napi::JsNumber>(i)?.get_double()? as f32;
boost.push(b);
}
let query =
MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| {
napi::Error::from_reason(format!("Error creating MultiMatchQuery: {}", e))
})?;
Ok(query.into())
}
_ => Err(napi::Error::from_reason(format!(
"Unsupported query type: {}",
query_type
))),
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.0-beta.0"
current_version = "0.22.0-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.22.0-beta.0"
version = "0.22.0-beta.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -4,7 +4,9 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import abc
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from typing import (
TYPE_CHECKING,
Dict,
@@ -83,6 +85,196 @@ def ensure_vector_query(
return val
class FullTextQueryType(Enum):
    """The kinds of structured full-text queries.

    Each value is the key used for that query in its dictionary form.
    """

    # Term match against a single column.
    MATCH = "match"
    # Phrase match against a single column.
    MATCH_PHRASE = "match_phrase"
    # Combination of a positive and a negative query.
    BOOST = "boost"
    # Term match against several columns at once.
    MULTI_MATCH = "multi_match"
class FullTextQuery(abc.ABC, pydantic.BaseModel):
    """Abstract base class for structured full-text queries."""

    @abc.abstractmethod
    def query_type(self) -> FullTextQueryType:
        """
        Get the query type of the query.

        Returns
        -------
        FullTextQueryType
            The type of the query.
        """

    @abc.abstractmethod
    def to_dict(self) -> dict:
        """
        Convert the query to a dictionary.

        Returns
        -------
        dict
            The query as a dictionary.
        """
class MatchQuery(FullTextQuery):
    """Match query for full-text search against a single column."""

    # The attributes are declared as pydantic fields: FullTextQuery derives from
    # pydantic.BaseModel, which rejects assignment to undeclared attributes.
    query: str
    column: str
    boost: float = 1.0
    fuzziness: Optional[int] = 0
    max_expansions: int = 50

    def __init__(
        self,
        query: str,
        column: str,
        *,
        boost: float = 1.0,
        fuzziness: Optional[int] = 0,
        max_expansions: int = 50,
    ):
        """
        Match query for full-text search.

        Parameters
        ----------
        query : str
            The query string to match against.
        column : str
            The name of the column to match against.
        boost : float, default 1.0
            The boost factor for the query.
            The score of each matching document is multiplied by this value.
        fuzziness : int, optional
            The maximum edit distance for each term in the match query.
            Defaults to 0 (exact match).
            If None, fuzziness is applied automatically by the rules:
                - 0 for terms with length <= 2
                - 1 for terms with length <= 5
                - 2 for terms with length > 5
        max_expansions : int, optional
            The maximum number of terms to consider for fuzzy matching.
            Defaults to 50.
        """
        # Let pydantic initialise the model instead of assigning attributes on
        # an uninitialised instance.
        super().__init__(
            query=query,
            column=column,
            boost=boost,
            fuzziness=fuzziness,
            max_expansions=max_expansions,
        )

    def query_type(self) -> FullTextQueryType:
        """Return `FullTextQueryType.MATCH`."""
        return FullTextQueryType.MATCH

    def to_dict(self) -> dict:
        """Return the query as `{"match": {column: {...parameters...}}}`."""
        return {
            "match": {
                self.column: {
                    "query": self.query,
                    "boost": self.boost,
                    "fuzziness": self.fuzziness,
                    "max_expansions": self.max_expansions,
                }
            }
        }
class PhraseQuery(FullTextQuery):
    """Phrase query for full-text search against a single column."""

    # The attributes are declared as pydantic fields: FullTextQuery derives from
    # pydantic.BaseModel, which rejects assignment to undeclared attributes.
    query: str
    column: str

    def __init__(self, query: str, column: str):
        """
        Phrase query for full-text search.

        Parameters
        ----------
        query : str
            The query string to match against.
        column : str
            The name of the column to match against.
        """
        # Let pydantic initialise the model instead of assigning attributes on
        # an uninitialised instance.
        super().__init__(query=query, column=column)

    def query_type(self) -> FullTextQueryType:
        """Return `FullTextQueryType.MATCH_PHRASE`."""
        return FullTextQueryType.MATCH_PHRASE

    def to_dict(self) -> dict:
        """Return the query as `{"match_phrase": {column: query}}`."""
        return {
            "match_phrase": {
                self.column: self.query,
            }
        }
class BoostQuery(FullTextQuery):
    """Boost query combining a positive and a negative full-text query."""

    # The attributes are declared as pydantic fields: FullTextQuery derives from
    # pydantic.BaseModel, which rejects assignment to undeclared attributes.
    positive: FullTextQuery
    negative: FullTextQuery
    negative_boost: float

    def __init__(
        self,
        positive: FullTextQuery,
        negative: FullTextQuery,
        negative_boost: float,
    ):
        """
        Boost query for full-text search.

        Parameters
        ----------
        positive : FullTextQuery
            The positive query object.
        negative : FullTextQuery
            The negative query object.
        negative_boost : float
            The boost factor for the negative query.
        """
        # Let pydantic initialise the model instead of assigning attributes on
        # an uninitialised instance.
        super().__init__(
            positive=positive,
            negative=negative,
            negative_boost=negative_boost,
        )

    def query_type(self) -> FullTextQueryType:
        """Return `FullTextQueryType.BOOST`."""
        return FullTextQueryType.BOOST

    def to_dict(self) -> dict:
        """Return the query as `{"boost": {...}}` with both sub-queries serialized."""
        return {
            "boost": {
                "positive": self.positive.to_dict(),
                "negative": self.negative.to_dict(),
                "negative_boost": self.negative_boost,
            }
        }
class MultiMatchQuery(FullTextQuery):
    """Multi-match query for full-text search against several columns."""

    # The attributes are declared as pydantic fields: FullTextQuery derives from
    # pydantic.BaseModel, which rejects assignment to undeclared attributes.
    query: str
    columns: List[str]
    boosts: List[float]

    def __init__(
        self,
        query: str,
        columns: list[str],
        *,
        boosts: Optional[list[float]] = None,
    ):
        """
        Multi-match query for full-text search.

        Parameters
        ----------
        query : str
            The query string to match against.
        columns : list[str]
            The list of columns to match against.
        boosts : list[float], optional
            The list of boost factors for each column. If not provided,
            all columns will have the same boost factor.
        """
        if boosts is None:
            boosts = [1.0] * len(columns)
        # Let pydantic initialise the model instead of assigning attributes on
        # an uninitialised instance.
        super().__init__(query=query, columns=columns, boosts=boosts)

    def query_type(self) -> FullTextQueryType:
        """Return `FullTextQueryType.MULTI_MATCH`."""
        return FullTextQueryType.MULTI_MATCH

    def to_dict(self) -> dict:
        """Return the query as `{"multi_match": {"query", "columns", "boost"}}`."""
        return {
            "multi_match": {
                "query": self.query,
                "columns": self.columns,
                "boost": self.boosts,
            }
        }
class FullTextSearchQuery(pydantic.BaseModel):
"""A LanceDB Full Text Search Query
@@ -92,18 +284,13 @@ class FullTextSearchQuery(pydantic.BaseModel):
The columns to search
If None, then the table should select the column automatically.
query: str
The query to search for
limit: Optional[int] = None
The limit on the number of results to return
wand_factor: Optional[float] = None
The wand factor to use for the search
query: str | FullTextQuery
If a string, it is treated as a MatchQuery.
If a FullTextQuery object, it is used directly.
"""
columns: Optional[List[str]] = None
query: str
limit: Optional[int] = None
wand_factor: Optional[float] = None
query: Union[str, FullTextQuery]
class Query(pydantic.BaseModel):
@@ -712,13 +899,14 @@ class LanceQueryBuilder(ABC):
"""
raise NotImplementedError
def text(self, text: str) -> Self:
def text(self, text: str | FullTextQuery) -> Self:
"""Set the text to search for.
Parameters
----------
text: str
The text to search for.
text: str | FullTextQuery
If a string, it is treated as a MatchQuery.
If a FullTextQuery object, it is used directly.
Returns
-------
@@ -1084,7 +1272,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
def __init__(
self,
table: "Table",
query: str,
query: str | FullTextQuery,
ordering_field_name: Optional[str] = None,
fts_columns: Optional[Union[str, List[str]]] = None,
):
@@ -1691,7 +1879,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._vector = vector
return self
def text(self, text: str) -> LanceHybridQueryBuilder:
def text(self, text: str | FullTextQuery) -> LanceHybridQueryBuilder:
self._text = text
return self
@@ -2088,7 +2276,7 @@ class AsyncQuery(AsyncQueryBase):
)
def nearest_to_text(
self, query: str, columns: Union[str, List[str], None] = None
self, query: str | FullTextQuery, columns: Union[str, List[str], None] = None
) -> AsyncFTSQuery:
"""
Find the documents that are most relevant to the given text query.
@@ -2114,9 +2302,13 @@ class AsyncQuery(AsyncQueryBase):
columns = [columns]
if columns is None:
columns = []
return AsyncFTSQuery(
self._inner.nearest_to_text({"query": query, "columns": columns})
)
if isinstance(query, str):
return AsyncFTSQuery(
self._inner.nearest_to_text({"query": query, "columns": columns})
)
# FullTextQuery object
return AsyncFTSQuery(self._inner.nearest_to_text(query.to_dict()))
class AsyncFTSQuery(AsyncQueryBase):
@@ -2399,7 +2591,7 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase):
return self
def nearest_to_text(
self, query: str, columns: Union[str, List[str], None] = None
self, query: str | FullTextQuery, columns: Union[str, List[str], None] = None
) -> AsyncHybridQuery:
"""
Find the documents that are most relevant to the given text query,
@@ -2429,9 +2621,13 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase):
columns = [columns]
if columns is None:
columns = []
return AsyncHybridQuery(
self._inner.nearest_to_text({"query": query, "columns": columns})
)
if isinstance(query, str):
return AsyncHybridQuery(
self._inner.nearest_to_text({"query": query, "columns": columns})
)
# FullTextQuery object
return AsyncHybridQuery(self._inner.nearest_to_text(query.to_dict()))
async def to_batches(
self, *, max_batch_length: Optional[int] = None

View File

@@ -3373,8 +3373,6 @@ class AsyncTable:
async_query = async_query.nearest_to_text(
query.full_text_query.query, query.full_text_query.columns
)
if query.full_text_query.limit is not None:
async_query = async_query.limit(query.full_text_query.limit)
return async_query

View File

@@ -31,6 +31,7 @@ async def some_table(db_async):
{
"id": list(range(NROWS)),
"vector": sample_fixed_size_list_array(NROWS, DIM),
"fsb": pa.array([bytes([i]) for i in range(NROWS)], pa.binary(1)),
"tags": [
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
],
@@ -85,6 +86,16 @@ async def test_create_scalar_index(some_table: AsyncTable):
assert len(indices) == 0
@pytest.mark.asyncio
async def test_create_fixed_size_binary_index(some_table: AsyncTable):
    """A BTree index can be created on the fixed-size binary column `fsb`."""
    await some_table.create_index("fsb", config=BTree())

    listed = await some_table.list_indices()
    expected_repr = '[Index(BTree, columns=["fsb"], name="fsb_idx")]'
    assert str(listed) == expected_repr
    assert len(listed) == 1

    # Exactly one index is expected, and it must describe the `fsb` column.
    created = listed[0]
    assert created.index_type == "BTree"
    assert created.columns == ["fsb"]
@pytest.mark.asyncio
async def test_create_bitmap_index(some_table: AsyncTable):
await some_table.create_index("id", config=Bitmap())

View File

@@ -444,6 +444,16 @@ def test_query_sync_fts():
"prefilter": True,
"with_row_id": True,
"version": None,
} or body == {
"full_text_query": {
"query": "puppy",
"columns": ["description", "name"],
},
"k": 42,
"vector": [],
"prefilter": True,
"with_row_id": True,
"version": None,
}
return pa.table({"id": [1, 2, 3]})

View File

@@ -8,19 +8,19 @@ use arrow::array::Array;
use arrow::array::ArrayData;
use arrow::pyarrow::FromPyArrow;
use arrow::pyarrow::IntoPyArrow;
use lancedb::index::scalar::FullTextSearchQuery;
use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery};
use lancedb::query::QueryExecutionOptions;
use lancedb::query::QueryFilter;
use lancedb::query::{
ExecutableQuery, Query as LanceDbQuery, QueryBase, Select, VectorQuery as LanceDbVectorQuery,
};
use lancedb::table::AnyQuery;
use pyo3::exceptions::PyNotImplementedError;
use pyo3::exceptions::PyRuntimeError;
use pyo3::exceptions::{PyNotImplementedError, PyValueError};
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
use pyo3::pymethods;
use pyo3::types::PyDict;
use pyo3::types::PyList;
use pyo3::types::{PyDict, PyString};
use pyo3::Bound;
use pyo3::IntoPyObject;
use pyo3::PyAny;
@@ -31,7 +31,7 @@ use pyo3_async_runtimes::tokio::future_into_py;
use crate::arrow::RecordBatchStream;
use crate::error::PythonErrorExt;
use crate::util::parse_distance_type;
use crate::util::{parse_distance_type, parse_fts_query};
// Python representation of full text search parameters
#[derive(Clone)]
@@ -46,8 +46,8 @@ pub struct PyFullTextSearchQuery {
impl From<FullTextSearchQuery> for PyFullTextSearchQuery {
fn from(query: FullTextSearchQuery) -> Self {
PyFullTextSearchQuery {
columns: query.columns,
query: query.query,
columns: query.columns().into_iter().collect(),
query: query.query.query().to_owned(),
limit: query.limit,
wand_factor: query.wand_factor,
}
@@ -236,22 +236,61 @@ impl Query {
}
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<FTSQuery> {
let query_text = query
let fts_query = query
.get_item("query")?
.ok_or(PyErr::new::<PyRuntimeError, _>(
"Query text is required for nearest_to_text",
))?
.extract::<String>()?;
let columns = query
.get_item("columns")?
.map(|columns| columns.extract::<Vec<String>>())
.transpose()?;
))?;
let fts_query = FullTextSearchQuery::new(query_text).columns(columns);
let query = if let Ok(query_text) = fts_query.downcast::<PyString>() {
let mut query_text = query_text.to_string();
let columns = query
.get_item("columns")?
.map(|columns| columns.extract::<Vec<String>>())
.transpose()?;
let is_phrase =
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
if is_phrase {
// Remove the surrounding quotes for phrase queries
query_text = query_text[1..query_text.len() - 1].to_string();
}
let query: FtsQuery = match (is_phrase, is_multi_match) {
(false, _) => MatchQuery::new(query_text).into(),
(true, false) => PhraseQuery::new(query_text).into(),
(true, true) => {
return Err(PyValueError::new_err(
"Phrase queries cannot be used with multiple columns.",
));
}
};
let mut query = FullTextSearchQuery::new_query(query);
if let Some(cols) = columns {
if !cols.is_empty() {
query = query.with_columns(&cols).map_err(|e| {
PyValueError::new_err(format!(
"Failed to set full text search columns: {}",
e
))
})?;
}
}
query
} else if let Ok(query) = query.downcast::<PyDict>() {
let query = parse_fts_query(query)?;
FullTextSearchQuery::new_query(query)
} else {
return Err(PyValueError::new_err(
"query must be a string or a Query object",
));
};
Ok(FTSQuery {
fts_query,
inner: self.inner.clone(),
fts_query: query,
})
}
@@ -386,7 +425,7 @@ impl FTSQuery {
}
pub fn get_query(&self) -> String {
self.fts_query.query.clone()
self.fts_query.query.query().to_owned()
}
pub fn to_query_request(&self) -> PyQueryRequest {

View File

@@ -3,11 +3,15 @@
use std::sync::Mutex;
use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery};
use lancedb::DistanceType;
use pyo3::prelude::{PyAnyMethods, PyDictMethods, PyListMethods};
use pyo3::types::PyDict;
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyfunction, PyResult,
};
use pyo3::{Bound, PyAny};
/// A wrapper around a rust builder
///
@@ -59,3 +63,116 @@ pub fn validate_table_name(table_name: &str) -> PyResult<()> {
lancedb::utils::validate_table_name(table_name)
.map_err(|e| PyValueError::new_err(e.to_string()))
}
pub fn parse_fts_query(query: &Bound<'_, PyDict>) -> PyResult<FtsQuery> {
let query_type = query.keys().get_item(0)?.extract::<String>()?;
let query_value = query
.get_item(&query_type)?
.ok_or(PyValueError::new_err(format!(
"Query type {} not found",
query_type
)))?;
let query_value = query_value.downcast::<PyDict>()?;
match query_type.as_str() {
"match" => {
let column = query_value.keys().get_item(0)?.extract::<String>()?;
let params = query_value
.get_item(&column)?
.ok_or(PyValueError::new_err(format!(
"column {} not found",
column
)))?;
let params = params.downcast::<PyDict>()?;
let query = params
.get_item("query")?
.ok_or(PyValueError::new_err("query not found"))?
.extract::<String>()?;
let boost = params
.get_item("boost")?
.ok_or(PyValueError::new_err("boost not found"))?
.extract::<f32>()?;
let fuzziness = params
.get_item("fuzziness")?
.ok_or(PyValueError::new_err("fuzziness not found"))?
.extract::<Option<u32>>()?;
let max_expansions = params
.get_item("max_expansions")?
.ok_or(PyValueError::new_err("max_expansions not found"))?
.extract::<usize>()?;
let query = MatchQuery::new(query)
.with_column(Some(column))
.with_boost(boost)
.with_fuzziness(fuzziness)
.with_max_expansions(max_expansions);
Ok(query.into())
}
"match_phrase" => {
let column = query_value.keys().get_item(0)?.extract::<String>()?;
let query = query_value
.get_item(&column)?
.ok_or(PyValueError::new_err(format!(
"column {} not found",
column
)))?
.extract::<String>()?;
let query = PhraseQuery::new(query).with_column(Some(column));
Ok(query.into())
}
"boost" => {
let positive: Bound<'_, PyAny> = query_value
.get_item("positive")?
.ok_or(PyValueError::new_err("positive not found"))?;
let positive = positive.downcast::<PyDict>()?;
let negative = query_value
.get_item("negative")?
.ok_or(PyValueError::new_err("negative not found"))?;
let negative = negative.downcast::<PyDict>()?;
let negative_boost = query_value
.get_item("negative_boost")?
.ok_or(PyValueError::new_err("negative_boost not found"))?
.extract::<f32>()?;
let positive_query = parse_fts_query(positive)?;
let negative_query = parse_fts_query(negative)?;
let query = BoostQuery::new(positive_query, negative_query, Some(negative_boost));
Ok(query.into())
}
"multi_match" => {
let query = query_value
.get_item("query")?
.ok_or(PyValueError::new_err("query not found"))?
.extract::<String>()?;
let columns = query_value
.get_item("columns")?
.ok_or(PyValueError::new_err("columns not found"))?
.extract::<Vec<String>>()?;
let boost = query_value
.get_item("boost")?
.ok_or(PyValueError::new_err("boost not found"))?
.extract::<Vec<f32>>()?;
let query =
MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| {
PyValueError::new_err(format!("Error creating MultiMatchQuery: {}", e))
})?;
Ok(query.into())
}
_ => Err(PyValueError::new_err(format!(
"Unsupported query type: {}",
query_type
))),
}
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.18.3-beta.0"
version = "0.19.0-beta.1"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.18.3-beta.0"
version = "0.19.0-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -80,5 +80,6 @@ impl FtsIndexBuilder {
}
}
pub use lance_index::scalar::inverted::query::*;
pub use lance_index::scalar::inverted::TokenizerConfig;
pub use lance_index::scalar::FullTextSearchQuery;

View File

@@ -1056,7 +1056,7 @@ impl VectorQuery {
})?;
let mut results = reranker
.rerank_hybrid(&fts_query.query, vec_results, fts_results)
.rerank_hybrid(&fts_query.query.query(), vec_results, fts_results)
.await?;
check_reranker_result(&results)?;

View File

@@ -52,6 +52,10 @@ impl ServerVersion {
/// Reports whether the wrapped server version is at least 0.2.0, the
/// threshold this check uses for multivector support.
pub fn support_multivector(&self) -> bool {
    let minimum = semver::Version::new(0, 2, 0);
    self.0 >= minimum
}
/// Reports whether the wrapped server version is at least 0.3.0, the
/// threshold this check uses for structural full-text search support.
pub fn support_structural_fts(&self) -> bool {
    let minimum = semver::Version::new(0, 3, 0);
    self.0 >= minimum
}
}
pub const OPT_REMOTE_PREFIX: &str = "remote_database_";

View File

@@ -155,7 +155,11 @@ impl<S: HttpSend> RemoteTable<S> {
Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
}
fn apply_query_params(body: &mut serde_json::Value, params: &QueryRequest) -> Result<()> {
fn apply_query_params(
&self,
body: &mut serde_json::Value,
params: &QueryRequest,
) -> Result<()> {
body["prefilter"] = params.prefilter.into();
if let Some(offset) = params.offset {
body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset));
@@ -209,10 +213,17 @@ impl<S: HttpSend> RemoteTable<S> {
message: "Wand factor is not yet supported in LanceDB Cloud".into(),
});
}
body["full_text_query"] = serde_json::json!({
"columns": full_text_search.columns,
"query": full_text_search.query,
})
if self.server_version.support_structural_fts() {
body["full_text_query"] = serde_json::json!({
"query": full_text_search.query.clone(),
});
} else {
body["full_text_query"] = serde_json::json!({
"columns": full_text_search.columns().into_iter().collect::<Vec<_>>(),
"query": full_text_search.query.query(),
})
}
}
Ok(())
@@ -223,7 +234,7 @@ impl<S: HttpSend> RemoteTable<S> {
mut body: serde_json::Value,
query: &VectorQueryRequest,
) -> Result<Vec<serde_json::Value>> {
Self::apply_query_params(&mut body, &query.base)?;
self.apply_query_params(&mut body, &query.base)?;
// Apply general parameters, before we dispatch based on number of query vectors.
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
@@ -346,7 +357,7 @@ impl<S: HttpSend> RemoteTable<S> {
match query {
AnyQuery::Query(query) => {
let mut body = base_body.clone();
Self::apply_query_params(&mut body, query)?;
self.apply_query_params(&mut body, query)?;
// Empty vector can be passed if no vector search is performed.
body["vector"] = serde_json::Value::Array(Vec::new());
Ok(vec![body])
@@ -1683,7 +1694,18 @@ mod tests {
"prefilter": true,
"version": null
});
assert_eq!(body, expected_body);
let expected_body_2 = serde_json::json!({
"full_text_query": {
"columns": ["b","a"],
"query": "hello world",
},
"k": 10,
"vector": [],
"with_row_id": true,
"prefilter": true,
"version": null
});
assert!(body == expected_body || body == expected_body_2);
let data = RecordBatch::try_new(
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
@@ -1702,7 +1724,8 @@ mod tests {
.query()
.full_text_search(
FullTextSearchQuery::new("hello world".into())
.columns(Some(vec!["a".into(), "b".into()])),
.with_columns(&["a".into(), "b".into()])
.unwrap(),
)
.with_row_id()
.limit(10)

View File

@@ -135,6 +135,7 @@ pub fn supported_btree_data_type(dtype: &DataType) -> bool {
| DataType::Date32
| DataType::Date64
| DataType::Timestamp(_, _)
| DataType::FixedSizeBinary(_)
)
}