Compare commits

..

7 Commits

Author SHA1 Message Date
Lance Release
c625b6f2b2 Bump version: 0.24.0-beta.0 → 0.24.0 2025-06-20 05:46:05 +00:00
Lance Release
bec8fe6547 Bump version: 0.23.1-beta.2 → 0.24.0-beta.0 2025-06-20 05:46:04 +00:00
BubbleCal
dc1150c011 chore: upgrade lance to 0.30.0 (#2451)
lance [release details](https://github.com/lancedb/lance/releases/tag/v0.30.0)
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Chores**
- Updated dependency specifications to use exact version numbers instead
of referencing a git repository and tag.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-06-20 11:27:20 +08:00
Will Jones
afaefc6264 ci: fix package lock again (#2449)
We are able to push commits over here:
cb7293e073/.github/workflows/make-release-commit.yml (L88-L95)

So I think it's safe to assume this will work.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Chores**
- Updated workflow configuration to improve authentication and branch
targeting for automated release processes.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-06-19 08:51:48 -07:00
BubbleCal
cb70ff8cee feat!: switch default FTS to native lance FTS (#2428)
This switches the default FTS to native Lance FTS for the Python sync table
API; the other APIs have already switched to the native implementation.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- The default behavior for creating a full-text search index now uses
the new implementation rather than the legacy one.
- **Bug Fixes**
- Improved handling and error messages for phrase queries in full-text
search.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-06-19 10:38:34 +08:00
BubbleCal
cbb5a841b1 feat: support prefix matching and must_not clause (#2441) 2025-06-19 10:32:32 +08:00
Lance Release
c72f6770fd Bump version: 0.20.1-beta.1 → 0.20.1-beta.2 2025-06-18 23:33:57 +00:00
31 changed files with 223 additions and 113 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.20.1-beta.1" current_version = "0.20.1-beta.2"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -550,6 +550,9 @@ jobs:
bash ci/update_lockfiles.sh bash ci/update_lockfiles.sh
- name: Push new commit - name: Push new commit
uses: ad-m/github-push-action@master uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: main
- name: Notify Slack Action - name: Notify Slack Action
uses: ravsamhq/notify-slack-action@2.3.0 uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() }} if: ${{ always() }}

47
Cargo.lock generated
View File

@@ -2814,7 +2814,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]] [[package]]
name = "fsst" name = "fsst"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b6a55335126d20524dc83cf0638b7ca1b5d9736f9064a89c47e4d028cbaccdb"
dependencies = [ dependencies = [
"rand 0.8.5", "rand 0.8.5",
] ]
@@ -3907,7 +3908,8 @@ dependencies = [
[[package]] [[package]]
name = "lance" name = "lance"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a9bf2cf9ff1d8b8a8c822cf4aaec7023fbe056d3348dce347957695470bd19"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-arith", "arrow-arith",
@@ -3971,7 +3973,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-arrow" name = "lance-arrow"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82fc2b0dd2598f4b390445d63a3906f84d928c250b208d382d4cfc22681b23c0"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -3989,7 +3992,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-core" name = "lance-core"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4118c6e2ac2d26ff80e55708f337c4593381a32751f2a79a03d92452885bd648"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4026,7 +4030,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-datafusion" name = "lance-datafusion"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf8b01e9a5f15d4975423ea1495df85cf36f9036c3ed999190d4631ffbd28b6"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4056,7 +4061,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-datagen" name = "lance-datagen"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fbedb84243fb2fe255b4e9ac298019d2e93e83fcc9ce2eb67a4ac7cab427dda"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4072,7 +4078,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-encoding" name = "lance-encoding"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a0e078414cce96da2e2b37290d0b38a81ba6b0ebcad6806b231c2cd8d04427a"
dependencies = [ dependencies = [
"arrayref", "arrayref",
"arrow", "arrow",
@@ -4112,7 +4119,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-file" name = "lance-file"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce7deba5b59118f7ef726859ace192b7cc7da4e6639147d2a3908a2de621ce98"
dependencies = [ dependencies = [
"arrow-arith", "arrow-arith",
"arrow-array", "arrow-array",
@@ -4147,7 +4155,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-index" name = "lance-index"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bee1aecc60c759436d8f952e2d9c4e93d1940bfbdc1869068b4ac6b01e86b2f"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4203,7 +4212,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-io" name = "lance-io"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61a48f6a3f5433ca5095993fcd8bb47efbf473af852b9aca1e175a3d7bbf67fd"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-arith", "arrow-arith",
@@ -4243,7 +4253,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-linalg" name = "lance-linalg"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "620dedc792311862fc336b2651e825d2b450bbade7bfc819b7b182c3bb585c1e"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-ord", "arrow-ord",
@@ -4267,7 +4278,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-table" name = "lance-table"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b010312330943c5e81628722a50e3679688d96065348659b7913964f13765cf"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4307,7 +4319,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-testing" name = "lance-testing"
version = "0.30.0" version = "0.30.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.30.0-beta.1#a499cfa06b7221b895bc13908cfc2ee7aadba46e" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efa10957cdadef40e853896a67282cd29898775b29715eec42dd49bc3b3c8554"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-schema", "arrow-schema",
@@ -4318,7 +4331,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb" name = "lancedb"
version = "0.20.1-beta.1" version = "0.20.1-beta.2"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4405,7 +4418,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb-node" name = "lancedb-node"
version = "0.20.1-beta.1" version = "0.20.1-beta.2"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-ipc", "arrow-ipc",
@@ -4430,7 +4443,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb-nodejs" name = "lancedb-nodejs"
version = "0.20.1-beta.1" version = "0.20.1-beta.2"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-ipc", "arrow-ipc",
@@ -4450,7 +4463,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb-python" name = "lancedb-python"
version = "0.23.1-beta.1" version = "0.23.1-beta.2"
dependencies = [ dependencies = [
"arrow", "arrow",
"env_logger", "env_logger",

View File

@@ -21,14 +21,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.30.0", "features" = ["dynamodb"], tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance = { "version" = "=0.30.0", "features" = ["dynamodb"] }
lance-io = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-io = "=0.30.0"
lance-index = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-index = "=0.30.0"
lance-linalg = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-linalg = "=0.30.0"
lance-table = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-table = "=0.30.0"
lance-testing = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-testing = "=0.30.0"
lance-datafusion = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-datafusion = "=0.30.0"
lance-encoding = { version = "=0.30.0", tag = "v0.30.0-beta.1", git="https://github.com/lancedb/lance.git" } lance-encoding = "=0.30.0"
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false } arrow = { version = "55.1", optional = false }
arrow-array = "55.1" arrow-array = "55.1"

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.20.1-beta.1</version> <version>0.20.1-beta.2</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.20.1-beta.1</version> <version>0.20.1-beta.2</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>LanceDB Parent</name> <name>LanceDB Parent</name>

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.20.1-beta.1", "@lancedb/vectordb-darwin-arm64": "0.20.1-beta.2",
"@lancedb/vectordb-darwin-x64": "0.20.1-beta.1", "@lancedb/vectordb-darwin-x64": "0.20.1-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.1", "@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.1", "@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.1" "@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.2"
}, },
"peerDependencies": { "peerDependencies": {
"@apache-arrow/ts": "^14.0.2", "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": { "node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.1-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.1-beta.2.tgz",
"integrity": "sha512-DPD8gwFQz5aENYYbTFS/l3YX/rqzS6Kj2B4IZERccVFULQsdR5YwtaAfFwTMp7NSnsjWKwJAknohiMZlJr4njQ==", "integrity": "sha512-mqi0yI+ZwBTydaDy1FRHAUZwrWS28u6tbHTe1s4uSrmERbVI6PfmoPR+NZWWAp6ZhlseSdl/+yeI4imk11rQSw==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -339,9 +339,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-darwin-x64": { "node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.1-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.1-beta.2.tgz",
"integrity": "sha512-lTPtlRSTC08UgQW5Bv8WYhdbogAgUJ+9ejg+UE+fwP9gEsgEKXL/SHBm+9gmAlTo7LbrxJjg0CtCde/mW68UTw==", "integrity": "sha512-m8EYYA8JZIeNsJqQsBDUMu6r31/u7FzpjonJ4Y+CjapVl6UdvI65KUkeL2dYrFao++RuIoaiqcm3e7gRgFZpXQ==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -351,9 +351,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-arm64-gnu": { "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.1-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.1-beta.2.tgz",
"integrity": "sha512-w/3O9FvwQiGegYsM21yZ0FezfOFVsW7HttYwwPzZMZaCpK3/i+LvZVSqwO4qXHHJBtHgKevonINyvVlg5487aQ==", "integrity": "sha512-3Og2+bk4GlWmMO1Yg2HBfeb5zrOMLaIHD7bEqQ4+6yw4IckAaV+ke05H0tyyqmOVrOQ0LpvtXgD7pPztjm9r9A==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -363,9 +363,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-x64-gnu": { "node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.1-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.1-beta.2.tgz",
"integrity": "sha512-rq7Q6Lq9kJmBcgwplYQVJmRbyeP+xPVmXyyQfAO3IjekqeSsyjj1HoCZYqZIfBZyN5ELiSvIJB0731aKf9pr1A==", "integrity": "sha512-mwTQyA/FBoU/FkPuvCNBZG3y83gBN+iYoejehBH2HBkLUIcmlsDgSRZ1OQ+f9ijj12EMBCA11tBUPA9zhHzyrw==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -375,9 +375,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-win32-x64-msvc": { "node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.1-beta.1.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.1-beta.2.tgz",
"integrity": "sha512-kHra0SEXeMKdgqi5h0igsqHcBr73hKBhEVJBa8VTv1DUv6Jvazwl4B4ueqllcyD4k3vvOTb2XzZomm7dhQ9QnA==", "integrity": "sha512-VkjNpqhK3l3uHLLPmox+HrmKPMaZgV+qsGQWx0nfseGnSOEmXAWZWQFe0APVCQ9y0xTypQB0oH7eSOPZv2t4WQ==",
"cpu": [ "cpu": [
"x64" "x64"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"private": false, "private": false,
"main": "dist/index.js", "main": "dist/index.js",
@@ -89,10 +89,10 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.20.1-beta.1", "@lancedb/vectordb-darwin-x64": "0.20.1-beta.2",
"@lancedb/vectordb-darwin-arm64": "0.20.1-beta.1", "@lancedb/vectordb-darwin-arm64": "0.20.1-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.1", "@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.1", "@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.1" "@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.2"
} }
} }

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.20.1-beta.1" version = "0.20.1-beta.2"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -1650,13 +1650,25 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(resultSet.has("fob")).toBe(true); expect(resultSet.has("fob")).toBe(true);
expect(resultSet.has("fo")).toBe(true); expect(resultSet.has("fo")).toBe(true);
expect(resultSet.has("food")).toBe(true); expect(resultSet.has("food")).toBe(true);
const prefixResults = await table
.search(
new MatchQuery("foo", "text", { fuzziness: 3, prefixLength: 3 }),
)
.toArray();
expect(prefixResults.length).toBe(2);
const resultSet2 = new Set(prefixResults.map((r) => r.text));
expect(resultSet2.has("foo")).toBe(true);
expect(resultSet2.has("food")).toBe(true);
}); });
test("full text search boolean query", async () => { test("full text search boolean query", async () => {
const db = await connect(tmpDir.name); const db = await connect(tmpDir.name);
const data = [ const data = [
{ text: "hello world", vector: [0.1, 0.2, 0.3] }, { text: "The cat and dog are playing" },
{ text: "goodbye world", vector: [0.4, 0.5, 0.6] }, { text: "The cat is sleeping" },
{ text: "The dog is barking" },
{ text: "The dog chases the cat" },
]; ];
const table = await db.createTable("test", data); const table = await db.createTable("test", data);
await table.createIndex("text", { await table.createIndex("text", {
@@ -1666,22 +1678,32 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
const shouldResults = await table const shouldResults = await table
.search( .search(
new BooleanQuery([ new BooleanQuery([
[Occur.Should, new MatchQuery("hello", "text")], [Occur.Should, new MatchQuery("cat", "text")],
[Occur.Should, new MatchQuery("goodbye", "text")], [Occur.Should, new MatchQuery("dog", "text")],
]), ]),
) )
.toArray(); .toArray();
expect(shouldResults.length).toBe(2); expect(shouldResults.length).toBe(4);
const mustResults = await table const mustResults = await table
.search( .search(
new BooleanQuery([ new BooleanQuery([
[Occur.Must, new MatchQuery("hello", "text")], [Occur.Must, new MatchQuery("cat", "text")],
[Occur.Must, new MatchQuery("world", "text")], [Occur.Must, new MatchQuery("dog", "text")],
]), ]),
) )
.toArray(); .toArray();
expect(mustResults.length).toBe(1); expect(mustResults.length).toBe(2);
const mustNotResults = await table
.search(
new BooleanQuery([
[Occur.Must, new MatchQuery("cat", "text")],
[Occur.MustNot, new MatchQuery("dog", "text")],
]),
)
.toArray();
expect(mustNotResults.length).toBe(1);
}); });
test.each([ test.each([

View File

@@ -812,10 +812,12 @@ export enum Operator {
* *
* - `Must`: The term must be present in the document. * - `Must`: The term must be present in the document.
* - `Should`: The term should contribute to the document score, but is not required. * - `Should`: The term should contribute to the document score, but is not required.
* - `MustNot`: The term must not be present in the document.
*/ */
export enum Occur { export enum Occur {
Must = "MUST",
Should = "SHOULD", Should = "SHOULD",
Must = "MUST",
MustNot = "MUST_NOT",
} }
/** /**
@@ -856,6 +858,7 @@ export class MatchQuery implements FullTextQuery {
* - `fuzziness`: The fuzziness level for the query (default is 0). * - `fuzziness`: The fuzziness level for the query (default is 0).
* - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50). * - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
* - `operator`: The logical operator to use for combining terms in the query (default is "OR"). * - `operator`: The logical operator to use for combining terms in the query (default is "OR").
* - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
*/ */
constructor( constructor(
query: string, query: string,
@@ -865,6 +868,7 @@ export class MatchQuery implements FullTextQuery {
fuzziness?: number; fuzziness?: number;
maxExpansions?: number; maxExpansions?: number;
operator?: Operator; operator?: Operator;
prefixLength?: number;
}, },
) { ) {
let fuzziness = options?.fuzziness; let fuzziness = options?.fuzziness;
@@ -878,6 +882,7 @@ export class MatchQuery implements FullTextQuery {
fuzziness, fuzziness,
options?.maxExpansions ?? 50, options?.maxExpansions ?? 50,
options?.operator ?? Operator.Or, options?.operator ?? Operator.Or,
options?.prefixLength ?? 0,
); );
} }

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.20.1-beta.1", "version": "0.20.1-beta.2",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -335,6 +335,7 @@ impl JsFullTextQuery {
fuzziness: Option<u32>, fuzziness: Option<u32>,
max_expansions: u32, max_expansions: u32,
operator: String, operator: String,
prefix_length: u32,
) -> napi::Result<Self> { ) -> napi::Result<Self> {
Ok(Self { Ok(Self {
inner: MatchQuery::new(query) inner: MatchQuery::new(query)
@@ -347,6 +348,7 @@ impl JsFullTextQuery {
napi::Error::from_reason(format!("Invalid operator: {}", e)) napi::Error::from_reason(format!("Invalid operator: {}", e))
})?, })?,
) )
.with_prefix_length(prefix_length)
.into(), .into(),
}) })
} }

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.23.1-beta.2" current_version = "0.24.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.23.1-beta.2" version = "0.24.0"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true

View File

@@ -101,8 +101,9 @@ class FullTextOperator(str, Enum):
class Occur(str, Enum): class Occur(str, Enum):
MUST = "MUST"
SHOULD = "SHOULD" SHOULD = "SHOULD"
MUST = "MUST"
MUST_NOT = "MUST_NOT"
@pydantic.dataclasses.dataclass @pydantic.dataclasses.dataclass
@@ -181,6 +182,9 @@ class MatchQuery(FullTextQuery):
Can be either `AND` or `OR`. Can be either `AND` or `OR`.
If `AND`, all terms in the query must match. If `AND`, all terms in the query must match.
If `OR`, at least one term in the query must match. If `OR`, at least one term in the query must match.
prefix_length : int, optional
The number of beginning characters being unchanged for fuzzy matching.
This is useful to achieve prefix matching.
""" """
query: str query: str
@@ -189,6 +193,7 @@ class MatchQuery(FullTextQuery):
fuzziness: int = pydantic.Field(0, kw_only=True) fuzziness: int = pydantic.Field(0, kw_only=True)
max_expansions: int = pydantic.Field(50, kw_only=True) max_expansions: int = pydantic.Field(50, kw_only=True)
operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True) operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
prefix_length: int = pydantic.Field(0, kw_only=True)
def query_type(self) -> FullTextQueryType: def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MATCH return FullTextQueryType.MATCH
@@ -1446,10 +1451,13 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
query = self._query query = self._query
if self._phrase_query: if self._phrase_query:
raise NotImplementedError( if isinstance(query, str):
"Phrase query is not yet supported in Lance FTS. " if not query.startswith('"') or not query.endswith('"'):
"Use tantivy-based index instead for now." query = f'"{query}"'
) elif isinstance(query, FullTextQuery) and not isinstance(
query, PhraseQuery
):
raise TypeError("Please use PhraseQuery for phrase queries.")
query = self.to_query_object() query = self.to_query_object()
results = self._table._execute_query(query, timeout=timeout) results = self._table._execute_query(query, timeout=timeout)
results = results.read_all() results = results.read_all()

View File

@@ -827,7 +827,7 @@ class Table(ABC):
ordering_field_names: Optional[Union[str, List[str]]] = None, ordering_field_names: Optional[Union[str, List[str]]] = None,
replace: bool = False, replace: bool = False,
writer_heap_size: Optional[int] = 1024 * 1024 * 1024, writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
use_tantivy: bool = True, use_tantivy: bool = False,
tokenizer_name: Optional[str] = None, tokenizer_name: Optional[str] = None,
with_position: bool = False, with_position: bool = False,
# tokenizer configs: # tokenizer configs:
@@ -864,7 +864,7 @@ class Table(ABC):
The tokenizer to use for the index. Can be "raw", "default" or the 2 letter The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
language code followed by "_stem". So for english it would be "en_stem". language code followed by "_stem". So for english it would be "en_stem".
For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
use_tantivy: bool, default True use_tantivy: bool, default False
If True, use the legacy full-text search implementation based on tantivy. If True, use the legacy full-text search implementation based on tantivy.
If False, use the new full-text search implementation based on lance-index. If False, use the new full-text search implementation based on lance-index.
with_position: bool, default False with_position: bool, default False
@@ -1970,7 +1970,7 @@ class LanceTable(Table):
ordering_field_names: Optional[Union[str, List[str]]] = None, ordering_field_names: Optional[Union[str, List[str]]] = None,
replace: bool = False, replace: bool = False,
writer_heap_size: Optional[int] = 1024 * 1024 * 1024, writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
use_tantivy: bool = True, use_tantivy: bool = False,
tokenizer_name: Optional[str] = None, tokenizer_name: Optional[str] = None,
with_position: bool = False, with_position: bool = False,
# tokenizer configs: # tokenizer configs:

View File

@@ -6,7 +6,7 @@ import lancedb
# --8<-- [end:import-lancedb] # --8<-- [end:import-lancedb]
# --8<-- [start:import-numpy] # --8<-- [start:import-numpy]
from lancedb.query import BoostQuery, MatchQuery from lancedb.query import BooleanQuery, BoostQuery, MatchQuery, Occur
import numpy as np import numpy as np
import pyarrow as pa import pyarrow as pa
@@ -191,6 +191,15 @@ def test_fts_fuzzy_query():
"food", # 1 insertion "food", # 1 insertion
} }
results = table.search(
MatchQuery("foo", "text", fuzziness=1, prefix_length=3)
).to_pandas()
assert len(results) == 2
assert set(results["text"].to_list()) == {
"foo",
"food",
}
@pytest.mark.skipif( @pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905" os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
@@ -240,6 +249,60 @@ def test_fts_boost_query():
) )
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)
def test_fts_boolean_query(tmp_path):
uri = tmp_path / "boolean-example"
db = lancedb.connect(uri)
table = db.create_table(
"my_table_fts_boolean",
data=[
{"text": "The cat and dog are playing"},
{"text": "The cat is sleeping"},
{"text": "The dog is barking"},
{"text": "The dog chases the cat"},
],
mode="overwrite",
)
table.create_fts_index("text", use_tantivy=False, replace=True)
# SHOULD
results = table.search(
MatchQuery("cat", "text") | MatchQuery("dog", "text")
).to_pandas()
assert len(results) == 4
assert set(results["text"].to_list()) == {
"The cat and dog are playing",
"The cat is sleeping",
"The dog is barking",
"The dog chases the cat",
}
# MUST
results = table.search(
MatchQuery("cat", "text") & MatchQuery("dog", "text")
).to_pandas()
assert len(results) == 2
assert set(results["text"].to_list()) == {
"The cat and dog are playing",
"The dog chases the cat",
}
# MUST NOT
results = table.search(
BooleanQuery(
[
(Occur.MUST, MatchQuery("cat", "text")),
(Occur.MUST_NOT, MatchQuery("dog", "text")),
]
)
).to_pandas()
assert len(results) == 1
assert set(results["text"].to_list()) == {
"The cat is sleeping",
}
@pytest.mark.skipif( @pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905" os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
) )

View File

@@ -245,7 +245,7 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):
NotImplementedError, NotImplementedError,
match="Full-text search is only supported on the local filesystem", match="Full-text search is only supported on the local filesystem",
): ):
table.create_fts_index("x") table.create_fts_index("x", use_tantivy=True)
# make sure list tables still works # make sure list tables still works
assert db.table_names() == ["test_ddb_sync"] assert db.table_names() == ["test_ddb_sync"]

View File

@@ -50,6 +50,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
let fuzziness = ob.getattr("fuzziness")?.extract()?; let fuzziness = ob.getattr("fuzziness")?.extract()?;
let max_expansions = ob.getattr("max_expansions")?.extract()?; let max_expansions = ob.getattr("max_expansions")?.extract()?;
let operator = ob.getattr("operator")?.extract::<String>()?; let operator = ob.getattr("operator")?.extract::<String>()?;
let prefix_length = ob.getattr("prefix_length")?.extract()?;
Ok(PyLanceDB( Ok(PyLanceDB(
MatchQuery::new(query) MatchQuery::new(query)
@@ -60,6 +61,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
.with_operator(Operator::try_from(operator.as_str()).map_err(|e| { .with_operator(Operator::try_from(operator.as_str()).map_err(|e| {
PyValueError::new_err(format!("Invalid operator: {}", e)) PyValueError::new_err(format!("Invalid operator: {}", e))
})?) })?)
.with_prefix_length(prefix_length)
.into(), .into(),
)) ))
} }
@@ -139,7 +141,8 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
kwargs.set_item("boost", query.boost)?; kwargs.set_item("boost", query.boost)?;
kwargs.set_item("fuzziness", query.fuzziness)?; kwargs.set_item("fuzziness", query.fuzziness)?;
kwargs.set_item("max_expansions", query.max_expansions)?; kwargs.set_item("max_expansions", query.max_expansions)?;
kwargs.set_item("operator", operator_to_str(query.operator))?; kwargs.set_item::<_, &str>("operator", query.operator.into())?;
kwargs.set_item("prefix_length", query.prefix_length)?;
namespace namespace
.getattr(intern!(py, "MatchQuery"))? .getattr(intern!(py, "MatchQuery"))?
.call((query.terms, query.column.unwrap()), Some(&kwargs)) .call((query.terms, query.column.unwrap()), Some(&kwargs))
@@ -169,19 +172,25 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
.unzip(); .unzip();
let kwargs = PyDict::new(py); let kwargs = PyDict::new(py);
kwargs.set_item("boosts", boosts)?; kwargs.set_item("boosts", boosts)?;
kwargs.set_item("operator", operator_to_str(first.operator))?; kwargs.set_item::<_, &str>("operator", first.operator.into())?;
namespace namespace
.getattr(intern!(py, "MultiMatchQuery"))? .getattr(intern!(py, "MultiMatchQuery"))?
.call((first.terms.clone(), columns), Some(&kwargs)) .call((first.terms.clone(), columns), Some(&kwargs))
} }
FtsQuery::Boolean(query) => { FtsQuery::Boolean(query) => {
let mut queries = Vec::with_capacity(query.must.len() + query.should.len()); let mut queries: Vec<(&str, Bound<'py, PyAny>)> = Vec::with_capacity(
for q in query.must { query.should.len() + query.must.len() + query.must_not.len(),
queries.push((occur_to_str(Occur::Must), PyLanceDB(q).into_pyobject(py)?)); );
}
for q in query.should { for q in query.should {
queries.push((occur_to_str(Occur::Should), PyLanceDB(q).into_pyobject(py)?)); queries.push((Occur::Should.into(), PyLanceDB(q).into_pyobject(py)?));
} }
for q in query.must {
queries.push((Occur::Must.into(), PyLanceDB(q).into_pyobject(py)?));
}
for q in query.must_not {
queries.push((Occur::MustNot.into(), PyLanceDB(q).into_pyobject(py)?));
}
namespace namespace
.getattr(intern!(py, "BooleanQuery"))? .getattr(intern!(py, "BooleanQuery"))?
.call1((queries,)) .call1((queries,))
@@ -190,21 +199,6 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
} }
} }
fn operator_to_str(op: Operator) -> &'static str {
match op {
Operator::And => "AND",
Operator::Or => "OR",
}
}
fn occur_to_str(occur: Occur) -> &'static str {
match occur {
Occur::Must => "MUST",
Occur::Should => "SHOULD",
Occur::MustNot => "MUST NOT",
}
}
// Python representation of query vector(s) // Python representation of query vector(s)
#[derive(Clone)] #[derive(Clone)]
pub struct PyQueryVectors(Vec<Arc<dyn Array>>); pub struct PyQueryVectors(Vec<Arc<dyn Array>>);

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.20.1-beta.1" version = "0.20.1-beta.2"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.20.1-beta.1" version = "0.20.1-beta.2"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true