Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 13:29:57 +00:00)

Compare commits: 73 commits (python-v0. ... python-v0.)
Commits:

c6c20cb2bd, 26080ee4c1, ef3a2b5357, c42a201389, 24e42ccd4d, 8a50944061,
40e066bc7c, b3ad105fa0, 6e701d3e1b, 2248aa9508, a6fa69ab89, b3a4efd587,
4708b60bb1, 080ea2f9a4, 32fdde23f8, c44e5c046c, f23aa0a793, 83fc2b1851,
56aa133ee6, 27d9e5c596, ec8271931f, 6c6966600c, 2e170c3c7b, fd92e651d1,
c298482ee1, d59f64b5a3, 30ed8c4c43, 4a2cdbf299, 657843d9e9, 1cd76b8498,
a38f784081, 647dee4e94, 0844c2dd64, fd2692295c, d4ea50fba1, 0d42297cf8,
a6d4125cbf, 5c32a99e61, cefaa75b24, bd62c2384f, f0bc08c0d7, e52ac79c69,
f091f57594, a997fd4108, 1486514ccc, a505bc3965, c1738250a3, 1ee63984f5,
2eb2c8862a, 4ea8e178d3, e4485a630e, fb95f9b3bd, 625bab3f21, e59f9382a0,
fdee7ba477, c44fa3abc4, fc43aac0ed, e67cd0baf9, 26dab93f2a, b9bdb8d937,
a1d1833a40, a547c523c2, dc8b75feab, c1600cdc06, f5dee46970, 346cbf8bf7,
3c7dfe9f28, f52d05d3fa, c321cccc12, cba14a5743, 72057b743d, 698f329598,
79fa745130
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.18.2-beta.1"
+current_version = "0.19.0-beta.7"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
.github/workflows/java-publish.yml (vendored, 6 changes)
@@ -43,7 +43,7 @@ jobs:
       - uses: Swatinem/rust-cache@v2
       - uses: actions-rust-lang/setup-rust-toolchain@v1
         with:
-          toolchain: "1.79.0"
+          toolchain: "1.81.0"
          cache-workspaces: "./java/core/lancedb-jni"
       # Disable full debug symbol generation to speed up CI build and keep memory down
       # "1" means line tables only, which is useful for panic tracebacks.
@@ -97,7 +97,7 @@ jobs:
       - name: Dry run
        if: github.event_name == 'pull_request'
        run: |
-          mvn --batch-mode -DskipTests package
+          mvn --batch-mode -DskipTests -Drust.release.build=true package
       - name: Set github
        run: |
          git config --global user.email "LanceDB Github Runner"
@@ -108,7 +108,7 @@ jobs:
          echo "use-agent" >> ~/.gnupg/gpg.conf
          echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
          export GPG_TTY=$(tty)
-          mvn --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
+          mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
.github/workflows/npm-publish.yml (vendored, 44 changes)
@@ -18,6 +18,7 @@ on:
     # This should trigger a dry run (we skip the final publish step)
     paths:
       - .github/workflows/npm-publish.yml
+      - Cargo.toml # Change in dependency frequently breaks builds

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -130,29 +131,24 @@ jobs:
             set -e &&
             apt-get update &&
             apt-get install -y protobuf-compiler pkg-config
-        # TODO: re-enable x64 musl builds. I could not figure out why, but it
-        # consistently made GHA runners non-responsive at the end of build. Example:
-        # https://github.com/lancedb/lancedb/actions/runs/13980431071/job/39144319470?pr=2250
-        # - target: x86_64-unknown-linux-musl
-        #   # This one seems to need some extra memory
-        #   host: ubuntu-2404-8x-x64
-        #   # https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
-        #   docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
-        #   features: ","
-        #   pre_build: |-
-        #     set -e &&
-        #     apk add protobuf-dev curl &&
-        #     ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
-        #     ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so
+        - target: x86_64-unknown-linux-musl
+          # This one seems to need some extra memory
+          host: ubuntu-2404-8x-x64
+          # https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
+          docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
+          features: fp16kernels
+          pre_build: |-
+            set -e &&
+            apk add protobuf-dev curl &&
+            ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
+            ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so &&
+            CC=gcc &&
+            CXX=g++
         - target: aarch64-unknown-linux-gnu
           host: ubuntu-2404-8x-x64
           # https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
           docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-debian-aarch64
-          # TODO: enable fp16kernels after https://github.com/lancedb/lance/pull/3559
-          features: ","
+          features: "fp16kernels"
+
           pre_build: |-
             set -e &&
             apt-get update &&
@@ -170,8 +166,8 @@ jobs:
             set -e &&
             apk add protobuf-dev &&
             rustup target add aarch64-unknown-linux-musl &&
-            export CC="/aarch64-linux-musl-cross/bin/aarch64-linux-musl-gcc" &&
-            export CXX="/aarch64-linux-musl-cross/bin/aarch64-linux-musl-g++"
+            export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc &&
+            export CXX_aarch64_unknown_linux_musl=aarch64-linux-musl-g++
     name: build - ${{ matrix.settings.target }}
     runs-on: ${{ matrix.settings.host }}
     defaults:
@@ -535,6 +531,12 @@ jobs:
           for filename in *.tgz; do
             npm publish $PUBLISH_ARGS $filename
           done
+      - name: Deprecate
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
+        # We need to deprecate the old package to avoid confusion.
+        # Each time we publish a new version, it gets undeprecated.
+        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
       - name: Notify Slack Action
         uses: ravsamhq/notify-slack-action@2.3.0
         if: ${{ always() }}
.github/workflows/pypi-publish.yml (vendored, 1 change)
@@ -8,6 +8,7 @@ on:
     # This should trigger a dry run (we skip the final publish step)
     paths:
       - .github/workflows/pypi-publish.yml
+      - Cargo.toml # Change in dependency frequently breaks builds

 jobs:
   linux:
Cargo.lock (generated, 847 changes): file diff suppressed because it is too large.
Cargo.toml (30 changes)
@@ -21,16 +21,16 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.25.0", "features" = [
+lance = { "version" = "=0.26.0", "features" = [
     "dynamodb",
-] }
+], tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-io = { version = "=0.25.0" }
+lance-io = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-index = { version = "=0.25.0" }
+lance-index = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-linalg = { version = "=0.25.0" }
+lance-linalg = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-table = { version = "=0.25.0" }
+lance-table = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-testing = { version = "=0.25.0" }
+lance-testing = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-datafusion = { version = "=0.25.0" }
+lance-datafusion = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-encoding = { version = "=0.25.0" }
+lance-encoding = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
 # Note that this one does not include pyarrow
 arrow = { version = "54.1", optional = false }
 arrow-array = "54.1"
@@ -41,12 +41,12 @@ arrow-schema = "54.1"
 arrow-arith = "54.1"
 arrow-cast = "54.1"
 async-trait = "0"
-datafusion = { version = "45.0", default-features = false }
+datafusion = { version = "46.0", default-features = false }
-datafusion-catalog = "45.0"
+datafusion-catalog = "46.0"
-datafusion-common = { version = "45.0", default-features = false }
+datafusion-common = { version = "46.0", default-features = false }
-datafusion-execution = "45.0"
+datafusion-execution = "46.0"
-datafusion-expr = "45.0"
+datafusion-expr = "46.0"
-datafusion-physical-plan = "45.0"
+datafusion-physical-plan = "46.0"
 env_logger = "0.11"
 half = { "version" = "=2.4.1", default-features = false, features = [
     "num-traits",
@@ -2,7 +2,7 @@

 LanceDB docs are deployed to https://lancedb.github.io/lancedb/.

-Docs is built and deployed automatically by [Github Actions](.github/workflows/docs.yml)
+Docs is built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
 whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
 unreleased features.
@@ -342,7 +342,7 @@ For **read and write access**, LanceDB will need a policy such as:
       "Action": [
         "s3:PutObject",
         "s3:GetObject",
-        "s3:DeleteObject",
+        "s3:DeleteObject"
       ],
       "Resource": "arn:aws:s3:::<bucket>/<prefix>/*"
     },
@@ -374,7 +374,7 @@ For **read-only access**, LanceDB will need a policy such as:
     {
       "Effect": "Allow",
       "Action": [
-        "s3:GetObject",
+        "s3:GetObject"
       ],
       "Resource": "arn:aws:s3:::<bucket>/<prefix>/*"
     },
docs/src/js/classes/BoostQuery.md (new file, 67 lines)
@@ -0,0 +1,67 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / BoostQuery
+
+# Class: BoostQuery
+
+Represents a full-text query interface.
+This interface defines the structure and behavior for full-text queries,
+including methods to retrieve the query type and convert the query to a dictionary format.
+
+## Implements
+
+- [`FullTextQuery`](../interfaces/FullTextQuery.md)
+
+## Constructors
+
+### new BoostQuery()
+
+```ts
+new BoostQuery(
+   positive,
+   negative,
+   options?): BoostQuery
+```
+
+Creates an instance of BoostQuery.
+The boost returns documents that match the positive query,
+but penalizes those that match the negative query.
+the penalty is controlled by the `negativeBoost` parameter.
+
+#### Parameters
+
+* **positive**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
+    The positive query that boosts the relevance score.
+
+* **negative**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
+    The negative query that reduces the relevance score.
+
+* **options?**
+    Optional parameters for the boost query.
+    - `negativeBoost`: The boost factor for the negative query (default is 0.0).
+
+* **options.negativeBoost?**: `number`
+
+#### Returns
+
+[`BoostQuery`](BoostQuery.md)
+
+## Methods
+
+### queryType()
+
+```ts
+queryType(): FullTextQueryType
+```
+
+The type of the full-text query.
+
+#### Returns
+
+[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
+
+#### Implementation of
+
+[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
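The new page documents the constructor but stops short of an end-to-end call. A minimal usage sketch, assuming a hypothetical existing table `docs` with an FTS index on a `text` column:

```ts
import * as lancedb from "@lancedb/lancedb";
import { BoostQuery, MatchQuery } from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical table with an FTS index on "text"

// Rank rows that mention "runtime metrics" highly, but penalize rows that
// also match "deprecated"; negativeBoost scales the penalty.
const query = new BoostQuery(
  new MatchQuery("runtime metrics", "text"),
  new MatchQuery("deprecated", "text"),
  { negativeBoost: 0.5 },
);
const results = await table.query().fullTextSearch(query).limit(10).toArray();
```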
docs/src/js/classes/MatchQuery.md (new file, 70 lines)
@@ -0,0 +1,70 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MatchQuery
+
+# Class: MatchQuery
+
+Represents a full-text query interface.
+This interface defines the structure and behavior for full-text queries,
+including methods to retrieve the query type and convert the query to a dictionary format.
+
+## Implements
+
+- [`FullTextQuery`](../interfaces/FullTextQuery.md)
+
+## Constructors
+
+### new MatchQuery()
+
+```ts
+new MatchQuery(
+   query,
+   column,
+   options?): MatchQuery
+```
+
+Creates an instance of MatchQuery.
+
+#### Parameters
+
+* **query**: `string`
+    The text query to search for.
+
+* **column**: `string`
+    The name of the column to search within.
+
+* **options?**
+    Optional parameters for the match query.
+    - `boost`: The boost factor for the query (default is 1.0).
+    - `fuzziness`: The fuzziness level for the query (default is 0).
+    - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
+
+* **options.boost?**: `number`
+
+* **options.fuzziness?**: `number`
+
+* **options.maxExpansions?**: `number`
+
+#### Returns
+
+[`MatchQuery`](MatchQuery.md)
+
+## Methods
+
+### queryType()
+
+```ts
+queryType(): FullTextQueryType
+```
+
+The type of the full-text query.
+
+#### Returns
+
+[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
+
+#### Implementation of
+
+[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
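A usage sketch for the `fuzziness` and `maxExpansions` options described above, again assuming a hypothetical `docs` table with an FTS index on `text`:

```ts
import * as lancedb from "@lancedb/lancedb";
import { MatchQuery } from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical table with an FTS index on "text"

// Tolerate one edit per term ("vectr" still finds "vector"), capping fuzzy
// expansion at 10 candidate terms to bound the cost.
const results = await table
  .query()
  .fullTextSearch(new MatchQuery("vectr databse", "text", { fuzziness: 1, maxExpansions: 10 }))
  .limit(5)
  .toArray();
```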
docs/src/js/classes/MultiMatchQuery.md (new file, 64 lines)
@@ -0,0 +1,64 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MultiMatchQuery
+
+# Class: MultiMatchQuery
+
+Represents a full-text query interface.
+This interface defines the structure and behavior for full-text queries,
+including methods to retrieve the query type and convert the query to a dictionary format.
+
+## Implements
+
+- [`FullTextQuery`](../interfaces/FullTextQuery.md)
+
+## Constructors
+
+### new MultiMatchQuery()
+
+```ts
+new MultiMatchQuery(
+   query,
+   columns,
+   options?): MultiMatchQuery
+```
+
+Creates an instance of MultiMatchQuery.
+
+#### Parameters
+
+* **query**: `string`
+    The text query to search for across multiple columns.
+
+* **columns**: `string`[]
+    An array of column names to search within.
+
+* **options?**
+    Optional parameters for the multi-match query.
+    - `boosts`: An array of boost factors for each column (default is 1.0 for all).
+
+* **options.boosts?**: `number`[]
+
+#### Returns
+
+[`MultiMatchQuery`](MultiMatchQuery.md)
+
+## Methods
+
+### queryType()
+
+```ts
+queryType(): FullTextQueryType
+```
+
+The type of the full-text query.
+
+#### Returns
+
+[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
+
+#### Implementation of
+
+[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
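A sketch of the per-column `boosts` parameter, with hypothetical `title` and `body` columns:

```ts
import * as lancedb from "@lancedb/lancedb";
import { MultiMatchQuery } from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical table with FTS indexes on "title" and "body"

// boosts lines up positionally with columns: a title hit counts twice as
// much as the same hit in the body.
const query = new MultiMatchQuery("full-text search", ["title", "body"], { boosts: [2.0, 1.0] });
const results = await table.query().fullTextSearch(query).limit(10).toArray();
```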
docs/src/js/classes/PhraseQuery.md (new file, 55 lines)
@@ -0,0 +1,55 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / PhraseQuery
+
+# Class: PhraseQuery
+
+Represents a full-text query interface.
+This interface defines the structure and behavior for full-text queries,
+including methods to retrieve the query type and convert the query to a dictionary format.
+
+## Implements
+
+- [`FullTextQuery`](../interfaces/FullTextQuery.md)
+
+## Constructors
+
+### new PhraseQuery()
+
+```ts
+new PhraseQuery(query, column): PhraseQuery
+```
+
+Creates an instance of `PhraseQuery`.
+
+#### Parameters
+
+* **query**: `string`
+    The phrase to search for in the specified column.
+
+* **column**: `string`
+    The name of the column to search within.
+
+#### Returns
+
+[`PhraseQuery`](PhraseQuery.md)
+
+## Methods
+
+### queryType()
+
+```ts
+queryType(): FullTextQueryType
+```
+
+The type of the full-text query.
+
+#### Returns
+
+[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
+
+#### Implementation of
+
+[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
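A sketch contrasting phrase matching with plain term matching, using the same hypothetical table as the earlier examples:

```ts
import * as lancedb from "@lancedb/lancedb";
import { PhraseQuery } from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical table with an FTS index on "text"

// Unlike MatchQuery, the terms must appear adjacent and in order, so this
// matches "physical query plan" but not "query plan, physical".
const results = await table
  .query()
  .fullTextSearch(new PhraseQuery("physical query plan", "text"))
  .limit(5)
  .toArray();
```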
@@ -30,6 +30,53 @@ protected inner: Query | Promise<Query>;

 ## Methods

+### analyzePlan()
+
+```ts
+analyzePlan(): Promise<string>
+```
+
+Executes the query and returns the physical query plan annotated with runtime metrics.
+
+This is useful for debugging and performance analysis, as it shows how the query was executed
+and includes metrics such as elapsed time, rows processed, and I/O statistics.
+
+#### Returns
+
+`Promise`<`string`>
+
+A query execution plan with runtime metrics for each step.
+
+#### Example
+
+```ts
+import * as lancedb from "@lancedb/lancedb"
+
+const db = await lancedb.connect("./.lancedb");
+const table = await db.createTable("my_table", [
+  { vector: [1.1, 0.9], id: "1" },
+]);
+
+const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
+
+Example output (with runtime metrics inlined):
+AnalyzeExec verbose=true, metrics=[]
+ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
+Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
+CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
+GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
+FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
+SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
+KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
+LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
+```
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+
+***
+
 ### execute()

 ```ts
@@ -159,7 +206,7 @@ fullTextSearch(query, options?): this

 #### Parameters

-* **query**: `string`
+* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)

 * **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
@@ -262,7 +309,7 @@ nearestToText(query, columns?): Query

 #### Parameters

-* **query**: `string`
+* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)

 * **columns?**: `string`[]
@@ -36,6 +36,49 @@ protected inner: NativeQueryType | Promise<NativeQueryType>;

 ## Methods

+### analyzePlan()
+
+```ts
+analyzePlan(): Promise<string>
+```
+
+Executes the query and returns the physical query plan annotated with runtime metrics.
+
+This is useful for debugging and performance analysis, as it shows how the query was executed
+and includes metrics such as elapsed time, rows processed, and I/O statistics.
+
+#### Returns
+
+`Promise`<`string`>
+
+A query execution plan with runtime metrics for each step.
+
+#### Example
+
+```ts
+import * as lancedb from "@lancedb/lancedb"
+
+const db = await lancedb.connect("./.lancedb");
+const table = await db.createTable("my_table", [
+  { vector: [1.1, 0.9], id: "1" },
+]);
+
+const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
+
+Example output (with runtime metrics inlined):
+AnalyzeExec verbose=true, metrics=[]
+ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
+Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
+CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
+GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
+FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
+SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
+KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
+LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
+```
+
+***
+
 ### execute()

 ```ts
@@ -149,7 +192,7 @@ fullTextSearch(query, options?): this

 #### Parameters

-* **query**: `string`
+* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)

 * **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
@@ -454,6 +454,28 @@ Modeled after ``VACUUM`` in PostgreSQL.

 ***

+### prewarmIndex()
+
+```ts
+abstract prewarmIndex(name): Promise<void>
+```
+
+Prewarm an index in the table.
+
+#### Parameters
+
+* **name**: `string`
+    The name of the index.
+
+This will load the index into memory. This may reduce the cold-start time for
+future queries. If the index does not fit in the cache then this call may be
+wasteful.
+
+#### Returns
+
+`Promise`<`void`>
+
+***
+
 ### query()

 ```ts
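A sketch of the intended call pattern, with a hypothetical index name; whether prewarming pays off depends on the index fitting in the cache, as the doc text above notes:

```ts
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical table with a vector index named "vector_idx"

// Load the index into memory before serving traffic so the first real
// query does not pay the cold-start cost.
await table.prewarmIndex("vector_idx");
const results = await table.query().nearestTo([0.5, 0.2]).limit(10).toArray();
```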
@@ -575,7 +597,7 @@ of the given query

 #### Parameters

-* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md)
+* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
    the query, a vector or string

 * **queryType?**: `string`
@@ -48,6 +48,53 @@ addQueryVector(vector): VectorQuery

 ***

+### analyzePlan()
+
+```ts
+analyzePlan(): Promise<string>
+```
+
+Executes the query and returns the physical query plan annotated with runtime metrics.
+
+This is useful for debugging and performance analysis, as it shows how the query was executed
+and includes metrics such as elapsed time, rows processed, and I/O statistics.
+
+#### Returns
+
+`Promise`<`string`>
+
+A query execution plan with runtime metrics for each step.
+
+#### Example
+
+```ts
+import * as lancedb from "@lancedb/lancedb"
+
+const db = await lancedb.connect("./.lancedb");
+const table = await db.createTable("my_table", [
+  { vector: [1.1, 0.9], id: "1" },
+]);
+
+const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
+
+Example output (with runtime metrics inlined):
+AnalyzeExec verbose=true, metrics=[]
+ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
+Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
+CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
+GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
+FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
+SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
+KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
+LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
+```
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+
+***
+
 ### bypassVectorIndex()

 ```ts
@@ -300,7 +347,7 @@ fullTextSearch(query, options?): this

 #### Parameters

-* **query**: `string`
+* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)

 * **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
docs/src/js/enumerations/FullTextQueryType.md (new file, 46 lines)
@@ -0,0 +1,46 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FullTextQueryType
+
+# Enumeration: FullTextQueryType
+
+Enum representing the types of full-text queries supported.
+
+- `Match`: Performs a full-text search for terms in the query string.
+- `MatchPhrase`: Searches for an exact phrase match in the text.
+- `Boost`: Boosts the relevance score of specific terms in the query.
+- `MultiMatch`: Searches across multiple fields for the query terms.
+
+## Enumeration Members
+
+### Boost
+
+```ts
+Boost: "boost";
+```
+
+***
+
+### Match
+
+```ts
+Match: "match";
+```
+
+***
+
+### MatchPhrase
+
+```ts
+MatchPhrase: "match_phrase";
+```
+
+***
+
+### MultiMatch
+
+```ts
+MultiMatch: "multi_match";
+```
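Since every query class implements `queryType()`, the enum supports exhaustive dispatch; a sketch with a hypothetical helper:

```ts
import { FullTextQuery, FullTextQueryType, MatchQuery, PhraseQuery } from "@lancedb/lancedb";

// Hypothetical helper that names the kind of query it was handed.
function describe(query: FullTextQuery): string {
  switch (query.queryType()) {
    case FullTextQueryType.Match:
      return "term-level match";
    case FullTextQueryType.MatchPhrase:
      return "exact phrase";
    case FullTextQueryType.Boost:
      return "boosted combination";
    case FullTextQueryType.MultiMatch:
      return "multi-column match";
    default:
      return "unknown query type";
  }
}

console.log(describe(new MatchQuery("hello", "text"))); // "term-level match"
console.log(describe(new PhraseQuery("hello world", "text"))); // "exact phrase"
```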
@@ -9,12 +9,20 @@
 - [embedding](namespaces/embedding/README.md)
 - [rerankers](namespaces/rerankers/README.md)

+## Enumerations
+
+- [FullTextQueryType](enumerations/FullTextQueryType.md)
+
 ## Classes

+- [BoostQuery](classes/BoostQuery.md)
 - [Connection](classes/Connection.md)
 - [Index](classes/Index.md)
 - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
+- [MatchQuery](classes/MatchQuery.md)
 - [MergeInsertBuilder](classes/MergeInsertBuilder.md)
+- [MultiMatchQuery](classes/MultiMatchQuery.md)
+- [PhraseQuery](classes/PhraseQuery.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)

@@ -33,6 +41,7 @@
 - [CreateTableOptions](interfaces/CreateTableOptions.md)
 - [ExecutableQuery](interfaces/ExecutableQuery.md)
 - [FtsOptions](interfaces/FtsOptions.md)
+- [FullTextQuery](interfaces/FullTextQuery.md)
 - [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
 - [HnswPqOptions](interfaces/HnswPqOptions.md)
 - [HnswSqOptions](interfaces/HnswSqOptions.md)
docs/src/js/interfaces/FullTextQuery.md (new file, 25 lines)
@@ -0,0 +1,25 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FullTextQuery
+
+# Interface: FullTextQuery
+
+Represents a full-text query interface.
+This interface defines the structure and behavior for full-text queries,
+including methods to retrieve the query type and convert the query to a dictionary format.
+
+## Methods
+
+### queryType()
+
+```ts
+queryType(): FullTextQueryType
+```
+
+The type of the full-text query.
+
+#### Returns
+
+[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
@@ -20,3 +20,13 @@ The maximum number of rows to return in a single batch

 Batches may have fewer rows if the underlying data is stored
 in smaller chunks.
+
+***
+
+### timeoutMs?
+
+```ts
+optional timeoutMs: number;
+```
+
+Timeout for query execution in milliseconds
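A sketch of how these execution options would be passed; that `toArray` accepts a partial options object of this interface is an assumption based on the surrounding docs, not confirmed by this diff:

```ts
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical existing table

// Cap batch size and give the query 5 seconds before it is aborted.
// timeoutMs comes from the interface above; passing it via toArray() is assumed.
const rows = await table
  .query()
  .limit(100)
  .toArray({ maxBatchLength: 1024, timeoutMs: 5000 });
```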
@@ -35,3 +35,9 @@ print the resolved query plan. You can use the `explain_plan` method to do this:
 * Python Sync: [LanceQueryBuilder.explain_plan][lancedb.query.LanceQueryBuilder.explain_plan]
 * Python Async: [AsyncQueryBase.explain_plan][lancedb.query.AsyncQueryBase.explain_plan]
 * Node @lancedb/lancedb: [LanceQueryBuilder.explainPlan](/lancedb/js/classes/QueryBase/#explainplan)
+
+To understand how a query was actually executed—including metrics like execution time, number of rows processed, I/O stats, and more—use the analyze_plan method. This executes the query and returns a physical execution plan annotated with runtime metrics, making it especially helpful for performance tuning and debugging.
+
+* Python Sync: [LanceQueryBuilder.analyze_plan][lancedb.query.LanceQueryBuilder.analyze_plan]
+* Python Async: [AsyncQueryBase.analyze_plan][lancedb.query.AsyncQueryBase.analyze_plan]
+* Node @lancedb/lancedb: [LanceQueryBuilder.analyzePlan](/lancedb/js/classes/QueryBase/#analyzePlan)
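For the Node API, the two methods pair up as follows; a sketch assuming a hypothetical existing table:

```ts
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("./.lancedb");
const table = await db.openTable("docs"); // hypothetical existing table

const query = table.query().nearestTo([0.5, 0.2]).limit(10);

// explainPlan resolves the plan without executing the query...
console.log(await query.explainPlan(true));

// ...while analyzePlan executes it and annotates every step with runtime metrics.
console.log(await query.analyzePlan());
```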
@@ -8,13 +8,16 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.18.2-beta.1</version>
+    <version>0.19.0-beta.7</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

   <artifactId>lancedb-core</artifactId>
   <name>LanceDB Core</name>
   <packaging>jar</packaging>
+  <properties>
+    <rust.release.build>false</rust.release.build>
+  </properties>

   <dependencies>
     <dependency>

@@ -68,7 +71,7 @@
         </goals>
         <configuration>
           <path>lancedb-jni</path>
-          <release>true</release>
+          <release>${rust.release.build}</release>
           <!-- Copy native libraries to target/classes for runtime access -->
           <copyTo>${project.build.directory}/classes/nativelib</copyTo>
           <copyWithPlatformDir>true</copyWithPlatformDir>
@@ -1,16 +1,25 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package com.lancedb.lancedb;

 import io.questdb.jar.jni.JarJniLoader;

 import java.io.Closeable;
 import java.util.List;
 import java.util.Optional;

-/**
- * Represents LanceDB database.
- */
+/** Represents LanceDB database. */
 public class Connection implements Closeable {
   static {
     JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
@@ -18,14 +27,11 @@ public class Connection implements Closeable {

   private long nativeConnectionHandle;

-  /**
-   * Connect to a LanceDB instance.
-   */
+  /** Connect to a LanceDB instance. */
   public static native Connection connect(String uri);

   /**
-   * Get the names of all tables in the database. The names are sorted in
-   * ascending order.
+   * Get the names of all tables in the database. The names are sorted in ascending order.
    *
    * @return the table names
    */
@@ -34,8 +40,7 @@ public class Connection implements Closeable {
   }

   /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
+   * Get the names of filtered tables in the database. The names are sorted in ascending order.
    *
    * @param limit The number of results to return.
    * @return the table names
@@ -45,12 +50,11 @@ public class Connection implements Closeable {
   }

   /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
+   * Get the names of filtered tables in the database. The names are sorted in ascending order.
    *
    * @param startAfter If present, only return names that come lexicographically after the supplied
-   *     value. This can be combined with limit to implement pagination
-   *     by setting this to the last table name from the previous page.
+   *     value. This can be combined with limit to implement pagination by setting this to the last
+   *     table name from the previous page.
    * @return the table names
    */
   public List<String> tableNames(String startAfter) {
@@ -58,12 +62,11 @@ public class Connection implements Closeable {
   }

   /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
+   * Get the names of filtered tables in the database. The names are sorted in ascending order.
    *
    * @param startAfter If present, only return names that come lexicographically after the supplied
-   *     value. This can be combined with limit to implement pagination
-   *     by setting this to the last table name from the previous page.
+   *     value. This can be combined with limit to implement pagination by setting this to the last
+   *     table name from the previous page.
    * @param limit The number of results to return.
    * @return the table names
    */
@@ -72,22 +75,19 @@ public class Connection implements Closeable {
   }

   /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
+   * Get the names of filtered tables in the database. The names are sorted in ascending order.
    *
    * @param startAfter If present, only return names that come lexicographically after the supplied
-   *     value. This can be combined with limit to implement pagination
-   *     by setting this to the last table name from the previous page.
+   *     value. This can be combined with limit to implement pagination by setting this to the last
+   *     table name from the previous page.
    * @param limit The number of results to return.
    * @return the table names
    */
-  public native List<String> tableNames(
-      Optional<String> startAfter, Optional<Integer> limit);
+  public native List<String> tableNames(Optional<String> startAfter, Optional<Integer> limit);

   /**
-   * Closes this connection and releases any system resources associated with it. If
-   * the connection is
-   * already closed, then invoking this method has no effect.
+   * Closes this connection and releases any system resources associated with it. If the connection
+   * is already closed, then invoking this method has no effect.
    */
   @Override
   public void close() {
@@ -98,8 +98,7 @@ public class Connection implements Closeable {
   }

   /**
-   * Native method to release the Lance connection resources associated with the
-   * given handle.
+   * Native method to release the Lance connection resources associated with the given handle.
    *
    * @param handle The native handle to the connection resource.
    */
@@ -1,27 +1,35 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package com.lancedb.lancedb;

-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.nio.file.Path;
-import java.util.List;
-import java.net.URL;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;

+import java.net.URL;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 public class ConnectionTest {
   private static final String[] TABLE_NAMES = {
-    "dataset_version",
-    "new_empty_dataset",
-    "test",
-    "write_stream"
+    "dataset_version", "new_empty_dataset", "test", "write_stream"
   };

-  @TempDir
-  static Path tempDir; // Temporary directory for the tests
+  @TempDir static Path tempDir; // Temporary directory for the tests
   private static URL lanceDbURL;

   @BeforeAll
@@ -53,18 +61,21 @@ public class ConnectionTest {
   @Test
   void tableNamesStartAfter() {
     try (Connection conn = Connection.connect(lanceDbURL.toString())) {
-      assertTableNamesStartAfter(conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
+      assertTableNamesStartAfter(
+          conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
       assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
       assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
       assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
-      assertTableNamesStartAfter(conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
+      assertTableNamesStartAfter(
+          conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
       assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
       assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
       assertTableNamesStartAfter(conn, "z_dataset", 0);
     }
   }

-  private void assertTableNamesStartAfter(Connection conn, String startAfter, int expectedSize, String... expectedNames) {
+  private void assertTableNamesStartAfter(
+      Connection conn, String startAfter, int expectedSize, String... expectedNames) {
     List<String> tableNames = conn.tableNames(startAfter);
     assertEquals(expectedSize, tableNames.size());
     for (int i = 0; i < expectedNames.length; i++) {
@@ -74,7 +85,7 @@ public class ConnectionTest {

   @Test
   void tableNamesLimit() {
     try (Connection conn = Connection.connect(lanceDbURL.toString())) {
       for (int i = 0; i <= TABLE_NAMES.length; i++) {
         List<String> tableNames = conn.tableNames(i);
         assertEquals(i, tableNames.size());
java/pom.xml (76 changes)
@@ -6,7 +6,7 @@

   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.18.2-beta.1</version>
+  <version>0.19.0-beta.7</version>
   <packaging>pom</packaging>

   <name>LanceDB Parent</name>
@@ -29,6 +29,25 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <arrow.version>15.0.0</arrow.version>
+    <spotless.skip>false</spotless.skip>
+    <spotless.version>2.30.0</spotless.version>
+    <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
+    <spotless.delimiter>package</spotless.delimiter>
+    <spotless.license.header>
+      /*
+       * Licensed under the Apache License, Version 2.0 (the "License");
+       * you may not use this file except in compliance with the License.
+       * You may obtain a copy of the License at
+       *
+       *     http://www.apache.org/licenses/LICENSE-2.0
+       *
+       * Unless required by applicable law or agreed to in writing, software
+       * distributed under the License is distributed on an "AS IS" BASIS,
+       * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+       * See the License for the specific language governing permissions and
+       * limitations under the License.
+       */
+    </spotless.license.header>
   </properties>

   <modules>
@@ -127,7 +146,8 @@
         <configuration>
           <configLocation>google_checks.xml</configLocation>
           <consoleOutput>true</consoleOutput>
-          <failsOnError>true</failsOnError>
+          <failsOnError>false</failsOnError>
+          <failOnViolation>false</failOnViolation>
           <violationSeverity>warning</violationSeverity>
           <linkXRef>false</linkXRef>
         </configuration>
@@ -141,6 +161,10 @@
         </execution>
       </executions>
     </plugin>
+    <plugin>
+      <groupId>com.diffplug.spotless</groupId>
+      <artifactId>spotless-maven-plugin</artifactId>
+    </plugin>
   </plugins>
   <pluginManagement>
     <plugins>
@@ -179,6 +203,54 @@
 <artifactId>maven-install-plugin</artifactId>
 <version>2.5.2</version>
 </plugin>
+<plugin>
+<groupId>com.diffplug.spotless</groupId>
+<artifactId>spotless-maven-plugin</artifactId>
+<version>${spotless.version}</version>
+<configuration>
+<skip>${spotless.skip}</skip>
+<upToDateChecking>
+<enabled>true</enabled>
+</upToDateChecking>
+<java>
+<includes>
+<include>src/main/java/**/*.java</include>
+<include>src/test/java/**/*.java</include>
+</includes>
+<googleJavaFormat>
+<version>${spotless.java.googlejavaformat.version}</version>
+<style>GOOGLE</style>
+</googleJavaFormat>
+
+<importOrder>
+<order>com.lancedb.lance,,javax,java,\#</order>
+</importOrder>
+
+<removeUnusedImports />
+</java>
+<scala>
+<includes>
+<include>src/main/scala/**/*.scala</include>
+<include>src/main/scala-*/**/*.scala</include>
+<include>src/test/scala/**/*.scala</include>
+<include>src/test/scala-*/**/*.scala</include>
+</includes>
+</scala>
+<licenseHeader>
+<content>${spotless.license.header}</content>
+<delimiter>${spotless.delimiter}</delimiter>
+</licenseHeader>
+</configuration>
+<executions>
+<execution>
+<id>spotless-check</id>
+<phase>validate</phase>
+<goals>
+<goal>apply</goal>
+</goals>
+</execution>
+</executions>
+</plugin>
 </plugins>
 </pluginManagement>
 </build>
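Note that the spotless-check execution added above binds the apply goal to Maven's validate phase, so formatting is applied automatically at the start of every build rather than merely reported.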
node/package-lock.json (51 changes, generated)
@@ -1,12 +1,12 @@
 {
 "name": "vectordb",
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
 "lockfileVersion": 3,
 "requires": true,
 "packages": {
 "": {
 "name": "vectordb",
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
 "cpu": [
 "x64",
 "arm64"
@@ -52,11 +52,11 @@
 "uuid": "^9.0.0"
 },
 "optionalDependencies": {
-"@lancedb/vectordb-darwin-arm64": "0.18.2-beta.0",
+"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.7",
-"@lancedb/vectordb-darwin-x64": "0.18.2-beta.0",
+"@lancedb/vectordb-darwin-x64": "0.19.0-beta.7",
-"@lancedb/vectordb-linux-arm64-gnu": "0.18.2-beta.0",
+"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.7",
-"@lancedb/vectordb-linux-x64-gnu": "0.18.2-beta.0",
+"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.7",
-"@lancedb/vectordb-win32-x64-msvc": "0.18.2-beta.0"
+"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.7"
 },
 "peerDependencies": {
 "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
 }
 },
 "node_modules/@lancedb/vectordb-darwin-arm64": {
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
-"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.18.2-beta.0.tgz",
+"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.0-beta.7.tgz",
-"integrity": "sha512-FzIcElkS6R5I5kU1S5m7yLVTB1Duv1XcmZQtVmYl/JjNlfxS1WTtMzdzMqSBFohDcgU2Tkc5+1FpK1B94dUUbg==",
+"integrity": "sha512-HpbVKw4Vs+mPv7uPwaK7ilJlGrGdjOrNlC2mSkMCj0OlEwGRVcEcrSyijI7LXQH7ybEgNnDhSds5TuzBV26SGg==",
 "cpu": [
 "arm64"
 ],
@@ -340,9 +340,9 @@
 ]
 },
 "node_modules/@lancedb/vectordb-darwin-x64": {
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
-"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.18.2-beta.0.tgz",
+"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.0-beta.7.tgz",
-"integrity": "sha512-jv+XludfLNBDm1DjdqyghwDMtd4E+ygwycQpkpK72wyZSh6Qytrgq+4dNi/zCZ3UChFLbKbIxrVxv9yENQn2Pg==",
+"integrity": "sha512-x3X7nqIYVZtxaa0uZUk/M99vKvDinZ5G0+8k2NqZ696YXGWKGyRxR6k8ZzKYCoCTSuYXnBftgKoIlwJGtNt8Bw==",
 "cpu": [
 "x64"
 ],
@@ -353,9 +353,9 @@
 ]
 },
 "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
-"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.18.2-beta.0.tgz",
+"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.0-beta.7.tgz",
-"integrity": "sha512-8/fBpbNYhhpetf/pZv0DyPnQkeAbsiICMyCoRiNu5auvQK4AsGF1XvLWrDi68u9F0GysBKvuatYuGqa/yh+Anw==",
+"integrity": "sha512-Vwj0HI3+b4NgXKf+5+W/GfLBCGoQMBGM47vA/ts1dpe/PxraOQYPDv67I5kbXkCQKwhal7b0iZx/PbMu0JZPyw==",
 "cpu": [
 "arm64"
 ],
@@ -366,9 +366,9 @@
 ]
 },
 "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
-"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.18.2-beta.0.tgz",
+"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.0-beta.7.tgz",
-"integrity": "sha512-7a1Kc/2V2ff4HlLzXyXVdK0Z0VIFUt50v2SBRdlcycJ0NLW9ZqV+9UjB/NAOwMXVgYd7d3rKjACGkQzkpvcyeg==",
+"integrity": "sha512-Dx2B6UWQei9D7Rt+MgHWqPTYtEK2w3EgsNb5ENEWUTZxH7lD/CV7Sw0JMK5LDG209fFcpXFerveF6J8ZC8uGBQ==",
 "cpu": [
 "x64"
 ],
@@ -379,9 +379,9 @@
 ]
 },
 "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
-"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.18.2-beta.0.tgz",
+"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.0-beta.7.tgz",
-"integrity": "sha512-EeCiSf2RtJMESnkIca28GI6rAStYj2q9sVIyNCXpmIZSkJVpfQ3iswHGAbHrEfaPl0J1Re9cnRHLLuqkumwiIQ==",
+"integrity": "sha512-F5LZGa+gkUH1TgsWZWLLAMejwXFIWdash7+85ip4k2M0ThyqLF/dtlldOvteUEd5+flxihGjHg6TUtnSY8XBFA==",
 "cpu": [
 "x64"
 ],
@@ -1184,9 +1184,10 @@
 }
 },
 "node_modules/axios": {
-"version": "1.7.7",
+"version": "1.8.4",
-"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
+"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
-"integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==",
+"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
+"license": "MIT",
 "dependencies": {
 "follow-redirects": "^1.15.6",
 "form-data": "^4.0.0",
@@ -1,6 +1,6 @@
 {
 "name": "vectordb",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "description": " Serverless, low-latency vector database for AI applications",
 "private": false,
 "main": "dist/index.js",
@@ -89,10 +89,10 @@
 }
 },
 "optionalDependencies": {
-"@lancedb/vectordb-darwin-x64": "0.18.2-beta.1",
+"@lancedb/vectordb-darwin-x64": "0.19.0-beta.7",
-"@lancedb/vectordb-darwin-arm64": "0.18.2-beta.1",
+"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.7",
-"@lancedb/vectordb-linux-x64-gnu": "0.18.2-beta.1",
+"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.7",
-"@lancedb/vectordb-linux-arm64-gnu": "0.18.2-beta.1",
+"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.7",
-"@lancedb/vectordb-win32-x64-msvc": "0.18.2-beta.1"
+"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.7"
 }
 }
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.18.2-beta.1"
+version = "0.19.0-beta.7"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -10,7 +10,7 @@ import * as arrow16 from "apache-arrow-16";
 import * as arrow17 from "apache-arrow-17";
 import * as arrow18 from "apache-arrow-18";
 
-import { Table, connect } from "../lancedb";
+import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
 import {
 Table as ArrowTable,
 Field,
@@ -33,6 +33,7 @@ import {
 register,
 } from "../lancedb/embedding";
 import { Index } from "../lancedb/indices";
+import { instanceOfFullTextQuery } from "../lancedb/query";
 
 describe.each([arrow15, arrow16, arrow17, arrow18])(
 "Given a table",
@@ -633,6 +634,23 @@ describe("When creating an index", () => {
 expect(plan2).not.toMatch("LanceScan");
 });
+
+it("should be able to run analyze plan", async () => {
+await tbl.createIndex("vec");
+await tbl.add([
+{
+id: 300,
+vec: Array(32)
+.fill(1)
+.map(() => Math.random()),
+tags: [],
+},
+]);
+
+const plan = await tbl.query().nearestTo(queryVec).analyzePlan();
+expect(plan).toMatch("AnalyzeExec");
+expect(plan).toMatch("metrics=");
+});
+
 it("should be able to query with row id", async () => {
 const results = await tbl
 .query()
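The analyzePlan API exercised by this test can be called on any query builder. A minimal standalone sketch, assuming the nodejs package as modified in this diff (the table name and data are illustrative):

import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("/tmp/lancedb-demo");
const table = await db.createTable("vectors", [
  { id: 1, vector: [1.1, 0.9] },
  { id: 2, vector: [0.2, 1.7] },
]);

// Executes the query and returns the physical plan annotated with
// runtime metrics (elapsed time, row counts, I/O) for each operator.
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
console.log(plan); // expect "AnalyzeExec" and per-operator "metrics=" entries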
@@ -850,6 +868,44 @@ describe("When creating an index", () => {
 });
 });
+
+describe("When querying a table", () => {
+let tmpDir: tmp.DirResult;
+beforeEach(() => {
+tmpDir = tmp.dirSync({ unsafeCleanup: true });
+});
+afterEach(() => tmpDir.removeCallback());
+
+it("should throw an error when timeout is reached", async () => {
+const db = await connect(tmpDir.name);
+const data = makeArrowTable([
+{ text: "a", vector: [0.1, 0.2] },
+{ text: "b", vector: [0.3, 0.4] },
+]);
+const table = await db.createTable("test", data);
+await table.createIndex("text", { config: Index.fts() });
+
+await expect(
+table.query().where("text != 'a'").toArray({ timeoutMs: 0 }),
+).rejects.toThrow("Query timeout");
+
+await expect(
+table.query().nearestTo([0.0, 0.0]).toArrow({ timeoutMs: 0 }),
+).rejects.toThrow("Query timeout");
+
+await expect(
+table.search("a", "fts").toArray({ timeoutMs: 0 }),
+).rejects.toThrow("Query timeout");
+
+await expect(
+table
+.query()
+.nearestToText("a")
+.nearestTo([0.0, 0.0])
+.toArrow({ timeoutMs: 0 }),
+).rejects.toThrow("Query timeout");
+});
+});
+
 describe("Read consistency interval", () => {
 let tmpDir: tmp.DirResult;
 beforeEach(() => {
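As these tests show, the new timeoutMs execution option applies to plain scans, vector search, full-text search, and hybrid queries alike. A small sketch of guarding a query with a timeout (the 5-second budget is illustrative):

try {
  const rows = await table
    .query()
    .nearestTo([0.0, 0.0])
    .toArray({ timeoutMs: 5000 });
  console.log(rows.length);
} catch (err) {
  // The rejection message contains "Query timeout" when the budget is exceeded.
}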
@@ -1247,6 +1303,56 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
 const results = await table.search("hello").toArray();
 expect(results[0].text).toBe(data[0].text);
+
+const query = new MatchQuery("goodbye", "text");
+expect(instanceOfFullTextQuery(query)).toBe(true);
+const results2 = await table
+.search(new MatchQuery("goodbye", "text"))
+.toArray();
+expect(results2[0].text).toBe(data[1].text);
+});
+
+test("prewarm full text search index", async () => {
+const db = await connect(tmpDir.name);
+const data = [
+{ text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
+{ text: ["lance database"], vector: [0.4, 0.5, 0.6] },
+{ text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
+{ text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
+{ text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
+];
+const table = await db.createTable("test", data);
+await table.createIndex("text", {
+config: Index.fts(),
+});
+
+// For the moment, we just confirm we can call prewarmIndex without error
+// and still search it afterwards
+await table.prewarmIndex("text_idx");
+
+const results = await table.search("lance").toArray();
+expect(results.length).toBe(3);
+});
+
+test("full text index on list", async () => {
+const db = await connect(tmpDir.name);
+const data = [
+{ text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
+{ text: ["lance database"], vector: [0.4, 0.5, 0.6] },
+{ text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
+{ text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
+{ text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
+];
+const table = await db.createTable("test", data);
+await table.createIndex("text", {
+config: Index.fts(),
+});
+
+const results = await table.search("lance").toArray();
+expect(results.length).toBe(3);
+
+const results2 = await table.search('"lance database"').toArray();
+expect(results2.length).toBe(2);
 });
 
 test("full text search without positions", async () => {
@@ -1299,6 +1405,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
 expect(results.length).toBe(2);
 const phraseResults = await table.search('"hello world"').toArray();
 expect(phraseResults.length).toBe(1);
+const phraseResults2 = await table
+.search(new PhraseQuery("hello world", "text"))
+.toArray();
+expect(phraseResults2.length).toBe(1);
+});
+
+test("full text search fuzzy query", async () => {
+const db = await connect(tmpDir.name);
+const data = [
+{ text: "fa", vector: [0.1, 0.2, 0.3] },
+{ text: "fo", vector: [0.4, 0.5, 0.6] },
+{ text: "fob", vector: [0.4, 0.5, 0.6] },
+{ text: "focus", vector: [0.4, 0.5, 0.6] },
+{ text: "foo", vector: [0.4, 0.5, 0.6] },
+{ text: "food", vector: [0.4, 0.5, 0.6] },
+{ text: "foul", vector: [0.4, 0.5, 0.6] },
+];
+const table = await db.createTable("test", data);
+await table.createIndex("text", {
+config: Index.fts(),
+});
+
+const results = await table
+.search(new MatchQuery("foo", "text"))
+.toArray();
+expect(results.length).toBe(1);
+expect(results[0].text).toBe("foo");
+
+const fuzzyResults = await table
+.search(new MatchQuery("foo", "text", { fuzziness: 1 }))
+.toArray();
+expect(fuzzyResults.length).toBe(4);
+const resultSet = new Set(fuzzyResults.map((r) => r.text));
+expect(resultSet.has("foo")).toBe(true);
+expect(resultSet.has("fob")).toBe(true);
+expect(resultSet.has("fo")).toBe(true);
+expect(resultSet.has("food")).toBe(true);
 });
 
 test.each([
@@ -1346,6 +1489,30 @@ describe("when calling explainPlan", () => {
 });
 });
+
+describe("when calling analyzePlan", () => {
+let tmpDir: tmp.DirResult;
+let table: Table;
+let queryVec: number[];
+beforeEach(async () => {
+tmpDir = tmp.dirSync({ unsafeCleanup: true });
+const con = await connect(tmpDir.name);
+table = await con.createTable("vectors", [{ id: 1, vector: [1.1, 0.9] }]);
+});
+
+afterEach(() => {
+tmpDir.removeCallback();
+});
+
+it("retrieves runtime metrics", async () => {
+queryVec = Array(2)
+.fill(1)
+.map(() => Math.random());
+const plan = await table.query().nearestTo(queryVec).analyzePlan();
+console.log("Query Plan:\n", plan); // <--- Print the plan
+expect(plan).toMatch("AnalyzeExec");
+});
+});
+
 describe("column name options", () => {
 let tmpDir: tmp.DirResult;
 let table: Table;
@@ -47,6 +47,12 @@ export {
 QueryExecutionOptions,
 FullTextSearchOptions,
 RecordBatchIterator,
+FullTextQuery,
+MatchQuery,
+PhraseQuery,
+BoostQuery,
+MultiMatchQuery,
+FullTextQueryType,
 } from "./query";
 
 export {
@@ -11,12 +11,14 @@ import {
 } from "./arrow";
 import { type IvfPqOptions } from "./indices";
 import {
+JsFullTextQuery,
 RecordBatchIterator as NativeBatchIterator,
 Query as NativeQuery,
 Table as NativeTable,
 VectorQuery as NativeVectorQuery,
 } from "./native";
 import { Reranker } from "./rerankers";
 
 export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
 private promisedInner?: Promise<NativeBatchIterator>;
 private inner?: NativeBatchIterator;
@@ -62,7 +64,7 @@ class RecordBatchIterable<
 // biome-ignore lint/suspicious/noExplicitAny: skip
 [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
 return new RecordBatchIterator(
-this.inner.execute(this.options?.maxBatchLength),
+this.inner.execute(this.options?.maxBatchLength, this.options?.timeoutMs),
 );
 }
 }
@@ -78,6 +80,11 @@ export interface QueryExecutionOptions {
 * in smaller chunks.
 */
 maxBatchLength?: number;
+
+/**
+ * Timeout for query execution in milliseconds
+ */
+timeoutMs?: number;
 }
 
 /**
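Both fields of QueryExecutionOptions can be combined on any read path (toArray, toArrow, or batch iteration); a sketch with illustrative values:

// Cap each RecordBatch at 512 rows and abort the query after 10 seconds.
const arrowTable = await table
  .query()
  .nearestTo([0.5, 0.2])
  .toArrow({ maxBatchLength: 512, timeoutMs: 10_000 });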
@@ -152,7 +159,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
 }
 
 fullTextSearch(
-query: string,
+query: string | FullTextQuery,
 options?: Partial<FullTextSearchOptions>,
 ): this {
 let columns: string[] | null = null;
@@ -164,9 +171,16 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
 }
 }
 
-this.doCall((inner: NativeQueryType) =>
-inner.fullTextSearch(query, columns),
-);
+this.doCall((inner: NativeQueryType) => {
+if (typeof query === "string") {
+inner.fullTextSearch({
+query: query,
+columns: columns,
+});
+} else {
+inner.fullTextSearch({ query: query.inner });
+}
+});
 return this;
 }
@@ -273,9 +287,11 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
 options?: Partial<QueryExecutionOptions>,
 ): Promise<NativeBatchIterator> {
 if (this.inner instanceof Promise) {
-return this.inner.then((inner) => inner.execute(options?.maxBatchLength));
+return this.inner.then((inner) =>
+inner.execute(options?.maxBatchLength, options?.timeoutMs),
+);
 } else {
-return this.inner.execute(options?.maxBatchLength);
+return this.inner.execute(options?.maxBatchLength, options?.timeoutMs);
 }
 }
@@ -348,6 +364,43 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
 return this.inner.explainPlan(verbose);
 }
 }
+
+/**
+ * Executes the query and returns the physical query plan annotated with runtime metrics.
+ *
+ * This is useful for debugging and performance analysis, as it shows how the query was executed
+ * and includes metrics such as elapsed time, rows processed, and I/O statistics.
+ *
+ * @example
+ * import * as lancedb from "@lancedb/lancedb"
+ *
+ * const db = await lancedb.connect("./.lancedb");
+ * const table = await db.createTable("my_table", [
+ *   { vector: [1.1, 0.9], id: "1" },
+ * ]);
+ *
+ * const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
+ *
+ * Example output (with runtime metrics inlined):
+ * AnalyzeExec verbose=true, metrics=[]
+ *   ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
+ *     Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
+ *       CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
+ *         GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
+ *           FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
+ *             SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
+ *               KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
+ *                 LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
+ *
+ * @returns A query execution plan with runtime metrics for each step.
+ */
+async analyzePlan(): Promise<string> {
+if (this.inner instanceof Promise) {
+return this.inner.then((inner) => inner.analyzePlan());
+} else {
+return this.inner.analyzePlan();
+}
+}
 }
 
 /**
@@ -681,8 +734,177 @@ export class Query extends QueryBase<NativeQuery> {
 }
 }
 
-nearestToText(query: string, columns?: string[]): Query {
-this.doCall((inner) => inner.fullTextSearch(query, columns));
+nearestToText(query: string | FullTextQuery, columns?: string[]): Query {
+this.doCall((inner) => {
+if (typeof query === "string") {
+inner.fullTextSearch({
+query: query,
+columns: columns,
+});
+} else {
+inner.fullTextSearch({ query: query.inner });
+}
+});
 return this;
 }
 }
+
+/**
+ * Enum representing the types of full-text queries supported.
+ *
+ * - `Match`: Performs a full-text search for terms in the query string.
+ * - `MatchPhrase`: Searches for an exact phrase match in the text.
+ * - `Boost`: Boosts the relevance score of specific terms in the query.
+ * - `MultiMatch`: Searches across multiple fields for the query terms.
+ */
+export enum FullTextQueryType {
+Match = "match",
+MatchPhrase = "match_phrase",
+Boost = "boost",
+MultiMatch = "multi_match",
+}
+
+/**
+ * Represents a full-text query interface.
+ * This interface defines the structure and behavior for full-text queries,
+ * including methods to retrieve the query type and convert the query to a dictionary format.
+ */
+export interface FullTextQuery {
+/**
+ * Returns the inner query object.
+ * This is the underlying query object used by the database engine.
+ * @ignore
+ */
+inner: JsFullTextQuery;
+
+/**
+ * The type of the full-text query.
+ */
+queryType(): FullTextQueryType;
+}
+
+// biome-ignore lint/suspicious/noExplicitAny: we want any here
+export function instanceOfFullTextQuery(obj: any): obj is FullTextQuery {
+return obj != null && obj.inner instanceof JsFullTextQuery;
+}
+
+export class MatchQuery implements FullTextQuery {
+/** @ignore */
+public readonly inner: JsFullTextQuery;
+/**
+ * Creates an instance of MatchQuery.
+ *
+ * @param query - The text query to search for.
+ * @param column - The name of the column to search within.
+ * @param options - Optional parameters for the match query.
+ *   - `boost`: The boost factor for the query (default is 1.0).
+ *   - `fuzziness`: The fuzziness level for the query (default is 0).
+ *   - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
+ */
+constructor(
+query: string,
+column: string,
+options?: {
+boost?: number;
+fuzziness?: number;
+maxExpansions?: number;
+},
+) {
+let fuzziness = options?.fuzziness;
+if (fuzziness === undefined) {
+fuzziness = 0;
+}
+this.inner = JsFullTextQuery.matchQuery(
+query,
+column,
+options?.boost ?? 1.0,
+fuzziness,
+options?.maxExpansions ?? 50,
+);
+}
+
+queryType(): FullTextQueryType {
+return FullTextQueryType.Match;
+}
+}
+
+export class PhraseQuery implements FullTextQuery {
+/** @ignore */
+public readonly inner: JsFullTextQuery;
+/**
+ * Creates an instance of `PhraseQuery`.
+ *
+ * @param query - The phrase to search for in the specified column.
+ * @param column - The name of the column to search within.
+ */
+constructor(query: string, column: string) {
+this.inner = JsFullTextQuery.phraseQuery(query, column);
+}
+
+queryType(): FullTextQueryType {
+return FullTextQueryType.MatchPhrase;
+}
+}
+
+export class BoostQuery implements FullTextQuery {
+/** @ignore */
+public readonly inner: JsFullTextQuery;
+/**
+ * Creates an instance of BoostQuery.
+ * The boost returns documents that match the positive query,
+ * but penalizes those that match the negative query.
+ * The penalty is controlled by the `negativeBoost` parameter.
+ *
+ * @param positive - The positive query that boosts the relevance score.
+ * @param negative - The negative query that reduces the relevance score.
+ * @param options - Optional parameters for the boost query.
+ *   - `negativeBoost`: The boost factor for the negative query (default is 0.0).
+ */
+constructor(
+positive: FullTextQuery,
+negative: FullTextQuery,
+options?: {
+negativeBoost?: number;
+},
+) {
+this.inner = JsFullTextQuery.boostQuery(
+positive.inner,
+negative.inner,
+options?.negativeBoost,
+);
+}
+
+queryType(): FullTextQueryType {
+return FullTextQueryType.Boost;
+}
+}
+
+export class MultiMatchQuery implements FullTextQuery {
+/** @ignore */
+public readonly inner: JsFullTextQuery;
+/**
+ * Creates an instance of MultiMatchQuery.
+ *
+ * @param query - The text query to search for across multiple columns.
+ * @param columns - An array of column names to search within.
+ * @param options - Optional parameters for the multi-match query.
+ *   - `boosts`: An array of boost factors for each column (default is 1.0 for all).
+ */
+constructor(
+query: string,
+columns: string[],
+options?: {
+boosts?: number[];
+},
+) {
+this.inner = JsFullTextQuery.multiMatchQuery(
+query,
+columns,
+options?.boosts,
+);
+}
+
+queryType(): FullTextQueryType {
+return FullTextQueryType.MultiMatch;
+}
+}
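The new query classes compose: BoostQuery takes two other FullTextQuery instances, and any of them can be passed to table.search(). A sketch built only from the constructors above (column names are illustrative):

import { BoostQuery, MatchQuery, MultiMatchQuery } from "@lancedb/lancedb";

// Rank rows matching "puppy" higher, penalizing rows that also match "training".
const boosted = new BoostQuery(
  new MatchQuery("puppy", "text"),
  new MatchQuery("training", "text"),
  { negativeBoost: 0.5 },
);
const hits = await table.search(boosted).toArray();

// Match the same terms across several columns, weighting "title" twice as much.
const multi = new MultiMatchQuery("puppy", ["title", "text"], {
  boosts: [2.0, 1.0],
});
const hits2 = await table.search(multi).toArray();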
@@ -22,7 +22,12 @@ import {
 OptimizeStats,
 Table as _NativeTable,
 } from "./native";
-import { Query, VectorQuery } from "./query";
+import {
+FullTextQuery,
+Query,
+VectorQuery,
+instanceOfFullTextQuery,
+} from "./query";
 import { sanitizeType } from "./sanitize";
 import { IntoSql, toSQL } from "./util";
 export { IndexConfig } from "./native";
@@ -230,6 +235,17 @@ export abstract class Table {
 */
 abstract dropIndex(name: string): Promise<void>;
+
+/**
+ * Prewarm an index in the table.
+ *
+ * @param name The name of the index.
+ *
+ * This will load the index into memory. This may reduce the cold-start time for
+ * future queries. If the index does not fit in the cache then this call may be
+ * wasteful.
+ */
+abstract prewarmIndex(name: string): Promise<void>;
+
 /**
 * Create a {@link Query} Builder.
 *
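A usage sketch for prewarmIndex; the "text_idx" index name follows the default <column>_idx convention seen in the tests earlier in this diff:

await table.createIndex("text", { config: Index.fts() });

// Optionally pull the index into the cache before serving traffic,
// trading memory for lower cold-start query latency.
await table.prewarmIndex("text_idx");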
@@ -294,7 +310,7 @@ export abstract class Table {
 * if the query is a string and no embedding function is defined, it will be treated as a full text search query
 */
 abstract search(
-query: string | IntoVector,
+query: string | IntoVector | FullTextQuery,
 queryType?: string,
 ftsColumns?: string | string[],
 ): VectorQuery | Query;
@@ -560,16 +576,20 @@ export class LocalTable extends Table {
 await this.inner.dropIndex(name);
 }
+
+async prewarmIndex(name: string): Promise<void> {
+await this.inner.prewarmIndex(name);
+}
+
 query(): Query {
 return new Query(this.inner);
 }
 
 search(
-query: string | IntoVector,
+query: string | IntoVector | FullTextQuery,
 queryType: string = "auto",
 ftsColumns?: string | string[],
 ): VectorQuery | Query {
-if (typeof query !== "string") {
+if (typeof query !== "string" && !instanceOfFullTextQuery(query)) {
 if (queryType === "fts") {
 throw new Error("Cannot perform full text search on a vector query");
 }
@@ -585,7 +605,10 @@ export class LocalTable extends Table {
 
 // The query type is auto or vector
 // fall back to full text search if no embedding functions are defined and the query is a string
-if (queryType === "auto" && getRegistry().length() === 0) {
+if (
+queryType === "auto" &&
+(getRegistry().length() === 0 || instanceOfFullTextQuery(query))
+) {
 return this.query().fullTextSearch(query, {
 columns: ftsColumns,
 });
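With this change a FullTextQuery instance passed to search() under the default "auto" query type is routed to full-text search even when embedding functions are registered: instanceOfFullTextQuery(query) short-circuits the embedding path, so only plain strings and vectors still fall through to vector search.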
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-darwin-arm64",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["darwin"],
 "cpu": ["arm64"],
 "main": "lancedb.darwin-arm64.node",
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-darwin-x64",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["darwin"],
 "cpu": ["x64"],
 "main": "lancedb.darwin-x64.node",
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-arm64-gnu",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["linux"],
 "cpu": ["arm64"],
 "main": "lancedb.linux-arm64-gnu.node",
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-arm64-musl",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["linux"],
 "cpu": ["arm64"],
 "main": "lancedb.linux-arm64-musl.node",
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-x64-gnu",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["linux"],
 "cpu": ["x64"],
 "main": "lancedb.linux-x64-gnu.node",
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-x64-musl",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["linux"],
 "cpu": ["x64"],
 "main": "lancedb.linux-x64-musl.node",
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-win32-arm64-msvc",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": [
 "win32"
 ],
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-win32-x64-msvc",
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "os": ["win32"],
 "cpu": ["x64"],
 "main": "lancedb.win32-x64-msvc.node",
nodejs/package-lock.json (252 changes, generated)
@@ -1,12 +1,12 @@
 {
 "name": "@lancedb/lancedb",
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
 "lockfileVersion": 3,
 "requires": true,
 "packages": {
 "": {
 "name": "@lancedb/lancedb",
-"version": "0.18.2-beta.0",
+"version": "0.19.0-beta.7",
 "cpu": [
 "x64",
 "arm64"
@@ -2304,89 +2304,20 @@
 }
 },
 "node_modules/@babel/code-frame": {
-"version": "7.23.5",
+"version": "7.26.2",
-"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.23.5.tgz",
+"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
-"integrity": "sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA==",
+"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
 "dev": true,
+"license": "MIT",
 "dependencies": {
-"@babel/highlight": "^7.23.4",
+"@babel/helper-validator-identifier": "^7.25.9",
-"chalk": "^2.4.2"
+"js-tokens": "^4.0.0",
+"picocolors": "^1.0.0"
 },
 "engines": {
 "node": ">=6.9.0"
 }
 },
-"node_modules/@babel/code-frame/node_modules/ansi-styles": {
-"version": "3.2.1",
-"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
-"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
-"dev": true,
-"dependencies": {
-"color-convert": "^1.9.0"
-},
-"engines": {
-"node": ">=4"
-}
-},
-"node_modules/@babel/code-frame/node_modules/chalk": {
-"version": "2.4.2",
-"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
-"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
-"dev": true,
-"dependencies": {
-"ansi-styles": "^3.2.1",
-"escape-string-regexp": "^1.0.5",
-"supports-color": "^5.3.0"
-},
-"engines": {
-"node": ">=4"
-}
-},
-"node_modules/@babel/code-frame/node_modules/color-convert": {
-"version": "1.9.3",
-"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
-"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
-"dev": true,
-"dependencies": {
-"color-name": "1.1.3"
-}
-},
-"node_modules/@babel/code-frame/node_modules/color-name": {
-"version": "1.1.3",
-"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
-"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==",
-"dev": true
-},
-"node_modules/@babel/code-frame/node_modules/escape-string-regexp": {
-"version": "1.0.5",
-"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
-"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==",
-"dev": true,
-"engines": {
-"node": ">=0.8.0"
-}
-},
-"node_modules/@babel/code-frame/node_modules/has-flag": {
-"version": "3.0.0",
-"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
-"integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==",
-"dev": true,
-"engines": {
-"node": ">=4"
-}
-},
-"node_modules/@babel/code-frame/node_modules/supports-color": {
-"version": "5.5.0",
-"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
-"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
-"dev": true,
-"dependencies": {
-"has-flag": "^3.0.0"
-},
-"engines": {
-"node": ">=4"
-}
-},
 "node_modules/@babel/compat-data": {
 "version": "7.23.5",
 "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.23.5.tgz",
@@ -2589,19 +2520,21 @@
 }
 },
 "node_modules/@babel/helper-string-parser": {
-"version": "7.23.4",
+"version": "7.25.9",
-"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.23.4.tgz",
+"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
-"integrity": "sha512-803gmbQdqwdf4olxrX4AJyFBV/RTr3rSmOj0rKwesmzlfhYNDEs+/iOcznzpNWlJlIlTJC2QfPFcHB6DlzdVLQ==",
+"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
 "dev": true,
+"license": "MIT",
 "engines": {
 "node": ">=6.9.0"
 }
 },
 "node_modules/@babel/helper-validator-identifier": {
-"version": "7.22.20",
+"version": "7.25.9",
-"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz",
+"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
-"integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==",
+"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
 "dev": true,
+"license": "MIT",
 "engines": {
 "node": ">=6.9.0"
 }
@@ -2616,109 +2549,28 @@
 }
 },
 "node_modules/@babel/helpers": {
-"version": "7.23.8",
+"version": "7.27.0",
-"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.23.8.tgz",
+"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.27.0.tgz",
-"integrity": "sha512-KDqYz4PiOWvDFrdHLPhKtCThtIcKVy6avWD2oG4GEvyQ+XDZwHD4YQd+H2vNMnq2rkdxsDkU82T+Vk8U/WXHRQ==",
+"integrity": "sha512-U5eyP/CTFPuNE3qk+WZMxFkp/4zUzdceQlfzf7DdGdhp+Fezd7HD+i8Y24ZuTMKX3wQBld449jijbGq6OdGNQg==",
 "dev": true,
+"license": "MIT",
 "dependencies": {
-"@babel/template": "^7.22.15",
+"@babel/template": "^7.27.0",
-"@babel/traverse": "^7.23.7",
-"@babel/types": "^7.23.6"
+"@babel/types": "^7.27.0"
 },
 "engines": {
 "node": ">=6.9.0"
 }
 },
-"node_modules/@babel/highlight": {
-"version": "7.23.4",
-"resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.23.4.tgz",
-"integrity": "sha512-acGdbYSfp2WheJoJm/EBBBLh/ID8KDc64ISZ9DYtBmC8/Q204PZJLHyzeB5qMzJ5trcOkybd78M4x2KWsUq++A==",
-"dev": true,
-"dependencies": {
-"@babel/helper-validator-identifier": "^7.22.20",
-"chalk": "^2.4.2",
-"js-tokens": "^4.0.0"
-},
-"engines": {
-"node": ">=6.9.0"
-}
-},
-"node_modules/@babel/highlight/node_modules/ansi-styles": {
-"version": "3.2.1",
-"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
-"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
-"dev": true,
-"dependencies": {
-"color-convert": "^1.9.0"
-},
-"engines": {
-"node": ">=4"
-}
-},
-"node_modules/@babel/highlight/node_modules/chalk": {
-"version": "2.4.2",
-"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
-"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
-"dev": true,
-"dependencies": {
-"ansi-styles": "^3.2.1",
-"escape-string-regexp": "^1.0.5",
-"supports-color": "^5.3.0"
-},
-"engines": {
-"node": ">=4"
-}
-},
-"node_modules/@babel/highlight/node_modules/color-convert": {
-"version": "1.9.3",
-"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
-"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
-"dev": true,
-"dependencies": {
-"color-name": "1.1.3"
-}
-},
-"node_modules/@babel/highlight/node_modules/color-name": {
-"version": "1.1.3",
-"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
-"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==",
-"dev": true
-},
-"node_modules/@babel/highlight/node_modules/escape-string-regexp": {
-"version": "1.0.5",
-"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
-"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==",
-"dev": true,
-"engines": {
-"node": ">=0.8.0"
-}
-},
-"node_modules/@babel/highlight/node_modules/has-flag": {
-"version": "3.0.0",
-"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
-"integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==",
-"dev": true,
-"engines": {
-"node": ">=4"
-}
-},
-"node_modules/@babel/highlight/node_modules/supports-color": {
-"version": "5.5.0",
-"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
-"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
-"dev": true,
-"dependencies": {
-"has-flag": "^3.0.0"
-},
-"engines": {
-"node": ">=4"
-}
-},
 "node_modules/@babel/parser": {
-"version": "7.23.6",
+"version": "7.27.0",
-"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.6.tgz",
+"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.0.tgz",
-"integrity": "sha512-Z2uID7YJ7oNvAI20O9X0bblw7Qqs8Q2hFy0R9tAfnfLkp5MW0UH9eUvnDSnFwKZ0AvgS1ucqR4KzvVHgnke1VQ==",
+"integrity": "sha512-iaepho73/2Pz7w2eMS0Q5f83+0RKI7i4xmiYeBmDzfRVbQtTOG7Ts0S4HzJVsTMGI9keU8rNfuZr8DKfSt7Yyg==",
 "dev": true,
+"license": "MIT",
+"dependencies": {
+"@babel/types": "^7.27.0"
+},
 "bin": {
 "parser": "bin/babel-parser.js"
 },
@@ -2904,14 +2756,15 @@
 }
 },
 "node_modules/@babel/template": {
-"version": "7.22.15",
+"version": "7.27.0",
-"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.22.15.tgz",
+"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.0.tgz",
-"integrity": "sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==",
+"integrity": "sha512-2ncevenBqXI6qRMukPlXwHKHchC7RyMuu4xv5JBXRfOGVcTy1mXCD12qrp7Jsoxll1EV3+9sE4GugBVRjT2jFA==",
 "dev": true,
+"license": "MIT",
 "dependencies": {
-"@babel/code-frame": "^7.22.13",
+"@babel/code-frame": "^7.26.2",
-"@babel/parser": "^7.22.15",
+"@babel/parser": "^7.27.0",
-"@babel/types": "^7.22.15"
+"@babel/types": "^7.27.0"
 },
 "engines": {
 "node": ">=6.9.0"
@@ -2948,14 +2801,14 @@
 }
 },
 "node_modules/@babel/types": {
-"version": "7.23.6",
+"version": "7.27.0",
-"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.6.tgz",
+"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.0.tgz",
-"integrity": "sha512-+uarb83brBzPKN38NX1MkB6vb6+mwvR6amUulqAE7ccQw1pEl+bCia9TbdG1lsnFP7lZySvUn37CHyXQdfTwzg==",
+"integrity": "sha512-H45s8fVLYjbhFH62dIJ3WtmJ6RSPt/3DRO0ZcT2SUiYiQyz3BLVb9ADEnLl91m74aQPS3AzzeajZHYOalWe3bg==",
 "dev": true,
+"license": "MIT",
 "dependencies": {
-"@babel/helper-string-parser": "^7.23.4",
+"@babel/helper-string-parser": "^7.25.9",
-"@babel/helper-validator-identifier": "^7.22.20",
+"@babel/helper-validator-identifier": "^7.25.9"
-"to-fast-properties": "^2.0.0"
 },
 "engines": {
 "node": ">=6.9.0"
@@ -5550,10 +5403,11 @@
 "devOptional": true
 },
 "node_modules/axios": {
-"version": "1.7.7",
+"version": "1.8.4",
-"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
+"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
-"integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==",
+"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
 "dev": true,
+"license": "MIT",
 "dependencies": {
 "follow-redirects": "^1.15.6",
 "form-data": "^4.0.0",
@@ -7869,7 +7723,8 @@
 "version": "4.0.0",
 "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
 "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
-"dev": true
+"dev": true,
+"license": "MIT"
 },
 "node_modules/js-yaml": {
 "version": "3.14.1",
@@ -9360,15 +9215,6 @@
 "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
 "dev": true
 },
-"node_modules/to-fast-properties": {
-"version": "2.0.0",
-"resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz",
-"integrity": "sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==",
-"dev": true,
-"engines": {
-"node": ">=4"
-}
-},
 "node_modules/to-regex-range": {
 "version": "5.0.1",
 "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
@@ -11,7 +11,7 @@
 "ann"
 ],
 "private": false,
-"version": "0.18.2-beta.1",
+"version": "0.19.0-beta.7",
 "main": "dist/index.js",
 "exports": {
 ".": "./dist/index.js",
@@ -29,6 +29,7 @@
 "aarch64-apple-darwin",
 "x86_64-unknown-linux-gnu",
 "aarch64-unknown-linux-gnu",
+"x86_64-unknown-linux-musl",
 "aarch64-unknown-linux-musl",
 "x86_64-pc-windows-msvc",
 "aarch64-pc-windows-msvc"
@@ -3,7 +3,9 @@
 
 use std::sync::Arc;
 
-use lancedb::index::scalar::FullTextSearchQuery;
+use lancedb::index::scalar::{
+    BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, PhraseQuery,
+};
 use lancedb::query::ExecutableQuery;
 use lancedb::query::Query as LanceDbQuery;
 use lancedb::query::QueryBase;
@@ -38,9 +40,10 @@ impl Query {
 }
 
 #[napi]
-pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
+pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
-let query = FullTextSearchQuery::new(query).columns(columns);
+let query = parse_fts_query(query)?;
 self.inner = self.inner.clone().full_text_search(query);
+Ok(())
 }
 
 #[napi]
@@ -87,11 +90,15 @@ impl Query {
|
|||||||
pub async fn execute(
|
pub async fn execute(
|
||||||
&self,
|
&self,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
|
timeout_ms: Option<u32>,
|
||||||
) -> napi::Result<RecordBatchIterator> {
|
) -> napi::Result<RecordBatchIterator> {
|
||||||
let mut execution_opts = QueryExecutionOptions::default();
|
let mut execution_opts = QueryExecutionOptions::default();
|
||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
execution_opts.max_batch_length = max_batch_length;
|
execution_opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
|
if let Some(timeout_ms) = timeout_ms {
|
||||||
|
execution_opts.timeout = Some(std::time::Duration::from_millis(timeout_ms as u64))
|
||||||
|
}
|
||||||
let inner_stream = self
|
let inner_stream = self
|
||||||
.inner
|
.inner
|
||||||
.execute_with_options(execution_opts)
|
.execute_with_options(execution_opts)
|
||||||
@@ -114,6 +121,16 @@ impl Query {
|
|||||||
))
|
))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(catch_unwind)]
|
||||||
|
pub async fn analyze_plan(&self) -> napi::Result<String> {
|
||||||
|
self.inner.analyze_plan().await.map_err(|e| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to execute analyze plan: {}",
|
||||||
|
convert_error(&e)
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
@@ -185,9 +202,10 @@ impl VectorQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
|
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
|
||||||
let query = FullTextSearchQuery::new(query).columns(columns);
|
let query = parse_fts_query(query)?;
|
||||||
self.inner = self.inner.clone().full_text_search(query);
|
self.inner = self.inner.clone().full_text_search(query);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
@@ -232,11 +250,15 @@ impl VectorQuery {
|
|||||||
pub async fn execute(
|
pub async fn execute(
|
||||||
&self,
|
&self,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
|
timeout_ms: Option<u32>,
|
||||||
) -> napi::Result<RecordBatchIterator> {
|
) -> napi::Result<RecordBatchIterator> {
|
||||||
let mut execution_opts = QueryExecutionOptions::default();
|
let mut execution_opts = QueryExecutionOptions::default();
|
||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
execution_opts.max_batch_length = max_batch_length;
|
execution_opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
|
if let Some(timeout_ms) = timeout_ms {
|
||||||
|
execution_opts.timeout = Some(std::time::Duration::from_millis(timeout_ms as u64))
|
||||||
|
}
|
||||||
let inner_stream = self
|
let inner_stream = self
|
||||||
.inner
|
.inner
|
||||||
.execute_with_options(execution_opts)
|
.execute_with_options(execution_opts)
|
||||||
@@ -259,4 +281,127 @@ impl VectorQuery {
|
|||||||
))
|
))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(catch_unwind)]
|
||||||
|
pub async fn analyze_plan(&self) -> napi::Result<String> {
|
||||||
|
self.inner.analyze_plan().await.map_err(|e| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to execute analyze plan: {}",
|
||||||
|
convert_error(&e)
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct JsFullTextQuery {
|
||||||
|
pub(crate) inner: FtsQuery,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
impl JsFullTextQuery {
|
||||||
|
#[napi(factory)]
|
||||||
|
pub fn match_query(
|
||||||
|
query: String,
|
||||||
|
column: String,
|
||||||
|
boost: f64,
|
||||||
|
fuzziness: Option<u32>,
|
||||||
|
max_expansions: u32,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
inner: MatchQuery::new(query)
|
||||||
|
.with_column(Some(column))
|
||||||
|
.with_boost(boost as f32)
|
||||||
|
.with_fuzziness(fuzziness)
|
||||||
|
.with_max_expansions(max_expansions as usize)
|
||||||
|
.into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(factory)]
|
||||||
|
pub fn phrase_query(query: String, column: String) -> napi::Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
inner: PhraseQuery::new(query).with_column(Some(column)).into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(factory)]
|
||||||
|
#[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
|
||||||
|
pub fn boost_query(
|
||||||
|
positive: &JsFullTextQuery,
|
||||||
|
negative: &JsFullTextQuery,
|
||||||
|
negative_boost: Option<f64>,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
inner: BoostQuery::new(
|
||||||
|
positive.inner.clone(),
|
||||||
|
negative.inner.clone(),
|
||||||
|
negative_boost.map(|v| v as f32),
|
||||||
|
)
|
||||||
|
.into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(factory)]
|
||||||
|
pub fn multi_match_query(
|
||||||
|
query: String,
|
||||||
|
columns: Vec<String>,
|
||||||
|
boosts: Option<Vec<f64>>,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
let q = match boosts {
|
||||||
|
Some(boosts) => MultiMatchQuery::try_new(query, columns)
|
||||||
|
.and_then(|q| q.try_with_boosts(boosts.into_iter().map(|v| v as f32).collect())),
|
||||||
|
None => MultiMatchQuery::try_new(query, columns),
|
||||||
|
}
|
||||||
|
.map_err(|e| {
|
||||||
|
napi::Error::from_reason(format!("Failed to create multi match query: {}", e))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Self { inner: q.into() })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_fts_query(query: napi::JsObject) -> napi::Result<FullTextSearchQuery> {
|
||||||
|
if let Ok(Some(query)) = query.get::<_, &JsFullTextQuery>("query") {
|
||||||
|
Ok(FullTextSearchQuery::new_query(query.inner.clone()))
|
||||||
|
} else if let Ok(Some(query_text)) = query.get::<_, String>("query") {
|
||||||
|
let mut query_text = query_text;
|
||||||
|
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
|
||||||
|
|
||||||
|
let is_phrase =
|
||||||
|
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
|
||||||
|
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
|
||||||
|
|
||||||
|
if is_phrase {
|
||||||
|
// Remove the surrounding quotes for phrase queries
|
||||||
|
query_text = query_text[1..query_text.len() - 1].to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
let query: FtsQuery = match (is_phrase, is_multi_match) {
|
||||||
|
(false, _) => MatchQuery::new(query_text).into(),
|
||||||
|
(true, false) => PhraseQuery::new(query_text).into(),
|
||||||
|
(true, true) => {
|
||||||
|
return Err(napi::Error::from_reason(
|
||||||
|
"Phrase queries cannot be used with multiple columns.",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let mut query = FullTextSearchQuery::new_query(query);
|
||||||
|
if let Some(cols) = columns {
|
||||||
|
if !cols.is_empty() {
|
||||||
|
query = query.with_columns(&cols).map_err(|e| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to set full text search columns: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(query)
|
||||||
|
} else {
|
||||||
|
Err(napi::Error::from_reason(
|
||||||
|
"Invalid full text search query object".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
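Reviewer note: `parse_fts_query` keeps plain-string queries backward compatible. A double-quoted string is treated as a phrase query (quotes stripped), anything else becomes a match query, and phrase queries are rejected when more than one column is given. A minimal Python sketch of the same dispatch rules, for illustration only (the shipped logic is the Rust function above):

```python
from typing import List, Optional, Tuple

def classify_fts_query(query_text: str, columns: Optional[List[str]]) -> Tuple[str, str]:
    """Mirror of parse_fts_query's string handling (sketch, not the real code)."""
    is_phrase = (
        len(query_text) >= 2
        and query_text.startswith('"')
        and query_text.endswith('"')
    )
    is_multi_match = columns is not None and len(columns) > 1
    if is_phrase and is_multi_match:
        raise ValueError("Phrase queries cannot be used with multiple columns.")
    if is_phrase:
        # Surrounding quotes are stripped before building the phrase query.
        return "phrase", query_text[1:-1]
    return "match", query_text
```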
@@ -132,6 +132,14 @@ impl Table {
             .default_error()
     }
 
+    #[napi(catch_unwind)]
+    pub async fn prewarm_index(&self, index_name: String) -> napi::Result<()> {
+        self.inner_ref()?
+            .prewarm_index(&index_name)
+            .await
+            .default_error()
+    }
+
     #[napi(catch_unwind)]
     pub async fn update(
         &self,
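The new `prewarm_index` binding loads an index into memory ahead of the first query so that query doesn't pay the cold-load cost. Only the Node binding is shown in this diff; the sketch below assumes the async Python table exposes the same call (hypothetical — verify against the Python API):

```python
import asyncio
import lancedb

async def main():
    db = await lancedb.connect_async("./.lancedb")
    tbl = await db.open_table("my_table")
    # Hypothetical mirror of the Node binding above: warm the named index
    # before serving the first search.
    await tbl.prewarm_index("my_index")

asyncio.run(main())
```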
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.2"
+current_version = "0.22.0-beta.8"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.21.2"
+version = "0.22.0-beta.8"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -4,11 +4,12 @@ name = "lancedb"
 dynamic = ["version"]
 dependencies = [
     "deprecation",
-    "tqdm>=4.27.0",
+    "numpy",
+    "overrides>=0.7",
+    "packaging",
     "pyarrow>=14",
     "pydantic>=1.10",
-    "packaging",
-    "overrides>=0.7",
+    "tqdm>=4.27.0",
 ]
 description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -42,6 +43,9 @@ classifiers = [
 repository = "https://github.com/lancedb/lancedb"
 
 [project.optional-dependencies]
+pylance = [
+    "pylance>=0.25",
+]
 tests = [
     "aiohttp",
     "boto3",
@@ -54,7 +58,8 @@ tests = [
     "polars>=0.19, <=1.3.0",
    "tantivy",
     "pyarrow-stubs",
-    "pylance>=0.23.2",
+    "pylance>=0.25",
+    "requests",
 ]
 dev = [
     "ruff",
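The new `pylance` extra makes the optional pylance dependency installable as `pip install "lancedb[pylance]"` (the extra name and version pin come straight from the hunk above). A small illustrative runtime check — the check itself is not part of this change:

```python
import importlib.util

# pylance ships the "lance" module; the extra installs pylance>=0.25.
if importlib.util.find_spec("lance") is None:
    raise SystemExit('pylance missing; install with: pip install "lancedb[pylance]"')
```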
@@ -1,3 +1,4 @@
+from datetime import timedelta
 from typing import Dict, List, Optional, Tuple, Any, Union, Literal
 
 import pyarrow as pa
@@ -48,10 +49,11 @@ class Table:
    async def version(self) -> int: ...
    async def checkout(self, version: int): ...
    async def checkout_latest(self): ...
-    async def restore(self): ...
+    async def restore(self, version: Optional[int] = None): ...
    async def list_indices(self) -> list[IndexConfig]: ...
    async def delete(self, filter: str): ...
    async def add_columns(self, columns: list[tuple[str, str]]) -> None: ...
+    async def add_columns_with_schema(self, schema: pa.Schema) -> None: ...
    async def alter_columns(self, columns: list[dict[str, Any]]) -> None: ...
    async def optimize(
        self,
@@ -93,7 +95,11 @@ class Query:
    def postfilter(self): ...
    def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
    def nearest_to_text(self, query: dict) -> FTSQuery: ...
-    async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
+    async def execute(
+        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
+    ) -> RecordBatchStream: ...
+    async def explain_plan(self, verbose: Optional[bool]) -> str: ...
+    async def analyze_plan(self) -> str: ...
    def to_query_request(self) -> PyQueryRequest: ...
 
 class FTSQuery:
@@ -107,8 +113,9 @@ class FTSQuery:
    def get_query(self) -> str: ...
    def add_query_vector(self, query_vec: pa.Array) -> None: ...
    def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
-    async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
-    async def explain_plan(self) -> str: ...
+    async def execute(
+        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
+    ) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...
 
 class VectorQuery:
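The stub change lets `restore` take an explicit version instead of requiring a prior `checkout`. A minimal sketch of the intended call pattern, assuming the public async table mirrors these internal stubs:

```python
import asyncio
import lancedb

async def main():
    db = await lancedb.connect_async("./.lancedb")
    tbl = await db.open_table("my_table")
    v = await tbl.version()
    # Assumed per the stub change: restore a historical version directly,
    # instead of checkout() followed by restore().
    await tbl.restore(v - 1)

asyncio.run(main())
```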
@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors
 
+import base64
 import os
-from typing import ClassVar, TYPE_CHECKING, List, Union
+from typing import ClassVar, TYPE_CHECKING, List, Union, Any
+from pathlib import Path
+from urllib.parse import urlparse
+from io import BytesIO
 
 import numpy as np
 import pyarrow as pa
@@ -11,12 +14,100 @@ import pyarrow as pa
 from ..util import attempt_import_or_raise
 from .base import EmbeddingFunction
 from .registry import register
-from .utils import api_key_not_found_help, IMAGES
+from .utils import api_key_not_found_help, IMAGES, TEXT
 
 if TYPE_CHECKING:
     import PIL
 
 
+def is_valid_url(text):
+    try:
+        parsed = urlparse(text)
+        return bool(parsed.scheme) and bool(parsed.netloc)
+    except Exception:
+        return False
+
+
+def transform_input(input_data: Union[str, bytes, Path]):
+    PIL = attempt_import_or_raise("PIL", "pillow")
+    if isinstance(input_data, str):
+        if is_valid_url(input_data):
+            content = {"type": "image_url", "image_url": input_data}
+        else:
+            content = {"type": "text", "text": input_data}
+    elif isinstance(input_data, PIL.Image.Image):
+        buffered = BytesIO()
+        input_data.save(buffered, format="JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        content = {
+            "type": "image_base64",
+            "image_base64": "data:image/jpeg;base64," + img_str,
+        }
+    elif isinstance(input_data, bytes):
+        img = PIL.Image.open(BytesIO(input_data))
+        buffered = BytesIO()
+        img.save(buffered, format="JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        content = {
+            "type": "image_base64",
+            "image_base64": "data:image/jpeg;base64," + img_str,
+        }
+    elif isinstance(input_data, Path):
+        img = PIL.Image.open(input_data)
+        buffered = BytesIO()
+        img.save(buffered, format="JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        content = {
+            "type": "image_base64",
+            "image_base64": "data:image/jpeg;base64," + img_str,
+        }
+    else:
+        raise ValueError("Each input should be either str, bytes, Path or Image.")
+
+    return {"content": [content]}
+
+
+def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
+    """
+    Sanitize the input to the embedding function.
+    """
+    PIL = attempt_import_or_raise("PIL", "pillow")
+    if isinstance(inputs, (str, bytes, Path, PIL.Image.Image)):
+        inputs = [inputs]
+    elif isinstance(inputs, pa.Array):
+        inputs = inputs.to_pylist()
+    elif isinstance(inputs, pa.ChunkedArray):
+        inputs = inputs.combine_chunks().to_pylist()
+    else:
+        raise ValueError(
+            f"Input type {type(inputs)} not allowed with multimodal model."
+        )
+
+    if not all(isinstance(x, (str, bytes, Path, PIL.Image.Image)) for x in inputs):
+        raise ValueError("Each input should be either str, bytes, Path or Image.")
+
+    return [transform_input(i) for i in inputs]
+
+
+def sanitize_text_input(inputs: TEXT) -> List[str]:
+    """
+    Sanitize the input to the embedding function.
+    """
+    if isinstance(inputs, str):
+        inputs = [inputs]
+    elif isinstance(inputs, pa.Array):
+        inputs = inputs.to_pylist()
+    elif isinstance(inputs, pa.ChunkedArray):
+        inputs = inputs.combine_chunks().to_pylist()
+    else:
+        raise ValueError(f"Input type {type(inputs)} not allowed with text model.")
+
+    if not all(isinstance(x, str) for x in inputs):
+        raise ValueError("Each input should be str.")
+
+    return inputs
+
+
 @register("voyageai")
 class VoyageAIEmbeddingFunction(EmbeddingFunction):
     """
@@ -74,6 +165,11 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
     ]
     multimodal_embedding_models: list = ["voyage-multimodal-3"]
 
+    def _is_multimodal_model(self, model_name: str):
+        return (
+            model_name in self.multimodal_embedding_models or "multimodal" in model_name
+        )
+
     def ndims(self):
         if self.name == "voyage-3-lite":
             return 512
@@ -85,55 +181,12 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
             "voyage-finance-2",
             "voyage-multilingual-2",
             "voyage-law-2",
+            "voyage-multimodal-3",
         ]:
             return 1024
         else:
             raise ValueError(f"Model {self.name} not supported")
 
-    def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
-        """
-        Sanitize the input to the embedding function.
-        """
-        if isinstance(images, (str, bytes)):
-            images = [images]
-        elif isinstance(images, pa.Array):
-            images = images.to_pylist()
-        elif isinstance(images, pa.ChunkedArray):
-            images = images.combine_chunks().to_pylist()
-        return images
-
-    def generate_text_embeddings(self, text: str, **kwargs) -> np.ndarray:
-        """
-        Get the embeddings for the given texts
-
-        Parameters
-        ----------
-        texts: list[str] or np.ndarray (of str)
-            The texts to embed
-        input_type: Optional[str]
-
-        truncation: Optional[bool]
-        """
-        client = VoyageAIEmbeddingFunction._get_client()
-        if self.name in self.text_embedding_models:
-            rs = client.embed(texts=[text], model=self.name, **kwargs)
-        elif self.name in self.multimodal_embedding_models:
-            rs = client.multimodal_embed(inputs=[[text]], model=self.name, **kwargs)
-        else:
-            raise ValueError(
-                f"Model {self.name} not supported to generate text embeddings"
-            )
-
-        return rs.embeddings[0]
-
-    def generate_image_embedding(
-        self, image: "PIL.Image.Image", **kwargs
-    ) -> np.ndarray:
-        rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
-            inputs=[[image]], model=self.name, **kwargs
-        )
-        return rs.embeddings[0]
-
     def compute_query_embeddings(
         self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
     ) -> List[np.ndarray]:
@@ -144,23 +197,52 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
         ----------
         query : Union[str, PIL.Image.Image]
             The query to embed. A query can be either text or an image.
+
+        Returns
+        -------
+        List[np.array]: the list of embeddings
         """
-        if isinstance(query, str):
-            return [self.generate_text_embeddings(query, input_type="query")]
+        client = VoyageAIEmbeddingFunction._get_client()
+        if self._is_multimodal_model(self.name):
+            result = client.multimodal_embed(
+                inputs=[[query]], model=self.name, input_type="query", **kwargs
+            )
         else:
-            PIL = attempt_import_or_raise("PIL", "pillow")
-            if isinstance(query, PIL.Image.Image):
-                return [self.generate_image_embedding(query, input_type="query")]
-            else:
-                raise TypeError("Only text PIL images supported as query")
+            result = client.embed(
+                texts=[query], model=self.name, input_type="query", **kwargs
+            )
+
+        return [result.embeddings[0]]
 
     def compute_source_embeddings(
-        self, images: IMAGES, *args, **kwargs
+        self, inputs: Union[TEXT, IMAGES], *args, **kwargs
     ) -> List[np.array]:
-        images = self.sanitize_input(images)
-        return [
-            self.generate_image_embedding(img, input_type="document") for img in images
-        ]
+        """
+        Compute the embeddings for the inputs
+
+        Parameters
+        ----------
+        inputs : Union[TEXT, IMAGES]
+            The inputs to embed. The input can be either str, bytes, Path (to an image),
+            PIL.Image or list of these.
+
+        Returns
+        -------
+        List[np.array]: the list of embeddings
+        """
+        client = VoyageAIEmbeddingFunction._get_client()
+        if self._is_multimodal_model(self.name):
+            inputs = sanitize_multimodal_input(inputs)
+            result = client.multimodal_embed(
+                inputs=inputs, model=self.name, input_type="document", **kwargs
+            )
+        else:
+            inputs = sanitize_text_input(inputs)
+            result = client.embed(
+                texts=inputs, model=self.name, input_type="document", **kwargs
+            )
+
+        return result.embeddings
 
     @staticmethod
     def _get_client():
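With this rewrite the VoyageAI function routes all multimodal models through `multimodal_embed` and accepts mixed text/image sources. A minimal usage sketch, assuming the `voyageai` package is installed and `VOYAGE_API_KEY` is set (table/model names are illustrative):

```python
from lancedb.embeddings import get_registry

func = get_registry().get("voyageai").create(name="voyage-multimodal-3")

# Plain text and image URLs both pass through sanitize_multimodal_input /
# transform_input above; local Paths and PIL images are also accepted.
embeddings = func.compute_source_embeddings(
    ["a red bicycle", "https://example.com/bike.jpg"]
)
query_emb = func.compute_query_embeddings("red bike")[0]
```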
@@ -4,7 +4,10 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+import abc
 from concurrent.futures import ThreadPoolExecutor
+from enum import Enum
+from datetime import timedelta
 from typing import (
     TYPE_CHECKING,
     Dict,
@@ -83,6 +86,213 @@ def ensure_vector_query(
     return val
 
 
+class FullTextQueryType(Enum):
+    MATCH = "match"
+    MATCH_PHRASE = "match_phrase"
+    BOOST = "boost"
+    MULTI_MATCH = "multi_match"
+
+
+class FullTextQuery(abc.ABC, pydantic.BaseModel):
+    @abc.abstractmethod
+    def query_type(self) -> FullTextQueryType:
+        """
+        Get the query type of the query.
+
+        Returns
+        -------
+        str
+            The type of the query.
+        """
+
+    @abc.abstractmethod
+    def to_dict(self) -> dict:
+        """
+        Convert the query to a dictionary.
+
+        Returns
+        -------
+        dict
+            The query as a dictionary.
+        """
+
+
+class MatchQuery(FullTextQuery):
+    query: str
+    column: str
+    boost: float = 1.0
+    fuzziness: int = 0
+    max_expansions: int = 50
+
+    def __init__(
+        self,
+        query: str,
+        column: str,
+        *,
+        boost: float = 1.0,
+        fuzziness: int = 0,
+        max_expansions: int = 50,
+    ):
+        """
+        Match query for full-text search.
+
+        Parameters
+        ----------
+        query : str
+            The query string to match against.
+        column : str
+            The name of the column to match against.
+        boost : float, default 1.0
+            The boost factor for the query.
+            The score of each matching document is multiplied by this value.
+        fuzziness : int, optional
+            The maximum edit distance for each term in the match query.
+            Defaults to 0 (exact match).
+            If None, fuzziness is applied automatically by the rules:
+                - 0 for terms with length <= 2
+                - 1 for terms with length <= 5
+                - 2 for terms with length > 5
+        max_expansions : int, optional
+            The maximum number of terms to consider for fuzzy matching.
+            Defaults to 50.
+        """
+        super().__init__(
+            query=query,
+            column=column,
+            boost=boost,
+            fuzziness=fuzziness,
+            max_expansions=max_expansions,
+        )
+
+    def query_type(self) -> FullTextQueryType:
+        return FullTextQueryType.MATCH
+
+    def to_dict(self) -> dict:
+        return {
+            "match": {
+                self.column: {
+                    "query": self.query,
+                    "boost": self.boost,
+                    "fuzziness": self.fuzziness,
+                    "max_expansions": self.max_expansions,
+                }
+            }
+        }
+
+
+class PhraseQuery(FullTextQuery):
+    query: str
+    column: str
+
+    def __init__(self, query: str, column: str):
+        """
+        Phrase query for full-text search.
+
+        Parameters
+        ----------
+        query : str
+            The query string to match against.
+        column : str
+            The name of the column to match against.
+        """
+        super().__init__(query=query, column=column)
+
+    def query_type(self) -> FullTextQueryType:
+        return FullTextQueryType.MATCH_PHRASE
+
+    def to_dict(self) -> dict:
+        return {
+            "match_phrase": {
+                self.column: self.query,
+            }
+        }
+
+
+class BoostQuery(FullTextQuery):
+    positive: FullTextQuery
+    negative: FullTextQuery
+    negative_boost: float = 0.5
+
+    def __init__(
+        self,
+        positive: FullTextQuery,
+        negative: FullTextQuery,
+        *,
+        negative_boost: float = 0.5,
+    ):
+        """
+        Boost query for full-text search.
+
+        Parameters
+        ----------
+        positive : dict
+            The positive query object.
+        negative : dict
+            The negative query object.
+        negative_boost : float
+            The boost factor for the negative query.
+        """
+        super().__init__(
+            positive=positive, negative=negative, negative_boost=negative_boost
+        )
+
+    def query_type(self) -> FullTextQueryType:
+        return FullTextQueryType.BOOST
+
+    def to_dict(self) -> dict:
+        return {
+            "boost": {
+                "positive": self.positive.to_dict(),
+                "negative": self.negative.to_dict(),
+                "negative_boost": self.negative_boost,
+            }
+        }
+
+
+class MultiMatchQuery(FullTextQuery):
+    query: str
+    columns: list[str]
+    boosts: list[float]
+
+    def __init__(
+        self,
+        query: str,
+        columns: list[str],
+        *,
+        boosts: Optional[list[float]] = None,
+    ):
+        """
+        Multi-match query for full-text search.
+
+        Parameters
+        ----------
+        query : str
+            The query string to match against.
+
+        columns : list[str]
+            The list of columns to match against.
+
+        boosts : list[float], optional
+            The list of boost factors for each column. If not provided,
+            all columns will have the same boost factor.
+        """
+        if boosts is None:
+            boosts = [1.0] * len(columns)
+        super().__init__(query=query, columns=columns, boosts=boosts)
+
+    def query_type(self) -> FullTextQueryType:
+        return FullTextQueryType.MULTI_MATCH
+
+    def to_dict(self) -> dict:
+        return {
+            "multi_match": {
+                "query": self.query,
+                "columns": self.columns,
+                "boost": self.boosts,
+            }
+        }
+
+
 class FullTextSearchQuery(pydantic.BaseModel):
     """A LanceDB Full Text Search Query
 
@@ -92,18 +302,13 @@ class FullTextSearchQuery(pydantic.BaseModel):
         The columns to search
 
         If None, then the table should select the column automatically.
-    query: str
-        The query to search for
-    limit: Optional[int] = None
-        The limit on the number of results to return
-    wand_factor: Optional[float] = None
-        The wand factor to use for the search
+    query: str | FullTextQuery
+        If a string, it is treated as a MatchQuery.
+        If a FullTextQuery object, it is used directly.
     """
 
     columns: Optional[List[str]] = None
-    query: str
-    limit: Optional[int] = None
-    wand_factor: Optional[float] = None
+    query: Union[str, FullTextQuery]
 
 
 class Query(pydantic.BaseModel):
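These query classes compose. A short sketch using only the classes added above — here boosting "tutorial" matches while demoting "deprecated" ones (column and term names are made up):

```python
from lancedb.query import BoostQuery, MatchQuery, PhraseQuery

# Favor documents matching "tutorial"; demote anything matching "deprecated".
query = BoostQuery(
    MatchQuery("tutorial", "title", fuzziness=1),
    MatchQuery("deprecated", "title"),
    negative_boost=0.3,
)
print(query.to_dict())
# {'boost': {'positive': {'match': {'title': {...}}},
#            'negative': {'match': {'title': {...}}},
#            'negative_boost': 0.3}}

phrase = PhraseQuery("hello world", "text")  # exact phrase on a single column
```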
@@ -357,7 +562,7 @@ class LanceQueryBuilder(ABC):
             table, query, vector_column_name, fts_columns=fts_columns
         )
 
-        if isinstance(query, str):
+        if isinstance(query, (str, FullTextQuery)):
             # fts
             return LanceFtsQueryBuilder(
                 table,
@@ -382,8 +587,10 @@
         # If query_type is fts, then query must be a string.
         # otherwise raise TypeError
         if query_type == "fts":
-            if not isinstance(query, str):
-                raise TypeError(f"'fts' queries must be a string: {type(query)}")
+            if not isinstance(query, (str, FullTextQuery)):
+                raise TypeError(
+                    f"'fts' query must be a string or FullTextQuery: {type(query)}"
+                )
             return query, query_type
         elif query_type == "vector":
             query = cls._query_to_vector(table, query, vector_column_name)
@@ -444,7 +651,12 @@ class LanceQueryBuilder(ABC):
         """
         return self.to_pandas()
 
-    def to_pandas(self, flatten: Optional[Union[int, bool]] = None) -> "pd.DataFrame":
+    def to_pandas(
+        self,
+        flatten: Optional[Union[int, bool]] = None,
+        *,
+        timeout: Optional[timedelta] = None,
+    ) -> "pd.DataFrame":
         """
         Execute the query and return the results as a pandas DataFrame.
         In addition to the selected columns, LanceDB also returns a vector
@@ -458,12 +670,15 @@ class LanceQueryBuilder(ABC):
             If flatten is an integer, flatten the nested columns up to the
             specified depth.
             If unspecified, do not flatten the nested columns.
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
         """
-        tbl = flatten_columns(self.to_arrow(), flatten)
+        tbl = flatten_columns(self.to_arrow(timeout=timeout), flatten)
         return tbl.to_pandas()
 
     @abstractmethod
-    def to_arrow(self) -> pa.Table:
+    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
         """
         Execute the query and return the results as an
         [Apache Arrow Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table).
@@ -471,34 +686,65 @@ class LanceQueryBuilder(ABC):
         In addition to the selected columns, LanceDB also returns a vector
         and also the "_distance" column which is the distance between the query
         vector and the returned vectors.
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
         """
         raise NotImplementedError
 
     @abstractmethod
-    def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
+    def to_batches(
+        self,
+        /,
+        batch_size: Optional[int] = None,
+        *,
+        timeout: Optional[timedelta] = None,
+    ) -> pa.RecordBatchReader:
         """
         Execute the query and return the results as a pyarrow
         [RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html)
+
+        Parameters
+        ----------
+        batch_size: int
+            The maximum number of selected records in a RecordBatch object.
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
         """
         raise NotImplementedError
 
-    def to_list(self) -> List[dict]:
+    def to_list(self, *, timeout: Optional[timedelta] = None) -> List[dict]:
         """
         Execute the query and return the results as a list of dictionaries.
 
         Each list entry is a dictionary with the selected column names as keys,
         or all table columns if `select` is not called. The vector and the "_distance"
         fields are returned whether or not they're explicitly selected.
-        """
-        return self.to_arrow().to_pylist()
-
-    def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
+        """
+        return self.to_arrow(timeout=timeout).to_pylist()
+
+    def to_pydantic(
+        self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
+    ) -> List[LanceModel]:
         """Return the table as a list of pydantic models.
 
         Parameters
         ----------
         model: Type[LanceModel]
             The pydantic model to use.
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
 
         Returns
         -------
@@ -506,19 +752,25 @@ class LanceQueryBuilder(ABC):
         """
         return [
             model(**{k: v for k, v in row.items() if k in model.field_names()})
-            for row in self.to_arrow().to_pylist()
+            for row in self.to_arrow(timeout=timeout).to_pylist()
         ]
 
-    def to_polars(self) -> "pl.DataFrame":
+    def to_polars(self, *, timeout: Optional[timedelta] = None) -> "pl.DataFrame":
         """
         Execute the query and return the results as a Polars DataFrame.
         In addition to the selected columns, LanceDB also returns a vector
         and also the "_distance" column which is the distance between the query
         vector and the returned vector.
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
         """
         import polars as pl
 
-        return pl.from_arrow(self.to_arrow())
+        return pl.from_arrow(self.to_arrow(timeout=timeout))
 
     def limit(self, limit: Union[int, None]) -> Self:
         """Set the maximum number of results to return.
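All the synchronous result methods now thread `timeout` through `to_arrow`. A quick sketch of the new knob (table name and vector are placeholders):

```python
from datetime import timedelta

import lancedb

db = lancedb.connect("./.lancedb")
tbl = db.open_table("my_table")

# Bound the whole query: exceeding the budget raises instead of hanging.
df = tbl.search([0.1, 0.2]).limit(5).to_pandas(timeout=timedelta(seconds=10))
rows = tbl.search([0.1, 0.2]).to_list(timeout=timedelta(seconds=10))
```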
@@ -657,7 +909,45 @@ class LanceQueryBuilder(ABC):
         -------
         plan : str
         """  # noqa: E501
-        return self._table._explain_plan(self.to_query_object())
+        return self._table._explain_plan(self.to_query_object(), verbose=verbose)
+
+    def analyze_plan(self) -> str:
+        """
+        Run the query and return its execution plan with runtime metrics.
+
+        This returns detailed metrics for each step, such as elapsed time,
+        rows processed, bytes read, and I/O stats. It is useful for debugging
+        and performance tuning.
+
+        Examples
+        --------
+        >>> import lancedb
+        >>> db = lancedb.connect("./.lancedb")
+        >>> table = db.create_table("my_table", [{"vector": [99.0, 99]}])
+        >>> query = [100, 100]
+        >>> plan = table.search(query).analyze_plan()
+        >>> print(plan)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+        AnalyzeExec verbose=true, metrics=[]
+         ProjectionExec: expr=[...], metrics=[...]
+          GlobalLimitExec: skip=0, fetch=10, metrics=[...]
+           FilterExec: _distance@2 IS NOT NULL,
+           metrics=[output_rows=..., elapsed_compute=...]
+            SortExec: TopK(fetch=10), expr=[...],
+            preserve_partitioning=[...],
+            metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
+             KNNVectorDistance: metric=l2,
+             metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
+              LanceScan: uri=..., projection=[vector], row_id=true,
+              row_addr=false, ordered=false,
+              metrics=[output_rows=..., elapsed_compute=...,
+              bytes_read=..., iops=..., requests=...]
+
+        Returns
+        -------
+        plan : str
+            The physical query execution plan with runtime metrics.
+        """
+        return self._table._analyze_plan(self.to_query_object())
 
     def vector(self, vector: Union[np.ndarray, list]) -> Self:
         """Set the vector to search for.
@@ -674,13 +964,14 @@ class LanceQueryBuilder(ABC):
         """
         raise NotImplementedError
 
-    def text(self, text: str) -> Self:
+    def text(self, text: str | FullTextQuery) -> Self:
         """Set the text to search for.
 
         Parameters
         ----------
-        text: str
-            The text to search for.
+        text: str | FullTextQuery
+            If a string, it is treated as a MatchQuery.
+            If a FullTextQuery object, it is used directly.
 
         Returns
         -------
@@ -894,7 +1185,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
         self._refine_factor = refine_factor
         return self
 
-    def to_arrow(self) -> pa.Table:
+    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
         """
         Execute the query and return the results as an
         [Apache Arrow Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table).
@@ -902,8 +1193,14 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
         In addition to the selected columns, LanceDB also returns a vector
         and also the "_distance" column which is the distance between the query
         vector and the returned vectors.
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
         """
-        return self.to_batches().read_all()
+        return self.to_batches(timeout=timeout).read_all()
 
     def to_query_object(self) -> Query:
         """
@@ -933,7 +1230,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
             bypass_vector_index=self._bypass_vector_index,
         )
 
-    def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
+    def to_batches(
+        self,
+        /,
+        batch_size: Optional[int] = None,
+        *,
+        timeout: Optional[timedelta] = None,
+    ) -> pa.RecordBatchReader:
         """
         Execute the query and return the result as a RecordBatchReader object.
 
@@ -941,6 +1244,9 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
         ----------
         batch_size: int
             The maximum number of selected records in a RecordBatch object.
+        timeout: timedelta, default None
+            The maximum time to wait for the query to complete.
+            If None, wait indefinitely.
 
         Returns
         -------
@@ -950,7 +1256,9 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
         if isinstance(vector[0], np.ndarray):
             vector = [v.tolist() for v in vector]
         query = self.to_query_object()
-        result_set = self._table._execute_query(query, batch_size)
+        result_set = self._table._execute_query(
+            query, batch_size=batch_size, timeout=timeout
+        )
         if self._reranker is not None:
             rs_table = result_set.read_all()
             result_set = self._reranker.rerank_vector(self._str_query, rs_table)
@@ -1046,7 +1354,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
     def __init__(
         self,
         table: "Table",
-        query: str,
+        query: str | FullTextQuery,
         ordering_field_name: Optional[str] = None,
         fts_columns: Optional[Union[str, List[str]]] = None,
     ):
@@ -1089,7 +1397,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
             offset=self._offset,
         )
 
-    def to_arrow(self) -> pa.Table:
+    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
         path, fs, exist = self._table._get_fts_index_path()
         if exist:
             return self.tantivy_to_arrow()
@@ -1101,14 +1409,16 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
                 "Use tantivy-based index instead for now."
             )
         query = self.to_query_object()
-        results = self._table._execute_query(query)
+        results = self._table._execute_query(query, timeout=timeout)
         results = results.read_all()
         if self._reranker is not None:
             results = self._reranker.rerank_fts(self._query, results)
             check_reranker_result(results)
         return results
 
-    def to_batches(self, /, batch_size: Optional[int] = None):
+    def to_batches(
+        self, /, batch_size: Optional[int] = None, timeout: Optional[timedelta] = None
+    ):
         raise NotImplementedError("to_batches on an FTS query")
 
     def tantivy_to_arrow(self) -> pa.Table:
@@ -1213,8 +1523,8 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
 
 
 class LanceEmptyQueryBuilder(LanceQueryBuilder):
-    def to_arrow(self) -> pa.Table:
-        return self.to_batches().read_all()
+    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
+        return self.to_batches(timeout=timeout).read_all()
 
     def to_query_object(self) -> Query:
         return Query(
@@ -1225,9 +1535,11 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
             offset=self._offset,
         )
 
-    def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
+    def to_batches(
+        self, /, batch_size: Optional[int] = None, timeout: Optional[timedelta] = None
+    ) -> pa.RecordBatchReader:
         query = self.to_query_object()
-        return self._table._execute_query(query, batch_size)
+        return self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
 
     def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
         """Rerank the results using the specified reranker.
@@ -1260,7 +1572,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
     def __init__(
         self,
         table: "Table",
-        query: Optional[str] = None,
+        query: Optional[Union[str, FullTextQuery]] = None,
        vector_column: Optional[str] = None,
         fts_columns: Optional[Union[str, List[str]]] = None,
     ):
@@ -1290,8 +1602,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         text_query = text or query
         if text_query is None:
             raise ValueError("Text query must be provided for hybrid search.")
-        if not isinstance(text_query, str):
-            raise ValueError("Text query must be a string")
+        if not isinstance(text_query, (str, FullTextQuery)):
+            raise ValueError("Text query must be a string or FullTextQuery")
 
         return vector_query, text_query
 
@@ -1315,7 +1627,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
     def to_query_object(self) -> Query:
         raise NotImplementedError("to_query_object not yet supported on a hybrid query")
 
-    def to_arrow(self) -> pa.Table:
+    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
         vector_query, fts_query = self._validate_query(
             self._query, self._vector, self._text
         )
@@ -1358,9 +1670,11 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
             self._reranker = RRFReranker()
 
         with ThreadPoolExecutor() as executor:
-            fts_future = executor.submit(self._fts_query.with_row_id(True).to_arrow)
+            fts_future = executor.submit(
+                self._fts_query.with_row_id(True).to_arrow, timeout=timeout
+            )
             vector_future = executor.submit(
-                self._vector_query.with_row_id(True).to_arrow
+                self._vector_query.with_row_id(True).to_arrow, timeout=timeout
             )
             fts_results = fts_future.result()
             vector_results = vector_future.result()
@@ -1447,7 +1761,9 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
 
         return results
 
-    def to_batches(self):
+    def to_batches(
+        self, /, batch_size: Optional[int] = None, timeout: Optional[timedelta] = None
+    ):
         raise NotImplementedError("to_batches not yet supported on a hybrid query")
 
     @staticmethod
@@ -1653,7 +1969,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         self._vector = vector
         return self
 
-    def text(self, text: str | FullTextQuery) -> LanceHybridQueryBuilder:
        self._text = text
         return self
 
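Hybrid search now accepts structured text queries, and the FTS and vector legs run in parallel threads with the same timeout passed to both. A sketch under the assumption that the table has both a vector column and an FTS index (names are placeholders):

```python
from datetime import timedelta

import lancedb
from lancedb.query import MatchQuery

db = lancedb.connect("./.lancedb")
tbl = db.open_table("my_table")

# Both legs of the hybrid query share the timedelta budget.
results = (
    tbl.search(query_type="hybrid")
    .vector([0.1, 0.2])
    .text(MatchQuery("puppy", "text"))
    .limit(10)
    .to_pandas(timeout=timedelta(seconds=30))
)
```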
@@ -1811,7 +2127,10 @@ class AsyncQueryBase(object):
         return self
 
     async def to_batches(
-        self, *, max_batch_length: Optional[int] = None
+        self,
+        *,
+        max_batch_length: Optional[int] = None,
+        timeout: Optional[timedelta] = None,
     ) -> AsyncRecordBatchReader:
         """
         Execute the query and return the results as an Apache Arrow RecordBatchReader.
@@ -1824,34 +2143,56 @@ class AsyncQueryBase(object):
             If not specified, a default batch length is used.
             It is possible for batches to be smaller than the provided length if the
             underlying data is stored in smaller chunks.
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If not specified, no timeout is applied. If the query does not
+            complete within the specified time, an error will be raised.
         """
-        return AsyncRecordBatchReader(await self._inner.execute(max_batch_length))
+        return AsyncRecordBatchReader(
+            await self._inner.execute(max_batch_length, timeout)
+        )
 
-    async def to_arrow(self) -> pa.Table:
+    async def to_arrow(self, timeout: Optional[timedelta] = None) -> pa.Table:
         """
         Execute the query and collect the results into an Apache Arrow Table.
 
         This method will collect all results into memory before returning. If
         you expect a large number of results, you may want to use
         [to_batches][lancedb.query.AsyncQueryBase.to_batches]
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If not specified, no timeout is applied. If the query does not
+            complete within the specified time, an error will be raised.
         """
-        batch_iter = await self.to_batches()
+        batch_iter = await self.to_batches(timeout=timeout)
         return pa.Table.from_batches(
             await batch_iter.read_all(), schema=batch_iter.schema
         )
 
-    async def to_list(self) -> List[dict]:
+    async def to_list(self, timeout: Optional[timedelta] = None) -> List[dict]:
         """
         Execute the query and return the results as a list of dictionaries.
 
         Each list entry is a dictionary with the selected column names as keys,
         or all table columns if `select` is not called. The vector and the "_distance"
         fields are returned whether or not they're explicitly selected.
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If not specified, no timeout is applied. If the query does not
+            complete within the specified time, an error will be raised.
         """
-        return (await self.to_arrow()).to_pylist()
+        return (await self.to_arrow(timeout=timeout)).to_pylist()
 
     async def to_pandas(
-        self, flatten: Optional[Union[int, bool]] = None
+        self,
+        flatten: Optional[Union[int, bool]] = None,
+        timeout: Optional[timedelta] = None,
     ) -> "pd.DataFrame":
         """
         Execute the query and collect the results into a pandas DataFrame.
@@ -1880,10 +2221,19 @@ class AsyncQueryBase(object):
             If flatten is an integer, flatten the nested columns up to the
             specified depth.
             If unspecified, do not flatten the nested columns.
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If not specified, no timeout is applied. If the query does not
+            complete within the specified time, an error will be raised.
         """
-        return (flatten_columns(await self.to_arrow(), flatten)).to_pandas()
+        return (
+            flatten_columns(await self.to_arrow(timeout=timeout), flatten)
+        ).to_pandas()
 
-    async def to_polars(self) -> "pl.DataFrame":
+    async def to_polars(
+        self,
+        timeout: Optional[timedelta] = None,
+    ) -> "pl.DataFrame":
         """
         Execute the query and collect the results into a Polars DataFrame.
 
@@ -1892,6 +2242,13 @@ class AsyncQueryBase(object):
         [to_batches][lancedb.query.AsyncQueryBase.to_batches] and convert each batch to
         polars separately.
+
+        Parameters
+        ----------
+        timeout: Optional[timedelta]
+            The maximum time to wait for the query to complete.
+            If not specified, no timeout is applied. If the query does not
+            complete within the specified time, an error will be raised.
 
         Examples
         --------
@@ -1907,7 +2264,7 @@ class AsyncQueryBase(object):
         """
         import polars as pl
 
-        return pl.from_arrow(await self.to_arrow())
+        return pl.from_arrow(await self.to_arrow(timeout=timeout))
 
     async def explain_plan(self, verbose: Optional[bool] = False):
|
async def explain_plan(self, verbose: Optional[bool] = False):
|
||||||
"""Return the execution plan for this query.
|
"""Return the execution plan for this query.
|
||||||
@@ -1941,6 +2298,15 @@ class AsyncQueryBase(object):
|
|||||||
""" # noqa: E501
|
""" # noqa: E501
|
||||||
return await self._inner.explain_plan(verbose)
|
return await self._inner.explain_plan(verbose)
|
||||||
|
|
||||||
|
async def analyze_plan(self):
|
||||||
|
"""Execute the query and display with runtime metrics.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
plan : str
|
||||||
|
"""
|
||||||
|
return await self._inner.analyze_plan()
|
||||||
|
|
||||||
|
|
||||||
class AsyncQuery(AsyncQueryBase):
|
class AsyncQuery(AsyncQueryBase):
|
||||||
def __init__(self, inner: LanceQuery):
|
def __init__(self, inner: LanceQuery):
|
||||||
@@ -2041,7 +2407,7 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def nearest_to_text(
|
def nearest_to_text(
|
||||||
self, query: str, columns: Union[str, List[str], None] = None
|
self, query: str | FullTextQuery, columns: Union[str, List[str], None] = None
|
||||||
) -> AsyncFTSQuery:
|
) -> AsyncFTSQuery:
|
||||||
"""
|
"""
|
||||||
Find the documents that are most relevant to the given text query.
|
Find the documents that are most relevant to the given text query.
|
||||||
@@ -2067,9 +2433,13 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
columns = [columns]
|
columns = [columns]
|
||||||
if columns is None:
|
if columns is None:
|
||||||
columns = []
|
columns = []
|
||||||
return AsyncFTSQuery(
|
|
||||||
self._inner.nearest_to_text({"query": query, "columns": columns})
|
if isinstance(query, str):
|
||||||
)
|
return AsyncFTSQuery(
|
||||||
|
self._inner.nearest_to_text({"query": query, "columns": columns})
|
||||||
|
)
|
||||||
|
# FullTextQuery object
|
||||||
|
return AsyncFTSQuery(self._inner.nearest_to_text({"query": query.to_dict()}))
|
||||||
|
|
||||||
|
|
||||||
class AsyncFTSQuery(AsyncQueryBase):
|
class AsyncFTSQuery(AsyncQueryBase):
|
||||||
@@ -2165,9 +2535,12 @@ class AsyncFTSQuery(AsyncQueryBase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def to_batches(
|
async def to_batches(
|
||||||
self, *, max_batch_length: Optional[int] = None
|
self,
|
||||||
|
*,
|
||||||
|
max_batch_length: Optional[int] = None,
|
||||||
|
timeout: Optional[timedelta] = None,
|
||||||
) -> AsyncRecordBatchReader:
|
) -> AsyncRecordBatchReader:
|
||||||
reader = await super().to_batches()
|
reader = await super().to_batches(timeout=timeout)
|
||||||
results = pa.Table.from_batches(await reader.read_all(), reader.schema)
|
results = pa.Table.from_batches(await reader.read_all(), reader.schema)
|
||||||
if self._reranker:
|
if self._reranker:
|
||||||
results = self._reranker.rerank_fts(self.get_query(), results)
|
results = self._reranker.rerank_fts(self.get_query(), results)
|
||||||
@@ -2352,7 +2725,7 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def nearest_to_text(
|
def nearest_to_text(
|
||||||
self, query: str, columns: Union[str, List[str], None] = None
|
self, query: str | FullTextQuery, columns: Union[str, List[str], None] = None
|
||||||
) -> AsyncHybridQuery:
|
) -> AsyncHybridQuery:
|
||||||
"""
|
"""
|
||||||
Find the documents that are most relevant to the given text query,
|
Find the documents that are most relevant to the given text query,
|
||||||
@@ -2382,14 +2755,21 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
columns = [columns]
|
columns = [columns]
|
||||||
if columns is None:
|
if columns is None:
|
||||||
columns = []
|
columns = []
|
||||||
return AsyncHybridQuery(
|
|
||||||
self._inner.nearest_to_text({"query": query, "columns": columns})
|
if isinstance(query, str):
|
||||||
)
|
return AsyncHybridQuery(
|
||||||
|
self._inner.nearest_to_text({"query": query, "columns": columns})
|
||||||
|
)
|
||||||
|
# FullTextQuery object
|
||||||
|
return AsyncHybridQuery(self._inner.nearest_to_text({"query": query.to_dict()}))
|
||||||
|
|
||||||
async def to_batches(
|
async def to_batches(
|
||||||
self, *, max_batch_length: Optional[int] = None
|
self,
|
||||||
|
*,
|
||||||
|
max_batch_length: Optional[int] = None,
|
||||||
|
timeout: Optional[timedelta] = None,
|
||||||
) -> AsyncRecordBatchReader:
|
) -> AsyncRecordBatchReader:
|
||||||
reader = await super().to_batches()
|
reader = await super().to_batches(timeout=timeout)
|
||||||
results = pa.Table.from_batches(await reader.read_all(), reader.schema)
|
results = pa.Table.from_batches(await reader.read_all(), reader.schema)
|
||||||
if self._reranker:
|
if self._reranker:
|
||||||
results = self._reranker.rerank_vector(self._query_string, results)
|
results = self._reranker.rerank_vector(self._query_string, results)
|
||||||
@@ -2445,7 +2825,10 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
async def to_batches(
|
async def to_batches(
|
||||||
self, *, max_batch_length: Optional[int] = None
|
self,
|
||||||
|
*,
|
||||||
|
max_batch_length: Optional[int] = None,
|
||||||
|
timeout: Optional[timedelta] = None,
|
||||||
) -> AsyncRecordBatchReader:
|
) -> AsyncRecordBatchReader:
|
||||||
fts_query = AsyncFTSQuery(self._inner.to_fts_query())
|
fts_query = AsyncFTSQuery(self._inner.to_fts_query())
|
||||||
vec_query = AsyncVectorQuery(self._inner.to_vector_query())
|
vec_query = AsyncVectorQuery(self._inner.to_vector_query())
|
||||||
@@ -2457,8 +2840,8 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
vec_query.with_row_id()
|
vec_query.with_row_id()
|
||||||
|
|
||||||
fts_results, vector_results = await asyncio.gather(
|
fts_results, vector_results = await asyncio.gather(
|
||||||
fts_query.to_arrow(),
|
fts_query.to_arrow(timeout=timeout),
|
||||||
vec_query.to_arrow(),
|
vec_query.to_arrow(timeout=timeout),
|
||||||
)
|
)
|
||||||
|
|
||||||
result = LanceHybridQueryBuilder._combine_hybrid_results(
|
result = LanceHybridQueryBuilder._combine_hybrid_results(
|
||||||
@@ -2510,7 +2893,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
plan
|
plan : str
|
||||||
""" # noqa: E501
|
""" # noqa: E501
|
||||||
|
|
||||||
results = ["Vector Search Plan:"]
|
results = ["Vector Search Plan:"]
|
||||||
@@ -2519,3 +2902,23 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
|
|||||||
results.append(await self._inner.to_fts_query().explain_plan(verbose))
|
results.append(await self._inner.to_fts_query().explain_plan(verbose))
|
||||||
|
|
||||||
return "\n".join(results)
|
return "\n".join(results)
|
||||||
|
|
||||||
|
async def analyze_plan(self):
|
||||||
|
"""
|
||||||
|
Execute the query and return the physical execution plan with runtime metrics.
|
||||||
|
|
||||||
|
This runs both the vector and FTS (full-text search) queries and returns
|
||||||
|
detailed metrics for each step of execution—such as rows processed,
|
||||||
|
elapsed time, I/O stats, and more. It’s useful for debugging and
|
||||||
|
performance analysis.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
plan : str
|
||||||
|
"""
|
||||||
|
results = ["Vector Search Query:"]
|
||||||
|
results.append(await self._inner.to_vector_query().analyze_plan())
|
||||||
|
results.append("FTS Search Query:")
|
||||||
|
results.append(await self._inner.to_fts_query().analyze_plan())
|
||||||
|
|
||||||
|
return "\n".join(results)
|
||||||
|
|||||||
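Taken together, the hunks above thread one optional `timeout` through every async collection path and add `analyze_plan` to the query builders. A minimal sketch of the resulting API, assuming a local database at "data/sample-lancedb" with a table named "my_table" (the names are illustrative, not taken from this diff):

```python
import asyncio
from datetime import timedelta

import lancedb


async def main():
    db = await lancedb.connect_async("data/sample-lancedb")  # hypothetical path
    tbl = await db.open_table("my_table")  # hypothetical table

    query = (await tbl.search([0.1, 0.2])).limit(10)

    # Each collector now accepts a timeout; a query that runs past the
    # deadline raises an error instead of hanging.
    rows = await query.to_list(timeout=timedelta(seconds=5))

    # analyze_plan() executes the query and returns the physical plan
    # annotated with runtime metrics, useful for performance debugging.
    print(await query.analyze_plan())


asyncio.run(main())
```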
@@ -87,6 +87,9 @@ class RemoteTable(Table):
     def checkout_latest(self):
         return LOOP.run(self._table.checkout_latest())

+    def restore(self, version: Optional[int] = None):
+        return LOOP.run(self._table.restore(version))
+
     def list_indices(self) -> Iterable[IndexConfig]:
         """List all the indices on the table"""
         return LOOP.run(self._table.list_indices())
@@ -352,9 +355,15 @@ class RemoteTable(Table):
         )

     def _execute_query(
-        self, query: Query, batch_size: Optional[int] = None
+        self,
+        query: Query,
+        *,
+        batch_size: Optional[int] = None,
+        timeout: Optional[timedelta] = None,
     ) -> pa.RecordBatchReader:
-        async_iter = LOOP.run(self._table._execute_query(query, batch_size=batch_size))
+        async_iter = LOOP.run(
+            self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
+        )

         def iter_sync():
             try:
@@ -365,6 +374,12 @@ class RemoteTable(Table):

         return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())

+    def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str:
+        return LOOP.run(self._table._explain_plan(query, verbose))
+
+    def _analyze_plan(self, query: Query) -> str:
+        return LOOP.run(self._table._analyze_plan(query))
+
     def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
         """Returns a [`LanceMergeInsertBuilder`][lancedb.merge.LanceMergeInsertBuilder]
         that can be used to create a "merge insert" operation.

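With these delegating wrappers, the remote (sync) table now mirrors the local API for version restore and plan inspection. A short hedged sketch of the user-facing call, with placeholder connection details for a LanceDB Cloud table:

```python
import lancedb

# Placeholder URI and API key; substitute your own project credentials.
db = lancedb.connect("db://my-project", api_key="my-api-key")
tbl = db.open_table("my_table")  # hypothetical table name

# Roll the table back to version 2. Per the restore() docstring added below,
# this creates a new version whose data matches the old one; nothing is copied.
tbl.restore(2)
tbl.checkout_latest()  # resume following the latest version afterwards
```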
@@ -47,6 +47,9 @@ class AnswerdotaiRerankers(Reranker):
         )

     def _rerank(self, result_set: pa.Table, query: str):
+        result_set = self._handle_empty_results(result_set)
+        if len(result_set) == 0:
+            return result_set
         docs = result_set[self.column].to_pylist()
         doc_ids = list(range(len(docs)))
         result = self.reranker.rank(query, docs, doc_ids=doc_ids)
@@ -83,7 +86,6 @@ class AnswerdotaiRerankers(Reranker):
         vector_results = self._rerank(vector_results, query)
         if self.score == "relevance":
             vector_results = vector_results.drop_columns(["_distance"])
-
         vector_results = vector_results.sort_by([("_relevance_score", "descending")])
         return vector_results

@@ -91,7 +93,5 @@ class AnswerdotaiRerankers(Reranker):
         fts_results = self._rerank(fts_results, query)
         if self.score == "relevance":
             fts_results = fts_results.drop_columns(["_score"])
-
         fts_results = fts_results.sort_by([("_relevance_score", "descending")])
-
         return fts_results

@@ -65,6 +65,16 @@ class Reranker(ABC):
             f"{self.__class__.__name__} does not implement rerank_vector"
         )

+    def _handle_empty_results(self, results: pa.Table):
+        """
+        Helper method to handle empty FTS results consistently
+        """
+        if len(results) > 0:
+            return results
+        return results.append_column(
+            "_relevance_score", pa.array([], type=pa.float32())
+        )
+
     def rerank_fts(
         self,
         query: str,

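The new base-class helper gives every reranker a consistent way to short-circuit on empty input: an empty result set comes back unchanged except for an empty `_relevance_score` column, so downstream `sort_by` calls still find the column they need. A quick standalone demonstration of that contract, building the tables by hand with pyarrow:

```python
import pyarrow as pa

# An empty FTS result: correct schema, zero rows.
empty = pa.table({
    "text": pa.array([], type=pa.string()),
    "_score": pa.array([], type=pa.float32()),
})

# What _handle_empty_results produces for it: the same zero rows plus an
# empty float32 "_relevance_score" column.
patched = empty.append_column("_relevance_score", pa.array([], type=pa.float32()))
assert patched.num_rows == 0
assert "_relevance_score" in patched.column_names

# Sorting by the relevance column now succeeds even on empty results,
# instead of raising because the column is missing.
patched.sort_by([("_relevance_score", "descending")])
```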
@@ -62,6 +62,9 @@ class CohereReranker(Reranker):
         return cohere.Client(os.environ.get("COHERE_API_KEY") or self.api_key)

     def _rerank(self, result_set: pa.Table, query: str):
+        result_set = self._handle_empty_results(result_set)
+        if len(result_set) == 0:
+            return result_set
         docs = result_set[self.column].to_pylist()
         response = self._client.rerank(
             query=query,
@@ -99,24 +102,14 @@ class CohereReranker(Reranker):
         )
         return combined_results

-    def rerank_vector(
-        self,
-        query: str,
-        vector_results: pa.Table,
-    ):
-        result_set = self._rerank(vector_results, query)
+    def rerank_vector(self, query: str, vector_results: pa.Table):
+        vector_results = self._rerank(vector_results, query)
         if self.score == "relevance":
-            result_set = result_set.drop_columns(["_distance"])
-
-        return result_set
+            vector_results = vector_results.drop_columns(["_distance"])
+        return vector_results

-    def rerank_fts(
-        self,
-        query: str,
-        fts_results: pa.Table,
-    ):
-        result_set = self._rerank(fts_results, query)
+    def rerank_fts(self, query: str, fts_results: pa.Table):
+        fts_results = self._rerank(fts_results, query)
         if self.score == "relevance":
-            result_set = result_set.drop_columns(["_score"])
-
-        return result_set
+            fts_results = fts_results.drop_columns(["_score"])
+        return fts_results

@@ -63,6 +63,9 @@ class CrossEncoderReranker(Reranker):
         return cross_encoder

     def _rerank(self, result_set: pa.Table, query: str):
+        result_set = self._handle_empty_results(result_set)
+        if len(result_set) == 0:
+            return result_set
         passages = result_set[self.column].to_pylist()
         cross_inp = [[query, passage] for passage in passages]
         cross_scores = self.model.predict(cross_inp)
@@ -93,11 +96,7 @@ class CrossEncoderReranker(Reranker):

         return combined_results

-    def rerank_vector(
-        self,
-        query: str,
-        vector_results: pa.Table,
-    ):
+    def rerank_vector(self, query: str, vector_results: pa.Table):
         vector_results = self._rerank(vector_results, query)
         if self.score == "relevance":
             vector_results = vector_results.drop_columns(["_distance"])
@@ -105,11 +104,7 @@ class CrossEncoderReranker(Reranker):
         vector_results = vector_results.sort_by([("_relevance_score", "descending")])
         return vector_results

-    def rerank_fts(
-        self,
-        query: str,
-        fts_results: pa.Table,
-    ):
+    def rerank_fts(self, query: str, fts_results: pa.Table):
         fts_results = self._rerank(fts_results, query)
         if self.score == "relevance":
             fts_results = fts_results.drop_columns(["_score"])

@@ -62,6 +62,9 @@ class JinaReranker(Reranker):
         return self._session

     def _rerank(self, result_set: pa.Table, query: str):
+        result_set = self._handle_empty_results(result_set)
+        if len(result_set) == 0:
+            return result_set
         docs = result_set[self.column].to_pylist()
         response = self._client.post(  # type: ignore
             API_URL,
@@ -104,24 +107,14 @@ class JinaReranker(Reranker):
         )
         return combined_results

-    def rerank_vector(
-        self,
-        query: str,
-        vector_results: pa.Table,
-    ):
-        result_set = self._rerank(vector_results, query)
+    def rerank_vector(self, query: str, vector_results: pa.Table):
+        vector_results = self._rerank(vector_results, query)
         if self.score == "relevance":
-            result_set = result_set.drop_columns(["_distance"])
-
-        return result_set
+            vector_results = vector_results.drop_columns(["_distance"])
+        return vector_results

-    def rerank_fts(
-        self,
-        query: str,
-        fts_results: pa.Table,
-    ):
-    
-        result_set = self._rerank(fts_results, query)
+    def rerank_fts(self, query: str, fts_results: pa.Table):
+        fts_results = self._rerank(fts_results, query)
         if self.score == "relevance":
-            result_set = result_set.drop_columns(["_score"])
-
-        return result_set
+            fts_results = fts_results.drop_columns(["_score"])
+        return fts_results

@@ -44,6 +44,9 @@ class OpenaiReranker(Reranker):
         self.api_key = api_key

     def _rerank(self, result_set: pa.Table, query: str):
+        result_set = self._handle_empty_results(result_set)
+        if len(result_set) == 0:
+            return result_set
         docs = result_set[self.column].to_pylist()
         response = self._client.chat.completions.create(
             model=self.model_name,
@@ -104,18 +107,14 @@ class OpenaiReranker(Reranker):
         vector_results = self._rerank(vector_results, query)
         if self.score == "relevance":
             vector_results = vector_results.drop_columns(["_distance"])
-
         vector_results = vector_results.sort_by([("_relevance_score", "descending")])
-
         return vector_results

     def rerank_fts(self, query: str, fts_results: pa.Table):
         fts_results = self._rerank(fts_results, query)
         if self.score == "relevance":
             fts_results = fts_results.drop_columns(["_score"])
-
         fts_results = fts_results.sort_by([("_relevance_score", "descending")])
-
         return fts_results

     @cached_property

@@ -63,6 +63,9 @@ class VoyageAIReranker(Reranker):
         )

     def _rerank(self, result_set: pa.Table, query: str):
+        result_set = self._handle_empty_results(result_set)
+        if len(result_set) == 0:
+            return result_set
         docs = result_set[self.column].to_pylist()
         response = self._client.rerank(
             query=query,
@@ -101,24 +104,14 @@ class VoyageAIReranker(Reranker):
         )
         return combined_results

-    def rerank_vector(
-        self,
-        query: str,
-        vector_results: pa.Table,
-    ):
-        result_set = self._rerank(vector_results, query)
+    def rerank_vector(self, query: str, vector_results: pa.Table):
+        vector_results = self._rerank(vector_results, query)
         if self.score == "relevance":
-            result_set = result_set.drop_columns(["_distance"])
-
-        return result_set
+            vector_results = vector_results.drop_columns(["_distance"])
+        return vector_results

-    def rerank_fts(
-        self,
-        query: str,
-        fts_results: pa.Table,
-    ):
-        result_set = self._rerank(fts_results, query)
+    def rerank_fts(self, query: str, fts_results: pa.Table):
+        fts_results = self._rerank(fts_results, query)
         if self.score == "relevance":
-            result_set = result_set.drop_columns(["_score"])
-
-        return result_set
+            fts_results = fts_results.drop_columns(["_score"])
+        return fts_results

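Every bundled reranker now opens `_rerank` with the same two-line empty-results guard, so a user-defined reranker can follow the identical pattern. A hedged sketch of a custom subclass (the length-based scoring is a stand-in, and the exact abstract surface of `Reranker`, including `merge_results`, is assumed from the code above rather than spelled out in this diff):

```python
import pyarrow as pa
from lancedb.rerankers import Reranker


class LengthReranker(Reranker):
    """Toy reranker: scores documents by text length (illustrative only)."""

    def __init__(self, column: str = "text"):
        super().__init__()
        self.column = column

    def _rerank(self, result_set: pa.Table, query: str) -> pa.Table:
        # Same guard the built-in rerankers use: empty input short-circuits
        # with an empty _relevance_score column already attached.
        result_set = self._handle_empty_results(result_set)
        if len(result_set) == 0:
            return result_set
        scores = [float(len(t)) for t in result_set[self.column].to_pylist()]
        return result_set.append_column(
            "_relevance_score", pa.array(scores, type=pa.float32())
        )

    def rerank_vector(self, query: str, vector_results: pa.Table) -> pa.Table:
        return self._rerank(vector_results, query).sort_by(
            [("_relevance_score", "descending")]
        )

    def rerank_fts(self, query: str, fts_results: pa.Table) -> pa.Table:
        return self._rerank(fts_results, query).sort_by(
            [("_relevance_score", "descending")]
        )

    def rerank_hybrid(self, query, vector_results, fts_results) -> pa.Table:
        # merge_results is assumed to be the base-class union helper.
        combined = self.merge_results(vector_results, fts_results)
        return self._rerank(combined, query).sort_by(
            [("_relevance_score", "descending")]
        )
```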
@@ -52,6 +52,7 @@ from .query import (
     AsyncHybridQuery,
     AsyncQuery,
     AsyncVectorQuery,
+    FullTextQuery,
     LanceEmptyQueryBuilder,
     LanceFtsQueryBuilder,
     LanceHybridQueryBuilder,
@@ -919,7 +920,9 @@ class Table(ABC):
     @abstractmethod
     def search(
         self,
-        query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
+        query: Optional[
+            Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
+        ] = None,
         vector_column_name: Optional[str] = None,
         query_type: QueryType = "auto",
         ordering_field_name: Optional[str] = None,
@@ -1004,9 +1007,19 @@ class Table(ABC):

     @abstractmethod
     def _execute_query(
-        self, query: Query, batch_size: Optional[int] = None
+        self,
+        query: Query,
+        *,
+        batch_size: Optional[int] = None,
+        timeout: Optional[timedelta] = None,
     ) -> pa.RecordBatchReader: ...

+    @abstractmethod
+    def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str: ...
+
+    @abstractmethod
+    def _analyze_plan(self, query: Query) -> str: ...
+
     @abstractmethod
     def _do_merge(
         self,
@@ -1262,16 +1275,21 @@ class Table(ABC):
         """

     @abstractmethod
-    def add_columns(self, transforms: Dict[str, str]):
+    def add_columns(
+        self, transforms: Dict[str, str] | pa.Field | List[pa.Field] | pa.Schema
+    ):
         """
         Add new columns with defined values.

         Parameters
         ----------
-        transforms: Dict[str, str]
+        transforms: Dict[str, str], pa.Field, List[pa.Field], pa.Schema
             A map of column name to a SQL expression to use to calculate the
             value of the new column. These expressions will be evaluated for
             each row in the table, and can reference existing columns.
+            Alternatively, a pyarrow Field or Schema can be provided to add
+            new columns with the specified data types. The new columns will
+            be initialized with null values.
         """

     @abstractmethod
@@ -1339,6 +1357,21 @@ class Table(ABC):
         It can also be used to undo a `[Self::checkout]` operation
         """

+    @abstractmethod
+    def restore(self, version: Optional[int] = None):
+        """Restore a version of the table. This is an in-place operation.
+
+        This creates a new version where the data is equivalent to the
+        specified previous version. Data is not copied (as of python-v0.2.1).
+
+        Parameters
+        ----------
+        version : int, default None
+            The version to restore. If unspecified then restores the currently
+            checked out version. If the currently checked out version is the
+            latest version then this is a no-op.
+        """
+
     @abstractmethod
     def list_versions(self) -> List[Dict[str, Any]]:
         """List all versions of the table"""
@@ -1712,8 +1745,32 @@ class LanceTable(Table):
         )

     def drop_index(self, name: str) -> None:
+        """
+        Drops an index from the table
+
+        Parameters
+        ----------
+        name: str
+            The name of the index to drop
+        """
         return LOOP.run(self._table.drop_index(name))

+    def prewarm_index(self, name: str) -> None:
+        """
+        Prewarms an index in the table
+
+        This loads the entire index into memory
+
+        If the index does not fit into the available cache this call
+        may be wasteful
+
+        Parameters
+        ----------
+        name: str
+            The name of the index to prewarm
+        """
+        return LOOP.run(self._table.prewarm_index(name))
+
     def create_scalar_index(
         self,
         column: str,
@@ -2013,7 +2070,9 @@ class LanceTable(Table):
     @overload
     def search(
         self,
-        query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
+        query: Optional[
+            Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
+        ] = None,
         vector_column_name: Optional[str] = None,
         query_type: Literal["hybrid"] = "hybrid",
         ordering_field_name: Optional[str] = None,
@@ -2032,7 +2091,9 @@ class LanceTable(Table):

     def search(
         self,
-        query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
+        query: Optional[
+            Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
+        ] = None,
         vector_column_name: Optional[str] = None,
         query_type: QueryType = "auto",
         ordering_field_name: Optional[str] = None,
@@ -2104,6 +2165,8 @@ class LanceTable(Table):
         and also the "_distance" column which is the distance between the query
         vector and the returned vector.
         """
+        if isinstance(query, FullTextQuery):
+            query_type = "fts"
         vector_column_name = infer_vector_column_name(
             schema=self.schema,
             query_type=query_type,
@@ -2279,9 +2342,15 @@ class LanceTable(Table):
         LOOP.run(self._table.update(values, where=where, updates_sql=values_sql))

     def _execute_query(
-        self, query: Query, batch_size: Optional[int] = None
+        self,
+        query: Query,
+        *,
+        batch_size: Optional[int] = None,
+        timeout: Optional[timedelta] = None,
     ) -> pa.RecordBatchReader:
-        async_iter = LOOP.run(self._table._execute_query(query, batch_size))
+        async_iter = LOOP.run(
+            self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
+        )

         def iter_sync():
             try:
@@ -2292,8 +2361,11 @@ class LanceTable(Table):

         return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())

-    def _explain_plan(self, query: Query) -> str:
-        return LOOP.run(self._table._explain_plan(query))
+    def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str:
+        return LOOP.run(self._table._explain_plan(query, verbose))
+
+    def _analyze_plan(self, query: Query) -> str:
+        return LOOP.run(self._table._analyze_plan(query))

     def _do_merge(
         self,
@@ -2442,7 +2514,9 @@ class LanceTable(Table):
         """
         return LOOP.run(self._table.index_stats(index_name))

-    def add_columns(self, transforms: Dict[str, str]):
+    def add_columns(
+        self, transforms: Dict[str, str] | pa.Field | List[pa.Field] | pa.Schema
+    ):
         LOOP.run(self._table.add_columns(transforms))

     def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
@@ -2952,6 +3026,23 @@ class AsyncTable:
         """
         await self._inner.drop_index(name)

+    async def prewarm_index(self, name: str) -> None:
+        """
+        Prewarm an index in the table.
+
+        Parameters
+        ----------
+        name: str
+            The name of the index to prewarm
+
+        Notes
+        -----
+        This will load the index into memory. This may reduce the cold-start time for
+        future queries. If the index does not fit in the cache then this call may be
+        wasteful.
+        """
+        await self._inner.prewarm_index(name)
+
     async def add(
         self,
         data: DATA,
@@ -3103,7 +3194,9 @@ class AsyncTable:
     @overload
     async def search(
         self,
-        query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
+        query: Optional[
+            Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
+        ] = None,
         vector_column_name: Optional[str] = None,
         query_type: Literal["vector"] = ...,
         ordering_field_name: Optional[str] = None,
@@ -3112,7 +3205,9 @@ class AsyncTable:

     async def search(
         self,
-        query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
+        query: Optional[
+            Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
+        ] = None,
         vector_column_name: Optional[str] = None,
         query_type: QueryType = "auto",
         ordering_field_name: Optional[str] = None,
@@ -3171,8 +3266,10 @@ class AsyncTable:
         async def get_embedding_func(
             vector_column_name: Optional[str],
             query_type: QueryType,
-            query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]],
+            query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]],
         ) -> Tuple[str, EmbeddingFunctionConfig]:
+            if isinstance(query, FullTextQuery):
+                query_type = "fts"
             schema = await self.schema()
             vector_column_name = infer_vector_column_name(
                 schema=schema,
@@ -3222,6 +3319,8 @@ class AsyncTable:
             if is_embedding(query):
                 vector_query = query
                 query_type = "vector"
+            elif isinstance(query, FullTextQuery):
+                query_type = "fts"
             elif isinstance(query, str):
                 try:
                     (
@@ -3342,13 +3441,15 @@ class AsyncTable:
             async_query = async_query.nearest_to_text(
                 query.full_text_query.query, query.full_text_query.columns
             )
-            if query.full_text_query.limit is not None:
-                async_query = async_query.limit(query.full_text_query.limit)

         return async_query

     async def _execute_query(
-        self, query: Query, batch_size: Optional[int] = None
+        self,
+        query: Query,
+        *,
+        batch_size: Optional[int] = None,
+        timeout: Optional[timedelta] = None,
     ) -> pa.RecordBatchReader:
         # The sync table calls into this method, so we need to map the
         # query to the async version of the query and run that here. This is only
@@ -3356,12 +3457,19 @@ class AsyncTable:

         async_query = self._sync_query_to_async(query)

-        return await async_query.to_batches(max_batch_length=batch_size)
+        return await async_query.to_batches(
+            max_batch_length=batch_size, timeout=timeout
+        )

-    async def _explain_plan(self, query: Query) -> str:
+    async def _explain_plan(self, query: Query, verbose: Optional[bool]) -> str:
         # This method is used by the sync table
         async_query = self._sync_query_to_async(query)
-        return await async_query.explain_plan()
+        return await async_query.explain_plan(verbose)
+
+    async def _analyze_plan(self, query: Query) -> str:
+        # This method is used by the sync table
+        async_query = self._sync_query_to_async(query)
+        return await async_query.analyze_plan()

     async def _do_merge(
         self,
@@ -3501,7 +3609,9 @@ class AsyncTable:

         return await self._inner.update(updates_sql, where)

-    async def add_columns(self, transforms: dict[str, str]):
+    async def add_columns(
+        self, transforms: dict[str, str] | pa.Field | List[pa.Field] | pa.Schema
+    ):
         """
         Add new columns with defined values.

@@ -3511,8 +3621,19 @@ class AsyncTable:
             A map of column name to a SQL expression to use to calculate the
             value of the new column. These expressions will be evaluated for
             each row in the table, and can reference existing columns.
+            Alternatively, you can pass a pyarrow field or schema to add
+            new columns with NULLs.
         """
-        await self._inner.add_columns(list(transforms.items()))
+        if isinstance(transforms, pa.Field):
+            transforms = [transforms]
+        if isinstance(transforms, list) and all(
+            isinstance(f, pa.Field) for f in transforms
+        ):
+            transforms = pa.schema(transforms)
+        if isinstance(transforms, pa.Schema):
+            await self._inner.add_columns_with_schema(transforms)
+        else:
+            await self._inner.add_columns(list(transforms.items()))

     async def alter_columns(self, *alterations: Iterable[dict[str, Any]]):
         """
@@ -3610,7 +3731,7 @@ class AsyncTable:
         """
         await self._inner.checkout_latest()

-    async def restore(self):
+    async def restore(self, version: Optional[int] = None):
         """
         Restore the table to the currently checked out version

@@ -3623,7 +3744,7 @@ class AsyncTable:
         Once the operation concludes the table will no longer be in a checked
         out state and the read_consistency_interval, if any, will apply.
         """
-        await self._inner.restore(version)
+        await self._inner.restore(version)

     async def optimize(
         self,

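The widened `add_columns` signature accepts a pyarrow Field, a list of Fields, or a whole Schema in addition to the SQL-expression dict; Field and Schema inputs create null-initialized columns. A short sketch against a local table (the path, table, and index names are illustrative):

```python
import pyarrow as pa
import lancedb

db = lancedb.connect("data/sample-lancedb")  # hypothetical path
tbl = db.open_table("my_table")  # hypothetical table

# SQL expressions, as before: values computed from existing columns.
tbl.add_columns({"double_id": "id * 2"})

# New: null-initialized columns declared with pyarrow types.
tbl.add_columns(pa.field("note", pa.string()))
tbl.add_columns([pa.field("a", pa.int64()), pa.field("b", pa.float32())])
tbl.add_columns(pa.schema([pa.field("c", pa.bool_())]))

# New index helper: pull an index fully into memory ahead of queries.
tbl.prewarm_index("text_idx")  # "text_idx" is a placeholder index name
```

Note also two small corrections folded into the reconstruction above: the sync and async `add_columns` annotations use the `pa.Field` class (the lowercase `pa.field` is a factory function, not a type), and the `all(...)` check uses a plain generator rather than a set comprehension.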
@@ -253,9 +253,14 @@ def infer_vector_column_name(
     query: Optional[Any],  # inferred later in query builder
     vector_column_name: Optional[str],
 ):
-    if (vector_column_name is None and query is not None and query_type != "fts") or (
-        vector_column_name is None and query_type == "hybrid"
-    ):
+    if vector_column_name is not None:
+        return vector_column_name
+
+    if query_type == "fts":
+        # FTS queries do not require a vector column
+        return None
+
+    if query is not None or query_type == "hybrid":
         try:
             vector_column_name = inf_vector_column_query(schema)
         except Exception as e:

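With the changes above, passing a `FullTextQuery` anywhere a query string was accepted routes straight to FTS, and the rewritten helper makes the precedence explicit: an explicit vector column always wins, FTS never needs one, and schema inference only runs for vector or hybrid search. A small end-to-end sketch using the query objects exercised in the tests below (data paths and column names are illustrative):

```python
import lancedb
from lancedb.query import MatchQuery, PhraseQuery

db = lancedb.connect("data/sample-lancedb")  # hypothetical path
tbl = db.open_table("my_table")  # hypothetical table with a "text" column
tbl.create_fts_index("text", use_tantivy=False, replace=True)

# A FullTextQuery object forces query_type="fts"; no vector column is
# inferred or required.
fuzzy = tbl.search(MatchQuery("puppy", "text", fuzziness=1)).limit(5).to_list()
exact = tbl.search(PhraseQuery("puppy runs", "text")).limit(5).to_list()
```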
@@ -562,7 +562,7 @@ async def test_table_async():
     async_db = await lancedb.connect_async(uri, read_consistency_interval=timedelta(0))
     async_tbl = await async_db.open_table("test_table_async")
     # --8<-- [end:table_async_strong_consistency]
-    # --8<-- [start:table_async_ventual_consistency]
+    # --8<-- [start:table_async_eventual_consistency]
     uri = "data/sample-lancedb"
     async_db = await lancedb.connect_async(
         uri, read_consistency_interval=timedelta(seconds=5)

@@ -6,7 +6,9 @@ import lancedb

 # --8<-- [end:import-lancedb]
 # --8<-- [start:import-numpy]
+from lancedb.query import BoostQuery, MatchQuery
 import numpy as np
+import pyarrow as pa

 # --8<-- [end:import-numpy]
 # --8<-- [start:import-datetime]
@@ -154,6 +156,84 @@ async def test_vector_search_async():
     # --8<-- [end:search_result_async_as_list]


+def test_fts_fuzzy_query():
+    uri = "data/fuzzy-example"
+    db = lancedb.connect(uri)
+
+    table = db.create_table(
+        "my_table_fts_fuzzy",
+        data=pa.table(
+            {
+                "text": [
+                    "fa",
+                    "fo",  # spellchecker:disable-line
+                    "fob",
+                    "focus",
+                    "foo",
+                    "food",
+                    "foul",
+                ]
+            }
+        ),
+        mode="overwrite",
+    )
+    table.create_fts_index("text", use_tantivy=False, replace=True)
+
+    results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
+    assert len(results) == 4
+    assert set(results["text"].to_list()) == {
+        "foo",
+        "fo",  # 1 deletion  # spellchecker:disable-line
+        "fob",  # 1 substitution
+        "food",  # 1 insertion
+    }
+
+
+def test_fts_boost_query():
+    uri = "data/boost-example"
+    db = lancedb.connect(uri)
+
+    table = db.create_table(
+        "my_table_fts_boost",
+        data=pa.table(
+            {
+                "title": [
+                    "The Hidden Gems of Travel",
+                    "Exploring Nature's Wonders",
+                    "Cultural Treasures Unveiled",
+                    "The Nightlife Chronicles",
+                    "Scenic Escapes and Challenges",
+                ],
+                "desc": [
+                    "A vibrant city with occasional traffic jams.",
+                    "Beautiful landscapes but overpriced tourist spots.",
+                    "Rich cultural heritage but humid summers.",
+                    "Bustling nightlife but noisy streets.",
+                    "Scenic views but limited public transport options.",
+                ],
+            }
+        ),
+        mode="overwrite",
+    )
+    table.create_fts_index("desc", use_tantivy=False, replace=True)
+
+    results = table.search(
+        BoostQuery(
+            MatchQuery("beautiful, cultural, nightlife", "desc"),
+            MatchQuery("bad traffic jams, overpriced", "desc"),
+        ),
+    ).to_pandas()
+
+    # we will hit 3 results because the positive query has 3 hits
+    assert len(results) == 3
+    # the one containing "overpriced" will be negatively boosted,
+    # so it will be the last one
+    assert (
+        results["desc"].to_list()[2]
+        == "Beautiful landscapes but overpriced tourist spots."
+    )
+
+
 def test_fts_native():
     # --8<-- [start:basic_fts]
     uri = "data/sample-lancedb"

@@ -12,6 +12,7 @@ import pyarrow as pa
 import pytest
 from lancedb.embeddings import get_registry
 from lancedb.pydantic import LanceModel, Vector
+import requests

 # These are integration tests for embedding functions.
 # They are slow because they require downloading models
@@ -516,3 +517,61 @@ def test_voyageai_embedding_function():

     tbl.add(df)
     assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_multimodal_embedding_function():
+    voyageai = (
+        get_registry().get("voyageai").create(name="voyage-multimodal-3", max_retries=0)
+    )
+
+    class Images(LanceModel):
+        label: str
+        image_uri: str = voyageai.SourceField()  # image uri as the source
+        image_bytes: bytes = voyageai.SourceField()  # image bytes as the source
+        vector: Vector(voyageai.ndims()) = voyageai.VectorField()  # vector column
+        vec_from_bytes: Vector(voyageai.ndims()) = (
+            voyageai.VectorField()
+        )  # Another vector column
+
+    db = lancedb.connect("~/lancedb")
+    table = db.create_table("test", schema=Images, mode="overwrite")
+    labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
+    uris = [
+        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
+        "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
+        "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
+        "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
+        "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
+        "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
+    ]
+    # get each uri as bytes
+    image_bytes = [requests.get(uri).content for uri in uris]
+    table.add(
+        pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": image_bytes})
+    )
+    assert len(table.to_pandas()["vector"][0]) == voyageai.ndims()
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_multimodal_embedding_text_function():
+    voyageai = (
+        get_registry().get("voyageai").create(name="voyage-multimodal-3", max_retries=0)
+    )
+
+    class TextModel(LanceModel):
+        text: str = voyageai.SourceField()
+        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
+    db = lancedb.connect("~/lancedb")
+    tbl = db.create_table("test", schema=TextModel, mode="overwrite")
+
+    tbl.add(df)
+    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()

@@ -20,7 +20,9 @@ from unittest import mock
|
|||||||
import lancedb as ldb
|
import lancedb as ldb
|
||||||
from lancedb.db import DBConnection
|
from lancedb.db import DBConnection
|
||||||
from lancedb.index import FTS
|
from lancedb.index import FTS
|
||||||
|
from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
from utils import exception_output
|
from utils import exception_output
|
||||||
@@ -178,11 +180,47 @@ def test_search_fts(table, use_tantivy):
|
|||||||
results = table.search("puppy").select(["id", "text"]).to_list()
|
results = table.search("puppy").select(["id", "text"]).to_list()
|
||||||
assert len(results) == 10
|
assert len(results) == 10
|
||||||
|
|
||||||
|
if not use_tantivy:
|
||||||
|
# Test with a query
|
||||||
|
results = (
|
||||||
|
table.search(MatchQuery("puppy", "text"))
|
||||||
|
.select(["id", "text"])
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(results) == 5
|
||||||
|
|
||||||
|
# Test boost query
|
||||||
|
results = (
|
||||||
|
table.search(
|
||||||
|
BoostQuery(
|
||||||
|
MatchQuery("puppy", "text"),
|
||||||
|
MatchQuery("runs", "text"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.select(["id", "text"])
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(results) == 5
|
||||||
|
|
||||||
|
# Test multi match query
|
||||||
|
table.create_fts_index("text2", use_tantivy=use_tantivy)
|
||||||
|
results = (
|
+        table.search(MultiMatchQuery("puppy", ["text", "text2"]))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score
+
+
 @pytest.mark.asyncio
 async def test_fts_select_async(async_table):
     tbl = await async_table
     await tbl.create_index("text", config=FTS())
+    await tbl.create_index("text2", config=FTS())
     results = (
         await tbl.query()
         .nearest_to_text("puppy")
@@ -193,6 +231,54 @@ async def test_fts_select_async(async_table):
     assert len(results) == 5
     assert len(results[0]) == 3  # id, text, _score
 
+    # Test with FullTextQuery
+    results = (
+        await tbl.query()
+        .nearest_to_text(MatchQuery("puppy", "text"))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score
+
+    # Test with BoostQuery
+    results = (
+        await tbl.query()
+        .nearest_to_text(
+            BoostQuery(
+                MatchQuery("puppy", "text"),
+                MatchQuery("runs", "text"),
+            )
+        )
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score
+
+    # Test with MultiMatchQuery
+    results = (
+        await tbl.query()
+        .nearest_to_text(MultiMatchQuery("puppy", ["text", "text2"]))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score
+
+    # Test with search() API
+    results = (
+        await (await tbl.search(MatchQuery("puppy", "text")))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score
+
+
 def test_search_fts_phrase_query(table):
     table.create_fts_index("text", use_tantivy=False, with_position=False)
@@ -207,6 +293,13 @@ def test_search_fts_phrase_query(table):
     assert len(results) > len(phrase_results)
     assert len(phrase_results) > 0
 
+    # Test with a query
+    phrase_results = (
+        table.search(PhraseQuery("puppy runs", "text")).limit(100).to_list()
+    )
+    assert len(results) > len(phrase_results)
+    assert len(phrase_results) > 0
+
+
 @pytest.mark.asyncio
 async def test_search_fts_phrase_query_async(async_table):
@@ -227,6 +320,16 @@ async def test_search_fts_phrase_query_async(async_table):
     assert len(results) > len(phrase_results)
     assert len(phrase_results) > 0
 
+    # Test with a query
+    phrase_results = (
+        await async_table.query()
+        .nearest_to_text(PhraseQuery("puppy runs", "text"))
+        .limit(100)
+        .to_list()
+    )
+    assert len(results) > len(phrase_results)
+    assert len(phrase_results) > 0
+
+
 def test_search_fts_specify_column(table):
     table.create_fts_index("text", use_tantivy=False)
@@ -524,3 +627,32 @@ def test_language(mem_db: DBConnection):
     # Stop words -> no results
     results = table.search("la", query_type="fts").limit(5).to_list()
     assert len(results) == 0
+
+
+def test_fts_on_list(mem_db: DBConnection):
+    data = pa.table(
+        {
+            "text": [
+                ["lance database", "the", "search"],
+                ["lance database"],
+                ["lance", "search"],
+                ["database", "search"],
+                ["unrelated", "doc"],
+            ],
+            "vector": [
+                [1.0, 2.0, 3.0],
+                [4.0, 5.0, 6.0],
+                [7.0, 8.0, 9.0],
+                [10.0, 11.0, 12.0],
+                [13.0, 14.0, 15.0],
+            ],
+        }
+    )
+    table = mem_db.create_table("test", data=data)
+    table.create_fts_index("text", use_tantivy=False)
+
+    res = table.search("lance").limit(5).to_list()
+    assert len(res) == 3
+
+    res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
+    assert len(res) == 2
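A quick, self-contained sketch of the structured full-text query objects exercised above, usable from the synchronous API as well. The `lancedb.query` import path and the sample data are assumptions here; the tests' own import block is not shown in this diff:

import lancedb
from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery

db = lancedb.connect("memory://")
table = db.create_table(
    "docs",
    data=[
        {"text": "a puppy runs fast", "text2": "the puppy"},
        {"text": "a cat sleeps", "text2": "the cat"},
    ],
)
table.create_fts_index("text", use_tantivy=False)
table.create_fts_index("text2", use_tantivy=False)

# Term match on one column, an exact phrase, a positive/negative boost pair
# (the second query down-weights its matches), and one term across columns.
table.search(MatchQuery("puppy", "text")).limit(5).to_list()
table.search(PhraseQuery("puppy runs", "text")).limit(5).to_list()
table.search(BoostQuery(MatchQuery("puppy", "text"), MatchQuery("cat", "text"))).limit(5).to_list()
table.search(MultiMatchQuery("puppy", ["text", "text2"])).limit(5).to_list()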
@@ -114,6 +114,16 @@ async def test_explain_plan(table: AsyncTable):
     assert "LanceScan" in plan
 
 
+@pytest.mark.asyncio
+async def test_analyze_plan(table: AsyncTable):
+    res = await (
+        table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).analyze_plan()
+    )
+
+    assert "AnalyzeExec" in res
+    assert "metrics=" in res
+
+
 def test_normalize_scores():
     cases = [
         (pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
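`analyze_plan` is the runtime counterpart of `explain_plan`: it executes the query and returns the physical plan annotated with execution metrics, hence the `AnalyzeExec` and `metrics=` assertions. A minimal synchronous sketch with made-up data:

import lancedb

db = lancedb.connect("memory://")
table = db.create_table("vecs", data=[{"vector": [0.0, 0.0]}, {"vector": [1.0, 1.0]}])

plan = table.search([0.0, 0.0]).explain_plan(verbose=True)  # describes the plan only
stats = table.search([0.0, 0.0]).analyze_plan()  # runs the query and reports metrics
print(plan, stats, sep="\n")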
@@ -8,7 +8,7 @@ import pyarrow as pa
 import pytest
 import pytest_asyncio
 from lancedb import AsyncConnection, AsyncTable, connect_async
-from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq
+from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
 
 
 @pytest_asyncio.fixture
@@ -31,6 +31,7 @@ async def some_table(db_async):
         {
             "id": list(range(NROWS)),
             "vector": sample_fixed_size_list_array(NROWS, DIM),
+            "fsb": pa.array([bytes([i]) for i in range(NROWS)], pa.binary(1)),
             "tags": [
                 [f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
             ],
@@ -85,6 +86,16 @@ async def test_create_scalar_index(some_table: AsyncTable):
     assert len(indices) == 0
 
 
+@pytest.mark.asyncio
+async def test_create_fixed_size_binary_index(some_table: AsyncTable):
+    await some_table.create_index("fsb", config=BTree())
+    indices = await some_table.list_indices()
+    assert str(indices) == '[Index(BTree, columns=["fsb"], name="fsb_idx")]'
+    assert len(indices) == 1
+    assert indices[0].index_type == "BTree"
+    assert indices[0].columns == ["fsb"]
+
+
 @pytest.mark.asyncio
 async def test_create_bitmap_index(some_table: AsyncTable):
     await some_table.create_index("id", config=Bitmap())
@@ -108,6 +119,18 @@ async def test_create_label_list_index(some_table: AsyncTable):
     assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]'
 
 
+@pytest.mark.asyncio
+async def test_full_text_search_index(some_table: AsyncTable):
+    await some_table.create_index("tags", config=FTS(with_position=False))
+    indices = await some_table.list_indices()
+    assert str(indices) == '[Index(FTS, columns=["tags"], name="tags_idx")]'
+
+    await some_table.prewarm_index("tags_idx")
+
+    res = await (await some_table.search("tag0")).to_arrow()
+    assert res.num_rows > 0
+
+
 @pytest.mark.asyncio
 async def test_create_vector_index(some_table: AsyncTable):
     # Can create
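The test above also exercises the new `prewarm_index`, which loads an index into cache before the first query touches it. A hedged async sketch; the `text_idx` name follows the `{column}_idx` convention visible in the assertions:

import asyncio
import lancedb
from lancedb.index import FTS

async def main():
    db = await lancedb.connect_async("memory://")
    table = await db.create_table("docs", data=[{"text": "lance database"}])
    await table.create_index("text", config=FTS(with_position=False))
    await table.prewarm_index("text_idx")  # warm the inverted index up front
    print(await (await table.search("lance")).to_arrow())

asyncio.run(main())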
@@ -511,7 +511,8 @@ def test_query_builder_with_different_vector_column():
             columns=["b"],
             vector_column="foo_vector",
         ),
-        None,
+        batch_size=None,
+        timeout=None,
     )
 
 
@@ -702,6 +703,20 @@ async def test_fast_search_async(tmp_path):
     assert "LanceScan" not in plan
 
 
+def test_analyze_plan(table):
+    q = LanceVectorQueryBuilder(table, [0, 0], "vector")
+    res = q.analyze_plan()
+    assert "AnalyzeExec" in res
+    assert "metrics=" in res
+
+
+@pytest.mark.asyncio
+async def test_analyze_plan_async(table_async: AsyncTable):
+    res = await table_async.query().nearest_to(pa.array([1, 2])).analyze_plan()
+    assert "AnalyzeExec" in res
+    assert "metrics=" in res
+
+
 def test_explain_plan(table):
     q = LanceVectorQueryBuilder(table, [0, 0], "vector")
     plan = q.explain_plan(verbose=True)
@@ -1062,3 +1077,67 @@ async def test_query_serialization_async(table_async: AsyncTable):
         full_text_query=FullTextSearchQuery(columns=[], query="foo"),
         with_row_id=False,
     )
+
+
+def test_query_timeout(tmp_path):
+    # Use local directory instead of memory:// to add a bit of latency to
+    # operations so a timeout of zero will trigger exceptions.
+    db = lancedb.connect(tmp_path)
+    data = pa.table(
+        {
+            "text": ["a", "b"],
+            "vector": pa.FixedSizeListArray.from_arrays(
+                pc.random(4).cast(pa.float32()), 2
+            ),
+        }
+    )
+    table = db.create_table("test", data)
+    table.create_fts_index("text", use_tantivy=False)
+
+    with pytest.raises(Exception, match="Query timeout"):
+        table.search().where("text = 'a'").to_list(timeout=timedelta(0))
+
+    with pytest.raises(Exception, match="Query timeout"):
+        table.search([0.0, 0.0]).to_arrow(timeout=timedelta(0))
+
+    with pytest.raises(Exception, match="Query timeout"):
+        table.search("a", query_type="fts").to_pandas(timeout=timedelta(0))
+
+    with pytest.raises(Exception, match="Query timeout"):
+        table.search(query_type="hybrid").vector([0.0, 0.0]).text("a").to_arrow(
+            timeout=timedelta(0)
+        )
+
+
+@pytest.mark.asyncio
+async def test_query_timeout_async(tmp_path):
+    db = await lancedb.connect_async(tmp_path)
+    data = pa.table(
+        {
+            "text": ["a", "b"],
+            "vector": pa.FixedSizeListArray.from_arrays(
+                pc.random(4).cast(pa.float32()), 2
+            ),
+        }
+    )
+    table = await db.create_table("test", data)
+    await table.create_index("text", config=FTS())
+
+    with pytest.raises(Exception, match="Query timeout"):
+        await table.query().where("text != 'a'").to_list(timeout=timedelta(0))
+
+    with pytest.raises(Exception, match="Query timeout"):
+        await table.vector_search([0.0, 0.0]).to_arrow(timeout=timedelta(0))
+
+    with pytest.raises(Exception, match="Query timeout"):
+        await (await table.search("a", query_type="fts")).to_pandas(
+            timeout=timedelta(0)
+        )
+
+    with pytest.raises(Exception, match="Query timeout"):
+        await (
+            table.query()
+            .nearest_to_text("a")
+            .nearest_to([0.0, 0.0])
+            .to_list(timeout=timedelta(0))
+        )
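The timeout support tested above threads a `datetime.timedelta` through each result-materializing call, and the stream aborts with a "Query timeout" error once the deadline passes. A small sketch with a non-zero budget (the tests use `timedelta(0)` to force the failure path):

from datetime import timedelta

import lancedb

db = lancedb.connect("memory://")  # in-memory keeps the example self-contained
table = db.create_table("t", data=[{"text": "a", "vector": [0.0, 0.0]}])

# Give the scan up to five seconds before it is cancelled.
rows = table.search([0.0, 0.0]).limit(1).to_list(timeout=timedelta(seconds=5))
print(rows)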
@@ -444,6 +444,16 @@ def test_query_sync_fts():
         "prefilter": True,
         "with_row_id": True,
         "version": None,
+    } or body == {
+        "full_text_query": {
+            "query": "puppy",
+            "columns": ["description", "name"],
+        },
+        "k": 42,
+        "vector": [],
+        "prefilter": True,
+        "with_row_id": True,
+        "version": None,
     }
 
     return pa.table({"id": [1, 2, 3]})
@@ -457,3 +457,45 @@ def test_voyageai_reranker(tmp_path, use_tantivy):
     reranker = VoyageAIReranker(model_name="rerank-2")
     table, schema = get_test_table(tmp_path, use_tantivy)
     _run_test_reranker(reranker, table, "single player experience", None, schema)
+
+
+def test_empty_result_reranker():
+    pytest.importorskip("sentence_transformers")
+    db = lancedb.connect("memory://")
+
+    # Define schema
+    schema = pa.schema(
+        [
+            ("id", pa.int64()),
+            ("text", pa.string()),
+            ("vector", pa.list_(pa.float32(), 128)),  # 128-dimensional vector
+        ]
+    )
+
+    # Create empty table with schema
+    empty_table = db.create_table("empty_table", schema=schema, mode="overwrite")
+    empty_table.create_fts_index("text", use_tantivy=False, replace=True)
+    for reranker in [
+        CrossEncoderReranker(),
+        # ColbertReranker(),
+        # AnswerdotaiRerankers(),
+        # OpenaiReranker(),
+        # JinaReranker(),
+        # VoyageAIReranker(model_name="rerank-2"),
+    ]:
+        results = (
+            empty_table.search(list(range(128)))
+            .limit(3)
+            .rerank(reranker, "query")
+            .to_arrow()
+        )
+        # check if empty set contains _relevance_score column
+        assert "_relevance_score" in results.column_names
+        assert len(results) == 0
+
+        results = (
+            empty_table.search("query", query_type="fts")
+            .limit(3)
+            .rerank(reranker)
+            .to_arrow()
+        )
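This test pins down a useful contract: a reranker must still emit the `_relevance_score` column when the candidate set is empty, so downstream code never has to branch on schema. A hedged sketch of the same check (requires the optional `sentence-transformers` dependency):

import lancedb
import pyarrow as pa
from lancedb.rerankers import CrossEncoderReranker

db = lancedb.connect("memory://")
schema = pa.schema([("text", pa.string()), ("vector", pa.list_(pa.float32(), 2))])
table = db.create_table("empty", schema=schema)

# Empty table, so zero rows come back, but the score column is still present.
results = table.search([0.0, 0.0]).limit(3).rerank(CrossEncoderReranker(), "query").to_arrow()
assert "_relevance_score" in results.column_names and len(results) == 0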
@@ -1384,6 +1384,37 @@ async def test_add_columns_async(mem_db_async: AsyncConnection):
     assert data["new_col"].to_pylist() == [2, 3]
 
 
+@pytest.mark.asyncio
+async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
+    data = pa.table({"id": [0, 1]})
+    table = await mem_db_async.create_table("my_table", data=data)
+    await table.add_columns(
+        [pa.field("x", pa.int64()), pa.field("vector", pa.list_(pa.float32(), 8))]
+    )
+
+    assert await table.schema() == pa.schema(
+        [
+            pa.field("id", pa.int64()),
+            pa.field("x", pa.int64()),
+            pa.field("vector", pa.list_(pa.float32(), 8)),
+        ]
+    )
+
+    table = await mem_db_async.create_table("table2", data=data)
+    await table.add_columns(
+        pa.schema(
+            [pa.field("y", pa.int64()), pa.field("emb", pa.list_(pa.float32(), 8))]
+        )
+    )
+    assert await table.schema() == pa.schema(
+        [
+            pa.field("id", pa.int64()),
+            pa.field("y", pa.int64()),
+            pa.field("emb", pa.list_(pa.float32(), 8)),
+        ]
+    )
+
+
 def test_alter_columns(mem_db: DBConnection):
     data = pa.table({"id": [0, 1]})
     table = mem_db.create_table("my_table", data=data)
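As the test shows, async `add_columns` now also accepts a list of `pa.field`s or a full `pa.schema`, appending the new columns as all-null. A condensed sketch:

import asyncio
import lancedb
import pyarrow as pa

async def main():
    db = await lancedb.connect_async("memory://")
    table = await db.create_table("t", data=pa.table({"id": [0, 1]}))
    # New columns arrive as all-null; backfill them later with update/merge.
    await table.add_columns(pa.schema([pa.field("emb", pa.list_(pa.float32(), 8))]))
    print(await table.schema())

asyncio.run(main())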
@@ -2,25 +2,26 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 
 use std::sync::Arc;
+use std::time::Duration;
 
 use arrow::array::make_array;
 use arrow::array::Array;
 use arrow::array::ArrayData;
 use arrow::pyarrow::FromPyArrow;
 use arrow::pyarrow::IntoPyArrow;
-use lancedb::index::scalar::FullTextSearchQuery;
+use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery};
 use lancedb::query::QueryExecutionOptions;
 use lancedb::query::QueryFilter;
 use lancedb::query::{
     ExecutableQuery, Query as LanceDbQuery, QueryBase, Select, VectorQuery as LanceDbVectorQuery,
 };
 use lancedb::table::AnyQuery;
-use pyo3::exceptions::PyNotImplementedError;
 use pyo3::exceptions::PyRuntimeError;
+use pyo3::exceptions::{PyNotImplementedError, PyValueError};
 use pyo3::prelude::{PyAnyMethods, PyDictMethods};
 use pyo3::pymethods;
-use pyo3::types::PyDict;
 use pyo3::types::PyList;
+use pyo3::types::{PyDict, PyString};
 use pyo3::Bound;
 use pyo3::IntoPyObject;
 use pyo3::PyAny;
@@ -31,7 +32,7 @@ use pyo3_async_runtimes::tokio::future_into_py;
 
 use crate::arrow::RecordBatchStream;
 use crate::error::PythonErrorExt;
-use crate::util::parse_distance_type;
+use crate::util::{parse_distance_type, parse_fts_query};
 
 // Python representation of full text search parameters
 #[derive(Clone)]
@@ -45,9 +46,9 @@ pub struct PyFullTextSearchQuery {
 
 impl From<FullTextSearchQuery> for PyFullTextSearchQuery {
     fn from(query: FullTextSearchQuery) -> Self {
-        PyFullTextSearchQuery {
-            columns: query.columns,
-            query: query.query,
+        Self {
+            columns: query.columns().into_iter().collect(),
+            query: query.query.query().to_owned(),
             limit: query.limit,
             wand_factor: query.wand_factor,
         }
@@ -99,7 +100,7 @@ pub struct PyQueryRequest {
 impl From<AnyQuery> for PyQueryRequest {
     fn from(query: AnyQuery) -> Self {
         match query {
-            AnyQuery::Query(query_request) => PyQueryRequest {
+            AnyQuery::Query(query_request) => Self {
                 limit: query_request.limit,
                 offset: query_request.offset,
                 filter: query_request.filter.map(PyQueryFilter),
@@ -121,7 +122,7 @@ impl From<AnyQuery> for PyQueryRequest {
                 postfilter: None,
                 norm: None,
             },
-            AnyQuery::VectorQuery(vector_query) => PyQueryRequest {
+            AnyQuery::VectorQuery(vector_query) => Self {
                 limit: vector_query.base.limit,
                 offset: vector_query.base.offset,
                 filter: vector_query.base.filter.map(PyQueryFilter),
@@ -236,29 +237,69 @@ impl Query {
     }
 
     pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<FTSQuery> {
-        let query_text = query
+        let fts_query = query
             .get_item("query")?
             .ok_or(PyErr::new::<PyRuntimeError, _>(
                 "Query text is required for nearest_to_text",
-            ))?
-            .extract::<String>()?;
-        let columns = query
-            .get_item("columns")?
-            .map(|columns| columns.extract::<Vec<String>>())
-            .transpose()?;
+            ))?;
 
-        let fts_query = FullTextSearchQuery::new(query_text).columns(columns);
+        let query = if let Ok(query_text) = fts_query.downcast::<PyString>() {
+            let mut query_text = query_text.to_string();
+            let columns = query
+                .get_item("columns")?
+                .map(|columns| columns.extract::<Vec<String>>())
+                .transpose()?;
+
+            let is_phrase =
+                query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
+            let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
+
+            if is_phrase {
+                // Remove the surrounding quotes for phrase queries
+                query_text = query_text[1..query_text.len() - 1].to_string();
+            }
+
+            let query: FtsQuery = match (is_phrase, is_multi_match) {
+                (false, _) => MatchQuery::new(query_text).into(),
+                (true, false) => PhraseQuery::new(query_text).into(),
+                (true, true) => {
+                    return Err(PyValueError::new_err(
+                        "Phrase queries cannot be used with multiple columns.",
+                    ));
+                }
+            };
+            let mut query = FullTextSearchQuery::new_query(query);
+            if let Some(cols) = columns {
+                if !cols.is_empty() {
+                    query = query.with_columns(&cols).map_err(|e| {
+                        PyValueError::new_err(format!(
+                            "Failed to set full text search columns: {}",
+                            e
+                        ))
+                    })?;
+                }
+            }
+            query
+        } else if let Ok(query) = fts_query.downcast::<PyDict>() {
+            let query = parse_fts_query(query)?;
+            FullTextSearchQuery::new_query(query)
+        } else {
+            return Err(PyValueError::new_err(
+                "query must be a string or a Query object",
+            ));
+        };
 
         Ok(FTSQuery {
-            fts_query,
             inner: self.inner.clone(),
+            fts_query: query,
         })
     }
 
-    #[pyo3(signature = (max_batch_length=None))]
+    #[pyo3(signature = (max_batch_length=None, timeout=None))]
     pub fn execute(
         self_: PyRef<'_, Self>,
         max_batch_length: Option<u32>,
+        timeout: Option<Duration>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner.clone();
         future_into_py(self_.py(), async move {
@@ -266,12 +307,15 @@ impl Query {
             if let Some(max_batch_length) = max_batch_length {
                 opts.max_batch_length = max_batch_length;
             }
+            if let Some(timeout) = timeout {
+                opts.timeout = Some(timeout);
+            }
             let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
             Ok(RecordBatchStream::new(inner_stream))
         })
     }
 
-    fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
+    pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner.clone();
         future_into_py(self_.py(), async move {
             inner
@@ -281,6 +325,16 @@ impl Query {
         })
     }
 
+    pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            inner
+                .analyze_plan()
+                .await
+                .map_err(|e| PyRuntimeError::new_err(e.to_string()))
+        })
+    }
+
     pub fn to_query_request(&self) -> PyQueryRequest {
         PyQueryRequest::from(AnyQuery::Query(self.inner.clone().into_request()))
     }
@@ -327,10 +381,11 @@ impl FTSQuery {
         self.inner = self.inner.clone().postfilter();
     }
 
-    #[pyo3(signature = (max_batch_length=None))]
+    #[pyo3(signature = (max_batch_length=None, timeout=None))]
     pub fn execute(
         self_: PyRef<'_, Self>,
         max_batch_length: Option<u32>,
+        timeout: Option<Duration>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_
             .inner
@@ -342,6 +397,9 @@ impl FTSQuery {
             if let Some(max_batch_length) = max_batch_length {
                 opts.max_batch_length = max_batch_length;
             }
+            if let Some(timeout) = timeout {
+                opts.timeout = Some(timeout);
+            }
             let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
             Ok(RecordBatchStream::new(inner_stream))
         })
@@ -365,8 +423,18 @@ impl FTSQuery {
         })
     }
 
+    pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            inner
+                .analyze_plan()
+                .await
+                .map_err(|e| PyRuntimeError::new_err(e.to_string()))
+        })
+    }
+
     pub fn get_query(&self) -> String {
-        self.fts_query.query.clone()
+        self.fts_query.query.query().to_owned()
     }
 
     pub fn to_query_request(&self) -> PyQueryRequest {
@@ -454,10 +522,11 @@ impl VectorQuery {
         self.inner = self.inner.clone().bypass_vector_index()
     }
 
-    #[pyo3(signature = (max_batch_length=None))]
+    #[pyo3(signature = (max_batch_length=None, timeout=None))]
     pub fn execute(
         self_: PyRef<'_, Self>,
         max_batch_length: Option<u32>,
+        timeout: Option<Duration>,
    ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner.clone();
         future_into_py(self_.py(), async move {
@@ -465,12 +534,15 @@ impl VectorQuery {
             if let Some(max_batch_length) = max_batch_length {
                 opts.max_batch_length = max_batch_length;
             }
+            if let Some(timeout) = timeout {
+                opts.timeout = Some(timeout);
+            }
             let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
             Ok(RecordBatchStream::new(inner_stream))
         })
     }
 
-    fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
+    pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner.clone();
         future_into_py(self_.py(), async move {
             inner
@@ -480,6 +552,16 @@ impl VectorQuery {
         })
     }
 
+    pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            inner
+                .analyze_plan()
+                .await
+                .map_err(|e| PyRuntimeError::new_err(e.to_string()))
+        })
+    }
+
     pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<HybridQuery> {
         let base_query = self.inner.clone().into_plain();
         let fts_query = Query::new(base_query).nearest_to_text(query)?;
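One behavioral detail worth noting in the rewritten `nearest_to_text` binding above: a string wrapped in double quotes is parsed as a phrase query (the quotes are stripped), an unquoted string becomes a match query, and a quoted string combined with multiple columns is rejected. Roughly, in Python (a sketch, not an authoritative mapping of every code path):

import lancedb

db = lancedb.connect("memory://")
table = db.create_table("docs", data=[{"text": "a puppy runs"}])
table.create_fts_index("text", use_tantivy=False)

table.search("puppy", query_type="fts").to_list()         # treated as a match query
table.search('"puppy runs"', query_type="fts").to_list()  # treated as a phrase query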
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 
+use std::{collections::HashMap, sync::Arc};
+
 use arrow::{
-    datatypes::DataType,
+    datatypes::{DataType, Schema},
     ffi_stream::ArrowArrayStreamReader,
-    pyarrow::{FromPyArrow, ToPyArrow},
+    pyarrow::{FromPyArrow, PyArrowType, ToPyArrow},
 };
 use lancedb::table::{
     AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
@@ -16,7 +18,6 @@ use pyo3::{
     Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;
-use std::collections::HashMap;
 
 use crate::{
     error::PythonErrorExt,
@@ -203,6 +204,14 @@ impl Table {
         })
     }
 
+    pub fn prewarm_index(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            inner.prewarm_index(&index_name).await.infer_error()?;
+            Ok(())
+        })
+    }
+
     pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner_ref()?.clone();
         future_into_py(self_.py(), async move {
@@ -303,12 +312,16 @@ impl Table {
         })
     }
 
-    pub fn restore(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+    #[pyo3(signature = (version=None))]
+    pub fn restore(self_: PyRef<'_, Self>, version: Option<u64>) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner_ref()?.clone();
-        future_into_py(
-            self_.py(),
-            async move { inner.restore().await.infer_error() },
-        )
+        future_into_py(self_.py(), async move {
+            if let Some(version) = version {
+                inner.checkout(version).await.infer_error()?;
+            }
+            inner.restore().await.infer_error()
+        })
     }
 
     pub fn query(&self) -> Query {
@@ -440,6 +453,20 @@ impl Table {
         })
     }
 
+    pub fn add_columns_with_schema(
+        self_: PyRef<'_, Self>,
+        schema: PyArrowType<Schema>,
+    ) -> PyResult<Bound<'_, PyAny>> {
+        let arrow_schema = &schema.0;
+        let transform = NewColumnTransform::AllNulls(Arc::new(arrow_schema.clone()));
+
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            inner.add_columns(transform, None).await.infer_error()?;
+            Ok(())
+        })
+    }
+
     pub fn alter_columns<'a>(
         self_: PyRef<'a, Self>,
         alterations: Vec<Bound<PyDict>>,
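`restore` now optionally checks out a version first, collapsing the previous checkout-then-restore two-step into one call. A hedged sketch of the async Python surface this binding backs (version numbering assumed to start at 1):

import asyncio
import lancedb

async def main():
    db = await lancedb.connect_async("memory://")
    table = await db.create_table("t", data=[{"id": 1}])
    await table.add([{"id": 2}])
    # Roll the table back to its first version in a single call.
    await table.restore(1)
    print(await table.count_rows())

asyncio.run(main())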
@@ -3,11 +3,15 @@
 
 use std::sync::Mutex;
 
+use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery};
 use lancedb::DistanceType;
+use pyo3::prelude::{PyAnyMethods, PyDictMethods, PyListMethods};
+use pyo3::types::PyDict;
 use pyo3::{
     exceptions::{PyRuntimeError, PyValueError},
     pyfunction, PyResult,
 };
+use pyo3::{Bound, PyAny};
 
 /// A wrapper around a rust builder
 ///
@@ -59,3 +63,117 @@ pub fn validate_table_name(table_name: &str) -> PyResult<()> {
     lancedb::utils::validate_table_name(table_name)
         .map_err(|e| PyValueError::new_err(e.to_string()))
 }
+
+pub fn parse_fts_query(query: &Bound<'_, PyDict>) -> PyResult<FtsQuery> {
+    let query_type = query.keys().get_item(0)?.extract::<String>()?;
+    let query_value = query
+        .get_item(&query_type)?
+        .ok_or(PyValueError::new_err(format!(
+            "Query type {} not found",
+            query_type
+        )))?;
+    let query_value = query_value.downcast::<PyDict>()?;
+
+    match query_type.as_str() {
+        "match" => {
+            let column = query_value.keys().get_item(0)?.extract::<String>()?;
+            let params = query_value
+                .get_item(&column)?
+                .ok_or(PyValueError::new_err(format!(
+                    "column {} not found",
+                    column
+                )))?;
+            let params = params.downcast::<PyDict>()?;
+
+            let query = params
+                .get_item("query")?
+                .ok_or(PyValueError::new_err("query not found"))?
+                .extract::<String>()?;
+            let boost = params
+                .get_item("boost")?
+                .ok_or(PyValueError::new_err("boost not found"))?
+                .extract::<f32>()?;
+            let fuzziness = params
+                .get_item("fuzziness")?
+                .ok_or(PyValueError::new_err("fuzziness not found"))?
+                .extract::<Option<u32>>()?;
+            let max_expansions = params
+                .get_item("max_expansions")?
+                .ok_or(PyValueError::new_err("max_expansions not found"))?
+                .extract::<usize>()?;
+
+            let query = MatchQuery::new(query)
+                .with_column(Some(column))
+                .with_boost(boost)
+                .with_fuzziness(fuzziness)
+                .with_max_expansions(max_expansions);
+            Ok(query.into())
+        }
+
+        "match_phrase" => {
+            let column = query_value.keys().get_item(0)?.extract::<String>()?;
+            let query = query_value
+                .get_item(&column)?
+                .ok_or(PyValueError::new_err(format!(
+                    "column {} not found",
+                    column
+                )))?
+                .extract::<String>()?;
+
+            let query = PhraseQuery::new(query).with_column(Some(column));
+            Ok(query.into())
+        }
+
+        "boost" => {
+            let positive: Bound<'_, PyAny> = query_value
+                .get_item("positive")?
+                .ok_or(PyValueError::new_err("positive not found"))?;
+            let positive = positive.downcast::<PyDict>()?;
+
+            let negative = query_value
+                .get_item("negative")?
+                .ok_or(PyValueError::new_err("negative not found"))?;
+            let negative = negative.downcast::<PyDict>()?;
+
+            let negative_boost = query_value
+                .get_item("negative_boost")?
+                .ok_or(PyValueError::new_err("negative_boost not found"))?
+                .extract::<f32>()?;
+
+            let positive_query = parse_fts_query(positive)?;
+            let negative_query = parse_fts_query(negative)?;
+            let query = BoostQuery::new(positive_query, negative_query, Some(negative_boost));
+
+            Ok(query.into())
+        }
+
+        "multi_match" => {
+            let query = query_value
+                .get_item("query")?
+                .ok_or(PyValueError::new_err("query not found"))?
+                .extract::<String>()?;
+
+            let columns = query_value
+                .get_item("columns")?
+                .ok_or(PyValueError::new_err("columns not found"))?
+                .extract::<Vec<String>>()?;
+
+            let boost = query_value
+                .get_item("boost")?
+                .ok_or(PyValueError::new_err("boost not found"))?
+                .extract::<Vec<f32>>()?;
+
+            let query = MultiMatchQuery::try_new(query, columns)
+                .and_then(|q| q.try_with_boosts(boost))
+                .map_err(|e| {
+                    PyValueError::new_err(format!("Error creating MultiMatchQuery: {}", e))
+                })?;
+            Ok(query.into())
+        }
+
+        _ => Err(PyValueError::new_err(format!(
+            "Unsupported query type: {}",
+            query_type
+        ))),
+    }
+}
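`parse_fts_query` is the receiving end of the dict form that the Python query classes serialize to: a single-key dict naming the query type, whose value carries the per-type parameters. Illustrative payloads reconstructed from the parsing code above (hand-written, not captured from the wire):

# Shapes accepted by parse_fts_query, per the match arms above:
match_query = {"match": {"text": {"query": "puppy", "boost": 1.0,
                                  "fuzziness": 0, "max_expansions": 50}}}
phrase_query = {"match_phrase": {"text": "puppy runs"}}
boost_query = {"boost": {"positive": match_query, "negative": phrase_query,
                         "negative_boost": 0.5}}
multi_match = {"multi_match": {"query": "puppy", "columns": ["text", "text2"],
                               "boost": [1.0, 2.0]}}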
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.18.2-beta.1"
+version = "0.19.0-beta.7"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.18.2-beta.1"
+version = "0.19.0-beta.7"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -139,12 +139,6 @@ impl CreateTableBuilder<true> {
         }
     }
 
-    /// Apply the given write options when writing the initial data
-    pub fn write_options(mut self, write_options: WriteOptions) -> Self {
-        self.request.write_options = write_options;
-        self
-    }
-
     /// Execute the create table operation
     pub async fn execute(self) -> Result<Table> {
         let embedding_registry = self.embedding_registry.clone();
@@ -226,6 +220,12 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
         self
     }
 
+    /// Apply the given write options when writing the initial data
+    pub fn write_options(mut self, write_options: WriteOptions) -> Self {
+        self.request.write_options = write_options;
+        self
+    }
+
     /// Set an option for the storage layer.
     ///
     /// Options already set on the connection will be inherited by the table,
@@ -863,7 +863,7 @@ impl ConnectBuilder {
 /// # Arguments
 ///
 /// * `uri` - URI where the database is located, can be a local directory, supported remote cloud storage,
 ///   or a LanceDB Cloud database. See [ConnectOptions::uri] for a list of accepted formats
 pub fn connect(uri: &str) -> ConnectBuilder {
     ConnectBuilder::new(uri)
 }
@@ -41,7 +41,7 @@ where
 /// ----------
 /// - reader: RecordBatchReader
 /// - strict: if set true, only `fixed_size_list<float>` is considered as vector column. If set to false,
 ///   a `list<float>` column with same length is also considered as vector column.
 pub fn infer_vector_columns(
     reader: impl RecordBatchReader + Send,
     strict: bool,
@@ -80,5 +80,6 @@ impl FtsIndexBuilder {
     }
 }
 
+pub use lance_index::scalar::inverted::query::*;
 pub use lance_index::scalar::inverted::TokenizerConfig;
 pub use lance_index::scalar::FullTextSearchQuery;
@@ -14,6 +14,9 @@ use object_store::{
 
 use async_trait::async_trait;
 
+#[cfg(test)]
+pub mod io_tracking;
+
 #[derive(Debug)]
 struct MirroringObjectStore {
     primary: Arc<dyn ObjectStore>,
rust/lancedb/src/io/object_store/io_tracking.rs (new file, 237 lines)
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+use std::{
+    fmt::{Display, Formatter},
+    sync::{Arc, Mutex},
+};
+
+use bytes::Bytes;
+use futures::stream::BoxStream;
+use lance::io::WrappingObjectStore;
+use object_store::{
+    path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
+};
+
+#[derive(Debug, Default)]
+pub struct IoStats {
+    pub read_iops: u64,
+    pub read_bytes: u64,
+    pub write_iops: u64,
+    pub write_bytes: u64,
+}
+
+impl Display for IoStats {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:#?}", self)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct IoTrackingStore {
+    target: Arc<dyn ObjectStore>,
+    stats: Arc<Mutex<IoStats>>,
+}
+
+impl Display for IoTrackingStore {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:#?}", self)
+    }
+}
+
+#[derive(Debug, Default, Clone)]
+pub struct IoStatsHolder(Arc<Mutex<IoStats>>);
+
+impl IoStatsHolder {
+    pub fn incremental_stats(&self) -> IoStats {
+        std::mem::take(&mut self.0.lock().expect("failed to lock IoStats"))
+    }
+}
+
+impl WrappingObjectStore for IoStatsHolder {
+    fn wrap(&self, target: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
+        Arc::new(IoTrackingStore {
+            target,
+            stats: self.0.clone(),
+        })
+    }
+}
+
+impl IoTrackingStore {
+    pub fn new_wrapper() -> (Arc<dyn WrappingObjectStore>, Arc<Mutex<IoStats>>) {
+        let stats = Arc::new(Mutex::new(IoStats::default()));
+        (Arc::new(IoStatsHolder(stats.clone())), stats)
+    }
+
+    fn record_read(&self, num_bytes: u64) {
+        let mut stats = self.stats.lock().unwrap();
+        stats.read_iops += 1;
+        stats.read_bytes += num_bytes;
+    }
+
+    fn record_write(&self, num_bytes: u64) {
+        let mut stats = self.stats.lock().unwrap();
+        stats.write_iops += 1;
+        stats.write_bytes += num_bytes;
+    }
+}
+
+#[async_trait::async_trait]
+#[deny(clippy::missing_trait_methods)]
+impl ObjectStore for IoTrackingStore {
+    async fn put(&self, location: &Path, bytes: PutPayload) -> OSResult<PutResult> {
+        self.record_write(bytes.content_length() as u64);
+        self.target.put(location, bytes).await
+    }
+
+    async fn put_opts(
+        &self,
+        location: &Path,
+        bytes: PutPayload,
+        opts: PutOptions,
+    ) -> OSResult<PutResult> {
+        self.record_write(bytes.content_length() as u64);
+        self.target.put_opts(location, bytes, opts).await
+    }
+
+    async fn put_multipart(&self, location: &Path) -> OSResult<Box<dyn MultipartUpload>> {
+        let target = self.target.put_multipart(location).await?;
+        Ok(Box::new(IoTrackingMultipartUpload {
+            target,
+            stats: self.stats.clone(),
+        }))
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOpts,
+    ) -> OSResult<Box<dyn MultipartUpload>> {
+        let target = self.target.put_multipart_opts(location, opts).await?;
+        Ok(Box::new(IoTrackingMultipartUpload {
+            target,
+            stats: self.stats.clone(),
+        }))
+    }
+
+    async fn get(&self, location: &Path) -> OSResult<GetResult> {
+        let result = self.target.get(location).await;
+        if let Ok(result) = &result {
+            let num_bytes = result.range.end - result.range.start;
+            self.record_read(num_bytes as u64);
+        }
+        result
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult<GetResult> {
+        let result = self.target.get_opts(location, options).await;
+        if let Ok(result) = &result {
+            let num_bytes = result.range.end - result.range.start;
+            self.record_read(num_bytes as u64);
+        }
+        result
+    }
+
+    async fn get_range(&self, location: &Path, range: std::ops::Range<usize>) -> OSResult<Bytes> {
+        let result = self.target.get_range(location, range).await;
+        if let Ok(result) = &result {
+            self.record_read(result.len() as u64);
+        }
+        result
+    }
+
+    async fn get_ranges(
+        &self,
+        location: &Path,
+        ranges: &[std::ops::Range<usize>],
+    ) -> OSResult<Vec<Bytes>> {
+        let result = self.target.get_ranges(location, ranges).await;
+        if let Ok(result) = &result {
+            self.record_read(result.iter().map(|b| b.len() as u64).sum());
+        }
+        result
+    }
+
+    async fn head(&self, location: &Path) -> OSResult<ObjectMeta> {
+        self.record_read(0);
+        self.target.head(location).await
+    }
+
+    async fn delete(&self, location: &Path) -> OSResult<()> {
+        self.record_write(0);
+        self.target.delete(location).await
+    }
+
+    fn delete_stream<'a>(
+        &'a self,
+        locations: BoxStream<'a, OSResult<Path>>,
+    ) -> BoxStream<'a, OSResult<Path>> {
+        self.target.delete_stream(locations)
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, OSResult<ObjectMeta>> {
+        self.record_read(0);
+        self.target.list(prefix)
+    }
+
+    fn list_with_offset(
+        &self,
+        prefix: Option<&Path>,
+        offset: &Path,
+    ) -> BoxStream<'_, OSResult<ObjectMeta>> {
+        self.record_read(0);
+        self.target.list_with_offset(prefix, offset)
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> OSResult<ListResult> {
+        self.record_read(0);
+        self.target.list_with_delimiter(prefix).await
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> {
+        self.record_write(0);
+        self.target.copy(from, to).await
+    }
+
+    async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> {
+        self.record_write(0);
+        self.target.rename(from, to).await
+    }
+
+    async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
+        self.record_write(0);
+        self.target.rename_if_not_exists(from, to).await
+    }
+
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
+        self.record_write(0);
+        self.target.copy_if_not_exists(from, to).await
+    }
+}
+
+#[derive(Debug)]
+struct IoTrackingMultipartUpload {
+    target: Box<dyn MultipartUpload>,
+    stats: Arc<Mutex<IoStats>>,
+}
+
+#[async_trait::async_trait]
+impl MultipartUpload for IoTrackingMultipartUpload {
+    async fn abort(&mut self) -> OSResult<()> {
+        self.target.abort().await
+    }
+
+    async fn complete(&mut self) -> OSResult<PutResult> {
+        self.target.complete().await
+    }
+
+    fn put_part(&mut self, payload: PutPayload) -> UploadPart {
+        {
+            let mut stats = self.stats.lock().unwrap();
+            stats.write_iops += 1;
+            stats.write_bytes += payload.content_length() as u64;
+        }
+        self.target.put_part(payload)
+    }
+}
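The new `IoTrackingStore` is a test-only decorator: it wraps another object store, forwards every call, and tallies IOPS and bytes on the way through. The same pattern in a compact Python sketch, against a hypothetical key-value `target` store (not a LanceDB API):

class TrackingStore:
    """Counts read/write calls and bytes while delegating to a wrapped store."""

    def __init__(self, target):
        self.target = target
        self.stats = {"read_iops": 0, "read_bytes": 0, "write_iops": 0, "write_bytes": 0}

    def get(self, key):
        data = self.target.get(key)
        self.stats["read_iops"] += 1
        self.stats["read_bytes"] += len(data)
        return data

    def put(self, key, data):
        self.stats["write_iops"] += 1
        self.stats["write_bytes"] += len(data)
        self.target.put(key, data)

# usage: wrap the real store, run a workload, then inspect .stats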
@@ -31,7 +31,7 @@
 //! are not yet ready to be released.
 //!
 //! - `remote` - Enable remote client to connect to LanceDB cloud. This is not yet fully implemented
 //!   and should not be enabled.
 //!
 //! ### Quick Start
 //!
@@ -1,8 +1,8 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use std::future::Future;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::{future::Future, time::Duration};
|
||||||
|
|
||||||
use arrow::compute::concat_batches;
|
use arrow::compute::concat_batches;
|
||||||
use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array};
|
use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array};
|
||||||
@@ -25,6 +25,7 @@ use crate::error::{Error, Result};
|
|||||||
use crate::rerankers::rrf::RRFReranker;
|
use crate::rerankers::rrf::RRFReranker;
|
||||||
use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker};
|
use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker};
|
||||||
use crate::table::BaseTable;
|
use crate::table::BaseTable;
|
||||||
|
use crate::utils::TimeoutStream;
|
||||||
use crate::DistanceType;
|
use crate::DistanceType;
|
||||||
use crate::{arrow::SendableRecordBatchStream, table::AnyQuery};
|
use crate::{arrow::SendableRecordBatchStream, table::AnyQuery};
|
||||||
|
|
||||||
@@ -525,12 +526,15 @@ pub struct QueryExecutionOptions {
|
|||||||
///
|
///
|
||||||
/// By default, this is 1024
|
/// By default, this is 1024
|
||||||
pub max_batch_length: u32,
|
pub max_batch_length: u32,
|
||||||
|
/// Max duration to wait for the query to execute before timing out.
|
||||||
|
pub timeout: Option<Duration>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for QueryExecutionOptions {
|
impl Default for QueryExecutionOptions {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
max_batch_length: 1024,
|
max_batch_length: 1024,
|
||||||
|
timeout: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -579,6 +583,15 @@ pub trait ExecutableQuery {
|
|||||||
) -> impl Future<Output = Result<SendableRecordBatchStream>> + Send;
|
) -> impl Future<Output = Result<SendableRecordBatchStream>> + Send;
|
||||||
|
|
||||||
fn explain_plan(&self, verbose: bool) -> impl Future<Output = Result<String>> + Send;
|
fn explain_plan(&self, verbose: bool) -> impl Future<Output = Result<String>> + Send;
|
||||||
|
|
||||||
|
fn analyze_plan(&self) -> impl Future<Output = Result<String>> + Send {
|
||||||
|
self.analyze_plan_with_options(QueryExecutionOptions::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn analyze_plan_with_options(
|
||||||
|
&self,
|
||||||
|
options: QueryExecutionOptions,
|
||||||
|
) -> impl Future<Output = Result<String>> + Send;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A query filter that can be applied to a query
|
/// A query filter that can be applied to a query
|
||||||
@@ -765,6 +778,11 @@ impl ExecutableQuery for Query {
|
|||||||
let query = AnyQuery::Query(self.request.clone());
|
let query = AnyQuery::Query(self.request.clone());
|
||||||
self.parent.explain_plan(&query, verbose).await
|
self.parent.explain_plan(&query, verbose).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn analyze_plan_with_options(&self, options: QueryExecutionOptions) -> Result<String> {
|
||||||
|
let query = AnyQuery::Query(self.request.clone());
|
||||||
|
self.parent.analyze_plan(&query, options).await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A request for a nearest-neighbors search into a table
|
/// A request for a nearest-neighbors search into a table
|
||||||
@@ -993,7 +1011,10 @@ impl VectorQuery {
         self
     }

-    pub async fn execute_hybrid(&self) -> Result<SendableRecordBatchStream> {
+    pub async fn execute_hybrid(
+        &self,
+        options: QueryExecutionOptions,
+    ) -> Result<SendableRecordBatchStream> {
         // clone query and specify we want to include row IDs, which can be needed for reranking
         let mut fts_query = Query::new(self.parent.clone());
         fts_query.request = self.request.base.clone();

@@ -1002,7 +1023,10 @@ impl VectorQuery {
         let mut vector_query = self.clone().with_row_id();

         vector_query.request.base.full_text_search = None;
-        let (fts_results, vec_results) = try_join!(fts_query.execute(), vector_query.execute())?;
+        let (fts_results, vec_results) = try_join!(
+            fts_query.execute_with_options(options.clone()),
+            vector_query.inner_execute_with_options(options)
+        )?;

         let (fts_results, vec_results) = try_join!(
             fts_results.try_collect::<Vec<_>>(),

@@ -1042,7 +1066,7 @@ impl VectorQuery {
         })?;

         let mut results = reranker
-            .rerank_hybrid(&fts_query.query, vec_results, fts_results)
+            .rerank_hybrid(&fts_query.query.query(), vec_results, fts_results)
             .await?;

         check_reranker_result(&results)?;

@@ -1060,6 +1084,20 @@ impl VectorQuery {
             RecordBatchStreamAdapter::new(results.schema(), stream::iter([Ok(results)])),
         ))
     }
+
+    async fn inner_execute_with_options(
+        &self,
+        options: QueryExecutionOptions,
+    ) -> Result<SendableRecordBatchStream> {
+        let plan = self.create_plan(options.clone()).await?;
+        let inner = execute_plan(plan, Default::default())?;
+        let inner = if let Some(timeout) = options.timeout {
+            TimeoutStream::new_boxed(inner, timeout)
+        } else {
+            inner
+        };
+        Ok(DatasetRecordBatchStream::new(inner).into())
+    }
 }

 impl ExecutableQuery for VectorQuery {

@@ -1073,22 +1111,24 @@ impl ExecutableQuery for VectorQuery {
         options: QueryExecutionOptions,
     ) -> Result<SendableRecordBatchStream> {
         if self.request.base.full_text_search.is_some() {
-            let hybrid_result = async move { self.execute_hybrid().await }.boxed().await?;
+            let hybrid_result = async move { self.execute_hybrid(options).await }
+                .boxed()
+                .await?;
             return Ok(hybrid_result);
         }

-        Ok(SendableRecordBatchStream::from(
-            DatasetRecordBatchStream::new(execute_plan(
-                self.create_plan(options).await?,
-                Default::default(),
-            )?),
-        ))
+        self.inner_execute_with_options(options).await
     }

     async fn explain_plan(&self, verbose: bool) -> Result<String> {
         let query = AnyQuery::VectorQuery(self.request.clone());
         self.parent.explain_plan(&query, verbose).await
     }
+
+    async fn analyze_plan_with_options(&self, options: QueryExecutionOptions) -> Result<String> {
+        let query = AnyQuery::VectorQuery(self.request.clone());
+        self.parent.analyze_plan(&query, options).await
+    }
 }

 impl HasQuery for VectorQuery {
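
Note (not part of the diff): with `execute_hybrid` now taking `QueryExecutionOptions`, both legs of a hybrid query (FTS and vector) run under the same options, so a timeout bounds each stream. A hedged caller-side sketch, using builder names as they appear elsewhere in this codebase:

    use std::time::Duration;

    use lance_index::scalar::FullTextSearchQuery;
    use lancedb::query::{ExecutableQuery, QueryExecutionOptions};

    async fn hybrid_with_deadline(table: &lancedb::Table) -> lancedb::error::Result<()> {
        let options = QueryExecutionOptions {
            timeout: Some(Duration::from_secs(5)),
            ..Default::default()
        };
        // A vector query with an FTS component triggers the hybrid path above.
        let _stream = table
            .query()
            .full_text_search(FullTextSearchQuery::new("hello world".into()))
            .nearest_to(vec![0.0_f32; 128])?
            .execute_with_options(options)
            .await?;
        Ok(())
    }
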
@@ -1370,6 +1410,31 @@ mod tests {
         }
     }

+    #[tokio::test]
+    async fn test_analyze_plan() {
+        let tmp_dir = tempdir().unwrap();
+        let table = make_test_table(&tmp_dir).await;
+
+        let result = table.query().analyze_plan().await.unwrap();
+        assert!(result.contains("metrics="));
+    }
+
+    #[tokio::test]
+    async fn test_analyze_plan_with_options() {
+        let tmp_dir = tempdir().unwrap();
+        let table = make_test_table(&tmp_dir).await;
+
+        let result = table
+            .query()
+            .analyze_plan_with_options(QueryExecutionOptions {
+                max_batch_length: 10,
+                ..Default::default()
+            })
+            .await
+            .unwrap();
+        assert!(result.contains("metrics="));
+    }
+
     fn assert_plan_exists(plan: &Arc<dyn ExecutionPlan>, name: &str) -> bool {
         if plan.name() == name {
             return true;
@@ -13,7 +13,7 @@ use reqwest::{
 use crate::error::{Error, Result};
 use crate::remote::db::RemoteOptions;

-const REQUEST_ID_HEADER: &str = "x-request-id";
+const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id");

 /// Configuration for the LanceDB Cloud HTTP client.
 #[derive(Clone, Debug)]

@@ -299,7 +299,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
     ) -> Result<HeaderMap> {
         let mut headers = HeaderMap::new();
         headers.insert(
-            "x-api-key",
+            HeaderName::from_static("x-api-key"),
             HeaderValue::from_str(api_key).map_err(|_| Error::InvalidInput {
                 message: "non-ascii api key provided".to_string(),
             })?,

@@ -307,7 +307,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
         if region == "local" {
             let host = format!("{}.local.api.lancedb.com", db_name);
             headers.insert(
-                "Host",
+                http::header::HOST,
                 HeaderValue::from_str(&host).map_err(|_| Error::InvalidInput {
                     message: format!("non-ascii database name '{}' provided", db_name),
                 })?,

@@ -315,7 +315,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
         }
         if has_host_override {
             headers.insert(
-                "x-lancedb-database",
+                HeaderName::from_static("x-lancedb-database"),
                 HeaderValue::from_str(db_name).map_err(|_| Error::InvalidInput {
                     message: format!("non-ascii database name '{}' provided", db_name),
                 })?,

@@ -323,7 +323,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
         }
         if db_prefix.is_some() {
             headers.insert(
-                "x-lancedb-database-prefix",
+                HeaderName::from_static("x-lancedb-database-prefix"),
                 HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
                     message: format!(
                         "non-ascii database prefix '{}' provided",

@@ -335,7 +335,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {

         if let Some(v) = options.0.get("account_name") {
             headers.insert(
-                "x-azure-storage-account-name",
+                HeaderName::from_static("x-azure-storage-account-name"),
                 HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
                     message: format!("non-ascii storage account name '{}' provided", db_name),
                 })?,

@@ -343,7 +343,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
         }
         if let Some(v) = options.0.get("azure_storage_account_name") {
             headers.insert(
-                "x-azure-storage-account-name",
+                HeaderName::from_static("x-azure-storage-account-name"),
                 HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
                     message: format!("non-ascii storage account name '{}' provided", db_name),
                 })?,
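
Note (not part of the diff): replacing bare `&str` header names with `HeaderName::from_static` moves name validation to a single point; in recent versions of the `http` crate the constructor is usable in `const` items, so a malformed name fails at build time rather than on every insert. A small self-contained sketch:

    use http::header::{HeaderMap, HeaderName, HeaderValue};

    const REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");

    fn add_request_id(headers: &mut HeaderMap, id: &str) {
        if let Ok(value) = HeaderValue::from_str(id) {
            // Inserting a typed `HeaderName` skips re-parsing the name string,
            // which `headers.insert("x-request-id", ...)` would do on each call.
            headers.insert(REQUEST_ID, value);
        }
    }
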
@@ -52,6 +52,10 @@ impl ServerVersion {
     pub fn support_multivector(&self) -> bool {
         self.0 >= semver::Version::new(0, 2, 0)
     }
+
+    pub fn support_structural_fts(&self) -> bool {
+        self.0 >= semver::Version::new(0, 3, 0)
+    }
 }

 pub const OPT_REMOTE_PREFIX: &str = "remote_database_";
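
Note (not part of the diff): a self-contained sketch mirroring the capability check above — the client gates the newer structured-FTS wire format on the negotiated server version:

    struct ServerVersion(semver::Version);

    impl ServerVersion {
        fn support_structural_fts(&self) -> bool {
            self.0 >= semver::Version::new(0, 3, 0)
        }
    }

    fn main() {
        assert!(ServerVersion(semver::Version::new(0, 3, 1)).support_structural_fts());
        assert!(!ServerVersion(semver::Version::new(0, 2, 9)).support_structural_fts());
    }
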
@@ -20,7 +20,7 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
 use datafusion_physical_plan::{ExecutionPlan, RecordBatchStream, SendableRecordBatchStream};
 use futures::TryStreamExt;
 use http::header::CONTENT_TYPE;
-use http::StatusCode;
+use http::{HeaderName, StatusCode};
 use lance::arrow::json::{JsonDataType, JsonSchema};
 use lance::dataset::scanner::DatasetRecordBatchStream;
 use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};

@@ -44,6 +44,8 @@ use super::client::{HttpSend, RestfulLanceDbClient, Sender};
 use super::db::ServerVersion;
 use super::ARROW_STREAM_CONTENT_TYPE;

+const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
+
 #[derive(Debug)]
 pub struct RemoteTable<S: HttpSend = Sender> {
     #[allow(dead_code)]

@@ -155,7 +157,11 @@ impl<S: HttpSend> RemoteTable<S> {
         Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream)))
     }

-    fn apply_query_params(body: &mut serde_json::Value, params: &QueryRequest) -> Result<()> {
+    fn apply_query_params(
+        &self,
+        body: &mut serde_json::Value,
+        params: &QueryRequest,
+    ) -> Result<()> {
         body["prefilter"] = params.prefilter.into();
         if let Some(offset) = params.offset {
             body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset));
@@ -209,10 +215,17 @@ impl<S: HttpSend> RemoteTable<S> {
                     message: "Wand factor is not yet supported in LanceDB Cloud".into(),
                 });
             }
-            body["full_text_query"] = serde_json::json!({
-                "columns": full_text_search.columns,
-                "query": full_text_search.query,
-            })
+
+            if self.server_version.support_structural_fts() {
+                body["full_text_query"] = serde_json::json!({
+                    "query": full_text_search.query.clone(),
+                });
+            } else {
+                body["full_text_query"] = serde_json::json!({
+                    "columns": full_text_search.columns().into_iter().collect::<Vec<_>>(),
+                    "query": full_text_search.query.query(),
+                })
+            }
         }

         Ok(())
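
Note (not part of the diff): the two request shapes produced above, as illustrative `serde_json` literals — servers that pass `support_structural_fts` receive the full query tree, older servers the flat columns-plus-string form (column names and terms here are placeholders):

    fn example_bodies() -> (serde_json::Value, serde_json::Value) {
        let structural = serde_json::json!({
            "full_text_query": {
                "query": { "match": { "terms": "hello world", "column": "title" } },
            }
        });
        let legacy = serde_json::json!({
            "full_text_query": {
                "columns": ["title", "body"],
                "query": "hello world",
            }
        });
        (structural, legacy)
    }
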
@@ -223,7 +236,7 @@ impl<S: HttpSend> RemoteTable<S> {
         mut body: serde_json::Value,
         query: &VectorQueryRequest,
     ) -> Result<Vec<serde_json::Value>> {
-        Self::apply_query_params(&mut body, &query.base)?;
+        self.apply_query_params(&mut body, &query.base)?;

         // Apply general parameters, before we dispatch based on number of query vectors.
         body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());

@@ -321,28 +334,25 @@ impl<S: HttpSend> RemoteTable<S> {
     async fn execute_query(
         &self,
         query: &AnyQuery,
-        _options: QueryExecutionOptions,
+        options: &QueryExecutionOptions,
     ) -> Result<Vec<Pin<Box<dyn RecordBatchStream + Send>>>> {
-        let request = self.client.post(&format!("/v1/table/{}/query/", self.name));
+        let mut request = self.client.post(&format!("/v1/table/{}/query/", self.name));

-        let version = self.current_version().await;
-        let mut body = serde_json::json!({ "version": version });
+        if let Some(timeout) = options.timeout {
+            // Client side timeout
+            request = request.timeout(timeout);
+            // Also send to server, so it can abort the query if it takes too long.
+            // (If it doesn't fit into u64, it's not worth sending anyways.)
+            if let Ok(timeout_ms) = u64::try_from(timeout.as_millis()) {
+                request = request.header(REQUEST_TIMEOUT_HEADER, timeout_ms);
+            }
+        }

-        let requests = match query {
-            AnyQuery::Query(query) => {
-                Self::apply_query_params(&mut body, query)?;
-                // Empty vector can be passed if no vector search is performed.
-                body["vector"] = serde_json::Value::Array(Vec::new());
-                vec![request.json(&body)]
-            }
-            AnyQuery::VectorQuery(query) => {
-                let bodies = self.apply_vector_query_params(body, query)?;
-                bodies
-                    .into_iter()
-                    .map(|body| request.try_clone().unwrap().json(&body))
-                    .collect()
-            }
-        };
+        let query_bodies = self.prepare_query_bodies(query).await?;
+        let requests: Vec<reqwest::RequestBuilder> = query_bodies
+            .into_iter()
+            .map(|body| request.try_clone().unwrap().json(&body))
+            .collect();

         let futures = requests.into_iter().map(|req| async move {
             let (request_id, response) = self.client.send(req, true).await?;
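
Note (not part of the diff): the timeout is enforced twice — reqwest aborts the request client-side, and the millisecond budget also travels to the server in the `x-request-timeout-ms` header so it can cancel the query on its end. A reduced, self-contained sketch of that logic:

    use std::time::Duration;

    fn apply_timeout(request: reqwest::RequestBuilder, timeout: Duration) -> reqwest::RequestBuilder {
        let request = request.timeout(timeout);
        match u64::try_from(timeout.as_millis()) {
            Ok(ms) => request.header("x-request-timeout-ms", ms),
            // A timeout too large for u64 milliseconds is not worth sending.
            Err(_) => request,
        }
    }
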
@@ -351,6 +361,22 @@ impl<S: HttpSend> RemoteTable<S> {
         let streams = futures::future::try_join_all(futures).await?;
         Ok(streams)
     }
+
+    async fn prepare_query_bodies(&self, query: &AnyQuery) -> Result<Vec<serde_json::Value>> {
+        let version = self.current_version().await;
+        let base_body = serde_json::json!({ "version": version });
+
+        match query {
+            AnyQuery::Query(query) => {
+                let mut body = base_body.clone();
+                self.apply_query_params(&mut body, query)?;
+                // Empty vector can be passed if no vector search is performed.
+                body["vector"] = serde_json::Value::Array(Vec::new());
+                Ok(vec![body])
+            }
+            AnyQuery::VectorQuery(query) => self.apply_vector_query_params(base_body, query),
+        }
+    }
 }

 #[derive(Deserialize)]
@@ -422,10 +448,17 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         Ok(())
     }
     async fn restore(&self) -> Result<()> {
-        self.check_mutable().await?;
-        Err(Error::NotSupported {
-            message: "restore is not supported on LanceDB cloud.".into(),
-        })
+        let mut request = self
+            .client
+            .post(&format!("/v1/table/{}/restore/", self.name));
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
+        let (request_id, response) = self.client.send(request, true).await?;
+        self.check_table_response(&request_id, response).await?;
+        self.checkout_latest().await?;
+        Ok(())
     }

     async fn list_versions(&self) -> Result<Vec<Version>> {
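
Note (not part of the diff): `restore` is now implemented for LanceDB Cloud instead of returning `NotSupported`. A hedged usage sketch — check out a historical version, then restore it as the new latest (the implementation above re-checks out the latest version itself):

    async fn rollback(table: &lancedb::Table, version: u64) -> lancedb::error::Result<()> {
        table.checkout(version).await?; // pin the table to an older version
        table.restore().await?;         // write it back as the newest version
        Ok(())
    }
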
@@ -522,7 +555,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         query: &AnyQuery,
         options: QueryExecutionOptions,
     ) -> Result<Arc<dyn ExecutionPlan>> {
-        let streams = self.execute_query(query, options).await?;
+        let streams = self.execute_query(query, &options).await?;
         if streams.len() == 1 {
             let stream = streams.into_iter().next().unwrap();
             Ok(Arc::new(OneShotExec::new(stream)))

@@ -538,9 +571,9 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn query(
         &self,
         query: &AnyQuery,
-        _options: QueryExecutionOptions,
+        options: QueryExecutionOptions,
     ) -> Result<DatasetRecordBatchStream> {
-        let streams = self.execute_query(query, _options).await?;
+        let streams = self.execute_query(query, &options).await?;

         if streams.len() == 1 {
             Ok(DatasetRecordBatchStream::new(

@@ -559,6 +592,94 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
             )?))
         }
     }
+
+    async fn explain_plan(&self, query: &AnyQuery, verbose: bool) -> Result<String> {
+        let base_request = self
+            .client
+            .post(&format!("/v1/table/{}/explain_plan/", self.name));
+
+        let query_bodies = self.prepare_query_bodies(query).await?;
+        let requests: Vec<reqwest::RequestBuilder> = query_bodies
+            .into_iter()
+            .map(|query_body| {
+                let explain_request = serde_json::json!({
+                    "verbose": verbose,
+                    "query": query_body
+                });
+
+                base_request.try_clone().unwrap().json(&explain_request)
+            })
+            .collect::<Vec<_>>();
+
+        let futures = requests.into_iter().map(|req| async move {
+            let (request_id, response) = self.client.send(req, true).await?;
+            let response = self.check_table_response(&request_id, response).await?;
+            let body = response.text().await.err_to_http(request_id.clone())?;
+
+            serde_json::from_str(&body).map_err(|e| Error::Http {
+                source: format!("Failed to parse explain plan: {}", e).into(),
+                request_id,
+                status_code: None,
+            })
+        });
+
+        let plan_texts = futures::future::try_join_all(futures).await?;
+        let final_plan = if plan_texts.len() > 1 {
+            plan_texts
+                .into_iter()
+                .enumerate()
+                .map(|(i, plan)| format!("--- Plan #{} ---\n{}", i + 1, plan))
+                .collect::<Vec<_>>()
+                .join("\n\n")
+        } else {
+            plan_texts.into_iter().next().unwrap_or_default()
+        };
+
+        Ok(final_plan)
+    }
+
+    async fn analyze_plan(
+        &self,
+        query: &AnyQuery,
+        _options: QueryExecutionOptions,
+    ) -> Result<String> {
+        let request = self
+            .client
+            .post(&format!("/v1/table/{}/analyze_plan/", self.name));
+
+        let query_bodies = self.prepare_query_bodies(query).await?;
+        let requests: Vec<reqwest::RequestBuilder> = query_bodies
+            .into_iter()
+            .map(|body| request.try_clone().unwrap().json(&body))
+            .collect();

+        let futures = requests.into_iter().map(|req| async move {
+            let (request_id, response) = self.client.send(req, true).await?;
+            let response = self.check_table_response(&request_id, response).await?;
+            let body = response.text().await.err_to_http(request_id.clone())?;
+
+            serde_json::from_str(&body).map_err(|e| Error::Http {
+                source: format!("Failed to execute analyze plan: {}", e).into(),
+                request_id,
+                status_code: None,
+            })
+        });
+
+        let analyze_result_texts = futures::future::try_join_all(futures).await?;
+        let final_analyze = if analyze_result_texts.len() > 1 {
+            analyze_result_texts
+                .into_iter()
+                .enumerate()
+                .map(|(i, plan)| format!("--- Query #{} ---\n{}", i + 1, plan))
+                .collect::<Vec<_>>()
+                .join("\n\n")
+        } else {
+            analyze_result_texts.into_iter().next().unwrap_or_default()
+        };
+
+        Ok(final_analyze)
+    }
+
     async fn update(&self, update: UpdateBuilder) -> Result<u64> {
         self.check_mutable().await?;
         let request = self
@@ -581,6 +702,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {

         Ok(0) // TODO: support returning number of modified rows once supported in SaaS.
     }
+
     async fn delete(&self, predicate: &str) -> Result<()> {
         self.check_mutable().await?;
         let body = serde_json::json!({ "predicate": predicate });

@@ -881,6 +1003,12 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         Ok(())
     }

+    async fn prewarm_index(&self, _index_name: &str) -> Result<()> {
+        Err(Error::NotSupported {
+            message: "prewarm_index is not yet supported on LanceDB cloud.".into(),
+        })
+    }
+
     async fn table_definition(&self) -> Result<TableDefinition> {
         Err(Error::NotSupported {
             message: "table_definition is not supported on LanceDB cloud.".into(),

@@ -938,6 +1066,7 @@ mod tests {
     use arrow_schema::{DataType, Field, Schema};
     use chrono::{DateTime, Utc};
     use futures::{future::BoxFuture, StreamExt, TryFutureExt};
+    use lance_index::scalar::inverted::query::MatchQuery;
     use lance_index::scalar::FullTextSearchQuery;
     use reqwest::Body;
     use rstest::rstest;
@@ -1584,7 +1713,18 @@ mod tests {
                 "prefilter": true,
                 "version": null
             });
-            assert_eq!(body, expected_body);
+            let expected_body_2 = serde_json::json!({
+                "full_text_query": {
+                    "columns": ["b","a"],
+                    "query": "hello world",
+                },
+                "k": 10,
+                "vector": [],
+                "with_row_id": true,
+                "prefilter": true,
+                "version": null
+            });
+            assert!(body == expected_body || body == expected_body_2);

             let data = RecordBatch::try_new(
                 Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),

@@ -1603,7 +1743,8 @@ mod tests {
             .query()
             .full_text_search(
                 FullTextSearchQuery::new("hello world".into())
-                    .columns(Some(vec!["a".into(), "b".into()])),
+                    .with_columns(&["a".into(), "b".into()])
+                    .unwrap(),
             )
             .with_row_id()
             .limit(10)
@@ -1612,6 +1753,67 @@ mod tests {
             .unwrap();
     }

+    #[tokio::test]
+    async fn test_query_structured_fts() {
+        let table =
+            Table::new_with_handler_version("my_table", semver::Version::new(0, 3, 0), |request| {
+                assert_eq!(request.method(), "POST");
+                assert_eq!(request.url().path(), "/v1/table/my_table/query/");
+                assert_eq!(
+                    request.headers().get("Content-Type").unwrap(),
+                    JSON_CONTENT_TYPE
+                );
+
+                let body = request.body().unwrap().as_bytes().unwrap();
+                let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+                let expected_body = serde_json::json!({
+                    "full_text_query": {
+                        "query": {
+                            "match": {
+                                "terms": "hello world",
+                                "column": "a",
+                                "boost": 1.0,
+                                "fuzziness": 0,
+                                "max_expansions": 50,
+                                "operator": "Or",
+                            },
+                        }
+                    },
+                    "k": 10,
+                    "vector": [],
+                    "with_row_id": true,
+                    "prefilter": true,
+                    "version": null
+                });
+                assert_eq!(body, expected_body);
+
+                let data = RecordBatch::try_new(
+                    Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+                    vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+                )
+                .unwrap();
+                let response_body = write_ipc_file(&data);
+                http::Response::builder()
+                    .status(200)
+                    .header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
+                    .body(response_body)
+                    .unwrap()
+            });
+
+        let _ = table
+            .query()
+            .full_text_search(FullTextSearchQuery::new_query(
+                MatchQuery::new("hello world".to_owned())
+                    .with_column(Some("a".to_owned()))
+                    .into(),
+            ))
+            .with_row_id()
+            .limit(10)
+            .execute()
+            .await
+            .unwrap();
+    }
+
     #[rstest]
     #[case(DEFAULT_SERVER_VERSION.clone())]
     #[case(semver::Version::new(0, 2, 0))]
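
Note (not part of the diff): the structured query tree exercised by this test can also be composed directly; a hedged sketch using the `lance_index` types imported above:

    use lance_index::scalar::inverted::query::MatchQuery;
    use lance_index::scalar::FullTextSearchQuery;

    fn structured_match() -> FullTextSearchQuery {
        FullTextSearchQuery::new_query(
            // Match "hello world" against column "a"; the defaults cover boost,
            // fuzziness, max_expansions, and the Or operator seen in the test.
            MatchQuery::new("hello world".to_owned())
                .with_column(Some("a".to_owned()))
                .into(),
        )
    }
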
@@ -29,8 +29,8 @@ impl FromStr for NormalizeMethod {

     fn from_str(s: &str) -> Result<Self> {
         match s.to_lowercase().as_str() {
-            "score" => Ok(NormalizeMethod::Score),
-            "rank" => Ok(NormalizeMethod::Rank),
+            "score" => Ok(Self::Score),
+            "rank" => Ok(Self::Rank),
             _ => Err(Error::InvalidInput {
                 message: format!("invalid normalize method: {}", s),
             }),

@@ -41,8 +41,8 @@ impl FromStr for NormalizeMethod {
 impl std::fmt::Display for NormalizeMethod {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            NormalizeMethod::Score => write!(f, "score"),
-            NormalizeMethod::Rank => write!(f, "rank"),
+            Self::Score => write!(f, "score"),
+            Self::Rank => write!(f, "rank"),
         }
     }
 }
@@ -33,7 +33,7 @@ use lance::dataset::{
 use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
 use lance::index::vector::utils::infer_vector_dim;
 use lance::io::WrappingObjectStore;
-use lance_datafusion::exec::execute_plan;
+use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_index::vector::hnsw::builder::HnswBuildParams;
 use lance_index::vector::ivf::IvfBuildParams;

@@ -68,7 +68,7 @@ use crate::query::{
 use crate::utils::{
     default_vector_column, supported_bitmap_data_type, supported_btree_data_type,
     supported_fts_data_type, supported_label_list_data_type, supported_vector_data_type,
-    PatchReadParam, PatchWriteParam,
+    PatchReadParam, PatchWriteParam, TimeoutStream,
 };

 use self::dataset::DatasetConsistencyWrapper;

@@ -433,6 +433,12 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {

         Ok(format!("{}", display.indent(verbose)))
     }
+    async fn analyze_plan(
+        &self,
+        query: &AnyQuery,
+        options: QueryExecutionOptions,
+    ) -> Result<String>;
+
     /// Add new records to the table.
     async fn add(
         &self,

@@ -449,6 +455,8 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
     async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
     /// Drop an index from the table.
     async fn drop_index(&self, name: &str) -> Result<()>;
+    /// Prewarm an index in the table
+    async fn prewarm_index(&self, name: &str) -> Result<()>;
     /// Get statistics about the index.
     async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
     /// Merge insert new records into the table.
@@ -788,8 +796,8 @@ impl Table {
     /// # Arguments
     ///
     /// * `on` One or more columns to join on. This is how records from the
-    /// source table and target table are matched. Typically this is some
-    /// kind of key or id column.
+    ///   source table and target table are matched. Typically this is some
+    ///   kind of key or id column.
     ///
     /// # Examples
     ///
@@ -1080,6 +1088,22 @@ impl Table {
         self.inner.drop_index(name).await
     }

+    /// Prewarm an index in the table
+    ///
+    /// This is a hint to fully load the index into memory. It can be used to
+    /// avoid cold starts
+    ///
+    /// It is generally wasteful to call this if the index does not fit into the
+    /// available cache.
+    ///
+    /// Note: This function is not yet supported on all indices, in which case it
+    /// may do nothing.
+    ///
+    /// Use [`Self::list_indices()`] to find the names of the indices.
+    pub async fn prewarm_index(&self, name: &str) -> Result<()> {
+        self.inner.prewarm_index(name).await
+    }
+
 // Take many execution plans and map them into a single plan that adds
 // a query_index column and unions them.
 pub(crate) fn multi_vector_plan(
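
Note (not part of the diff): a hedged sketch of warming every index right after opening a table, assuming `IndexConfig` exposes the index name as a `name` field:

    async fn prewarm_all(table: &lancedb::Table) -> lancedb::error::Result<()> {
        for index in table.list_indices().await? {
            // A no-op on index types that do not support prewarming yet.
            table.prewarm_index(&index.name).await?;
        }
        Ok(())
    }
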
@@ -1769,11 +1793,14 @@ impl NativeTable {
         query: &AnyQuery,
         options: QueryExecutionOptions,
     ) -> Result<DatasetRecordBatchStream> {
-        let plan = self.create_plan(query, options).await?;
-        Ok(DatasetRecordBatchStream::new(execute_plan(
-            plan,
-            Default::default(),
-        )?))
+        let plan = self.create_plan(query, options.clone()).await?;
+        let inner = execute_plan(plan, Default::default())?;
+        let inner = if let Some(timeout) = options.timeout {
+            TimeoutStream::new_boxed(inner, timeout)
+        } else {
+            inner
+        };
+        Ok(DatasetRecordBatchStream::new(inner))
     }

     /// Check whether the table uses V2 manifest paths.
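
Note (not part of the diff): from the caller's side, the timeout rides in `QueryExecutionOptions`; once the deadline passes, the wrapped stream yields an execution error instead of more batches. A minimal sketch:

    use std::time::Duration;

    use futures::TryStreamExt;
    use lancedb::query::{ExecutableQuery, QueryExecutionOptions};

    async fn collect_with_deadline(
        table: &lancedb::Table,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let batches = table
            .query()
            .execute_with_options(QueryExecutionOptions {
                timeout: Some(Duration::from_millis(500)),
                ..Default::default()
            })
            .await?
            .try_collect::<Vec<_>>()
            .await?;
        println!("collected {} batches before the deadline", batches.len());
        Ok(())
    }
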
@@ -1997,6 +2024,11 @@ impl BaseTable for NativeTable {
         Ok(())
     }

+    async fn prewarm_index(&self, index_name: &str) -> Result<()> {
+        let dataset = self.dataset.get().await?;
+        Ok(dataset.prewarm_index(index_name).await?)
+    }
+
     async fn update(&self, update: UpdateBuilder) -> Result<u64> {
         let dataset = self.dataset.get().await?.clone();
         let mut builder = LanceUpdateBuilder::new(Arc::new(dataset));

@@ -2192,6 +2224,15 @@ impl BaseTable for NativeTable {
         self.generic_query(query, options).await
     }

+    async fn analyze_plan(
+        &self,
+        query: &AnyQuery,
+        options: QueryExecutionOptions,
+    ) -> Result<String> {
+        let plan = self.create_plan(query, options).await?;
+        Ok(lance_analyze_plan(plan, Default::default()).await?)
+    }
+
     async fn merge_insert(
         &self,
         params: MergeInsertBuilder,
@@ -3437,6 +3478,9 @@ mod tests {
         assert_eq!(stats.num_unindexed_rows, 0);
         assert_eq!(stats.index_type, crate::index::IndexType::FTS);
         assert_eq!(stats.distance_type, None);
+
+        // Make sure we can call prewarm without error
+        table.prewarm_index("text_idx").await.unwrap();
     }

     #[tokio::test]

@@ -3532,7 +3576,7 @@ mod tests {
         let native_tbl = table.as_native().unwrap();

         let manifest = native_tbl.manifest().await.unwrap();
-        assert_eq!(manifest.config.len(), 0);
+        let base_config_len = manifest.config.len();

         native_tbl
             .update_config(vec![("test_key1".to_string(), "test_val1".to_string())])

@@ -3540,7 +3584,7 @@ mod tests {
             .unwrap();

         let manifest = native_tbl.manifest().await.unwrap();
-        assert_eq!(manifest.config.len(), 1);
+        assert_eq!(manifest.config.len(), 1 + base_config_len);
         assert_eq!(
             manifest.config.get("test_key1"),
             Some(&"test_val1".to_string())

@@ -3551,7 +3595,7 @@ mod tests {
             .await
             .unwrap();
         let manifest = native_tbl.manifest().await.unwrap();
-        assert_eq!(manifest.config.len(), 2);
+        assert_eq!(manifest.config.len(), 2 + base_config_len);
         assert_eq!(
             manifest.config.get("test_key1"),
             Some(&"test_val1".to_string())

@@ -3569,7 +3613,7 @@ mod tests {
             .await
             .unwrap();
         let manifest = native_tbl.manifest().await.unwrap();
-        assert_eq!(manifest.config.len(), 2);
+        assert_eq!(manifest.config.len(), 2 + base_config_len);
         assert_eq!(
             manifest.config.get("test_key1"),
             Some(&"test_val1".to_string())

@@ -3581,7 +3625,7 @@ mod tests {

         native_tbl.delete_config_keys(&["test_key1"]).await.unwrap();
         let manifest = native_tbl.manifest().await.unwrap();
-        assert_eq!(manifest.config.len(), 1);
+        assert_eq!(manifest.config.len(), 1 + base_config_len);
         assert_eq!(
             manifest.config.get("test_key2"),
             Some(&"test_val2_update".to_string())
@@ -290,3 +290,48 @@ impl DerefMut for DatasetWriteGuard<'_> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use arrow_schema::{DataType, Field, Schema};
+    use lance::{dataset::WriteParams, io::ObjectStoreParams};
+
+    use super::*;
+
+    use crate::{connect, io::object_store::io_tracking::IoStatsHolder, table::WriteOptions};
+
+    #[tokio::test]
+    async fn test_iops_open_strong_consistency() {
+        let db = connect("memory://")
+            .read_consistency_interval(Duration::ZERO)
+            .execute()
+            .await
+            .expect("Failed to connect to database");
+        let io_stats = IoStatsHolder::default();
+
+        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
+
+        let table = db
+            .create_empty_table("test", schema)
+            .write_options(WriteOptions {
+                lance_write_params: Some(WriteParams {
+                    store_params: Some(ObjectStoreParams {
+                        object_store_wrapper: Some(Arc::new(io_stats.clone())),
+                        ..Default::default()
+                    }),
+                    ..Default::default()
+                }),
+            })
+            .execute()
+            .await
+            .unwrap();
+
+        io_stats.incremental_stats();
+
+        // We should only need 1 read IOP to check the schema: looking for the
+        // latest version.
+        table.schema().await.unwrap();
+        let stats = io_stats.incremental_stats();
+        assert_eq!(stats.read_iops, 1);
+    }
+}
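
Note (not part of the diff): `read_consistency_interval(Duration::ZERO)` opts into strong consistency — every read first checks for a newer table version, which the test above pins to exactly one read IOP. A connection sketch:

    use std::time::Duration;

    async fn connect_strongly_consistent() -> lancedb::error::Result<lancedb::Connection> {
        lancedb::connect("memory://")
            .read_consistency_interval(Duration::ZERO)
            .execute()
            .await
    }
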
@@ -3,14 +3,20 @@

 use std::sync::Arc;

-use arrow_schema::{DataType, Schema};
+use arrow_array::RecordBatch;
+use arrow_schema::{DataType, Schema, SchemaRef};
+use datafusion_common::{DataFusionError, Result as DataFusionResult};
+use datafusion_execution::RecordBatchStream;
+use futures::{FutureExt, Stream};
 use lance::arrow::json::JsonDataType;
 use lance::dataset::{ReadParams, WriteParams};
 use lance::index::vector::utils::infer_vector_dim;
 use lance::io::{ObjectStoreParams, WrappingObjectStore};
 use lazy_static::lazy_static;
+use std::pin::Pin;

 use crate::error::{Error, Result};
+use datafusion_physical_plan::SendableRecordBatchStream;

 lazy_static! {
     static ref TABLE_NAME_REGEX: regex::Regex = regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
@@ -135,6 +141,7 @@ pub fn supported_btree_data_type(dtype: &DataType) -> bool {
             | DataType::Date32
             | DataType::Date64
             | DataType::Timestamp(_, _)
+            | DataType::FixedSizeBinary(_)
     )
 }

@@ -151,7 +158,17 @@ pub fn supported_label_list_data_type(dtype: &DataType) -> bool {
 }

 pub fn supported_fts_data_type(dtype: &DataType) -> bool {
-    matches!(dtype, DataType::Utf8 | DataType::LargeUtf8)
+    supported_fts_data_type_impl(dtype, false)
+}
+
+fn supported_fts_data_type_impl(dtype: &DataType, in_list: bool) -> bool {
+    match (dtype, in_list) {
+        (DataType::Utf8 | DataType::LargeUtf8, _) => true,
+        (DataType::List(field) | DataType::LargeList(field), false) => {
+            supported_fts_data_type_impl(field.data_type(), true)
+        }
+        _ => false,
+    }
 }

 pub fn supported_vector_data_type(dtype: &DataType) -> bool {
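
Note (not part of the diff): with the recursive helper, FTS indexing now accepts string lists as well as plain strings, but only one level deep. A sketch of the accepted shapes, calling the helper above:

    use std::sync::Arc;

    use arrow_schema::{DataType, Field};

    fn fts_type_examples() {
        assert!(supported_fts_data_type(&DataType::Utf8));
        let strings = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
        assert!(supported_fts_data_type(&strings));
        // The `in_list` flag stops the recursion: a list of lists is rejected.
        let nested = DataType::List(Arc::new(Field::new("item", strings.clone(), true)));
        assert!(!supported_fts_data_type(&nested));
    }
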
@@ -177,11 +194,97 @@ pub fn string_to_datatype(s: &str) -> Option<DataType> {
     (&json_type).try_into().ok()
 }

+enum TimeoutState {
+    NotStarted {
+        timeout: std::time::Duration,
+    },
+    Started {
+        deadline: Pin<Box<tokio::time::Sleep>>,
+        timeout: std::time::Duration,
+    },
+    Completed,
+}
+
+/// A `Stream` wrapper that implements a timeout.
+///
+/// The timeout starts when the first `poll_next` is called. As soon as the timeout
+/// duration has passed, the stream will return an `Err` indicating a timeout error
+/// for the next poll.
+pub struct TimeoutStream {
+    inner: SendableRecordBatchStream,
+    state: TimeoutState,
+}
+
+impl TimeoutStream {
+    pub fn new(inner: SendableRecordBatchStream, timeout: std::time::Duration) -> Self {
+        Self {
+            inner,
+            state: TimeoutState::NotStarted { timeout },
+        }
+    }
+
+    pub fn new_boxed(
+        inner: SendableRecordBatchStream,
+        timeout: std::time::Duration,
+    ) -> SendableRecordBatchStream {
+        Box::pin(Self::new(inner, timeout))
+    }
+
+    fn timeout_error(timeout: &std::time::Duration) -> DataFusionError {
+        DataFusionError::Execution(format!("Query timeout after {} ms", timeout.as_millis()))
+    }
+}
+
+impl RecordBatchStream for TimeoutStream {
+    fn schema(&self) -> SchemaRef {
+        self.inner.schema()
+    }
+}
+
+impl Stream for TimeoutStream {
+    type Item = DataFusionResult<RecordBatch>;
+
+    fn poll_next(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<Option<Self::Item>> {
+        match &mut self.state {
+            TimeoutState::NotStarted { timeout } => {
+                if timeout.is_zero() {
+                    return std::task::Poll::Ready(Some(Err(Self::timeout_error(timeout))));
+                }
+                let deadline = Box::pin(tokio::time::sleep(*timeout));
+                self.state = TimeoutState::Started {
+                    deadline,
+                    timeout: *timeout,
+                };
+                self.poll_next(cx)
+            }
+            TimeoutState::Started { deadline, timeout } => match deadline.poll_unpin(cx) {
+                std::task::Poll::Ready(_) => {
+                    let err = Self::timeout_error(timeout);
+                    self.state = TimeoutState::Completed;
+                    std::task::Poll::Ready(Some(Err(err)))
+                }
+                std::task::Poll::Pending => {
+                    let inner = Pin::new(&mut self.inner);
+                    inner.poll_next(cx)
+                }
+            },
+            TimeoutState::Completed => std::task::Poll::Ready(None),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::*;
+    use arrow_array::Int32Array;
+    use arrow_schema::Field;
+    use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
+    use futures::{stream, StreamExt};
+    use tokio::time::sleep;

-    use arrow_schema::{DataType, Field};
+    use super::*;

     #[test]
     fn test_guess_default_column() {
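
Note (not part of the diff): `TimeoutStream` checks the deadline before polling the inner stream, so a stalled upstream cannot suppress the timeout error, and the timer is only armed on the first poll. Wrapping any stream is a one-liner:

    use std::time::Duration;

    use datafusion_physical_plan::SendableRecordBatchStream;

    fn with_deadline(
        inner: SendableRecordBatchStream,
        timeout: Duration,
    ) -> SendableRecordBatchStream {
        // Returns a boxed stream that yields the inner batches until the
        // deadline, then a single execution error on the next poll.
        TimeoutStream::new_boxed(inner, timeout)
    }
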
@@ -248,4 +351,85 @@ mod tests {
         let expected = DataType::Int32;
         assert_eq!(string_to_datatype(string), Some(expected));
     }
+
+    fn sample_batch() -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "col1",
+            DataType::Int32,
+            false,
+        )]));
+        RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap()
+    }
+
+    #[tokio::test]
+    async fn test_timeout_stream() {
+        let batch = sample_batch();
+        let schema = batch.schema();
+        let mock_stream = stream::iter(vec![Ok(batch.clone()), Ok(batch.clone())]);
+
+        let sendable_stream: SendableRecordBatchStream =
+            Box::pin(RecordBatchStreamAdapter::new(schema.clone(), mock_stream));
+        let timeout_duration = std::time::Duration::from_millis(10);
+        let mut timeout_stream = TimeoutStream::new(sendable_stream, timeout_duration);
+
+        // Poll the stream to get the first batch
+        let first_result = timeout_stream.next().await;
+        assert!(first_result.is_some());
+        assert!(first_result.unwrap().is_ok());
+
+        // Sleep for the timeout duration
+        sleep(timeout_duration).await;
+
+        // Poll the stream again and ensure it returns a timeout error
+        let second_result = timeout_stream.next().await.unwrap();
+        assert!(second_result.is_err());
+        assert!(second_result
+            .unwrap_err()
+            .to_string()
+            .contains("Query timeout"));
+    }
+
+    #[tokio::test]
+    async fn test_timeout_stream_zero_duration() {
+        let batch = sample_batch();
+        let schema = batch.schema();
+        let mock_stream = stream::iter(vec![Ok(batch.clone()), Ok(batch.clone())]);
+
+        let sendable_stream: SendableRecordBatchStream =
+            Box::pin(RecordBatchStreamAdapter::new(schema.clone(), mock_stream));
+
+        // Setup similar to test_timeout_stream
+        let timeout_duration = std::time::Duration::from_secs(0);
+        let mut timeout_stream = TimeoutStream::new(sendable_stream, timeout_duration);
+
+        // First poll should immediately return a timeout error
+        let result = timeout_stream.next().await.unwrap();
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("Query timeout"));
+    }
+
+    #[tokio::test]
+    async fn test_timeout_stream_completes_normally() {
+        let batch = sample_batch();
+        let schema = batch.schema();
+        let mock_stream = stream::iter(vec![Ok(batch.clone()), Ok(batch.clone())]);
+
+        let sendable_stream: SendableRecordBatchStream =
+            Box::pin(RecordBatchStreamAdapter::new(schema.clone(), mock_stream));
+
+        // Setup a stream with 2 batches
+        // Use a longer timeout that won't trigger
+        let timeout_duration = std::time::Duration::from_secs(1);
+        let mut timeout_stream = TimeoutStream::new(sendable_stream, timeout_duration);
+
+        // Both polls should return data normally
+        assert!(timeout_stream.next().await.unwrap().is_ok());
+        assert!(timeout_stream.next().await.unwrap().is_ok());
+        // Stream should be empty now
+        assert!(timeout_stream.next().await.is_none());
+    }
 }