mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 21:39:57 +00:00
Compare commits
5 Commits
docs/quick
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b67f13f642 | ||
|
|
2f12d67469 | ||
|
|
8d7cc29abb | ||
|
|
a4404e9e18 | ||
|
|
077e5bb586 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.19.1-beta.1"
|
current_version = "0.18.2-beta.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
13
.github/workflows/docs.yml
vendored
13
.github/workflows/docs.yml
vendored
@@ -18,24 +18,17 @@ concurrency:
|
|||||||
group: "pages"
|
group: "pages"
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
env:
|
|
||||||
# This reduces the disk space needed for the build
|
|
||||||
RUSTFLAGS: "-C debuginfo=0"
|
|
||||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
|
||||||
# CI builds are faster with incremental disabled.
|
|
||||||
CARGO_INCREMENTAL: "0"
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# Single deploy job since we're just deploying
|
# Single deploy job since we're just deploying
|
||||||
build:
|
build:
|
||||||
environment:
|
environment:
|
||||||
name: github-pages
|
name: github-pages
|
||||||
url: ${{ steps.deployment.outputs.page_url }}
|
url: ${{ steps.deployment.outputs.page_url }}
|
||||||
runs-on: ubuntu-24.04
|
runs-on: buildjet-8vcpu-ubuntu-2204
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: Install dependencies needed for ubuntu
|
- name: Install dependecies needed for ubuntu
|
||||||
run: |
|
run: |
|
||||||
sudo apt install -y protobuf-compiler libssl-dev
|
sudo apt install -y protobuf-compiler libssl-dev
|
||||||
rustup update && rustup default
|
rustup update && rustup default
|
||||||
@@ -45,7 +38,6 @@ jobs:
|
|||||||
python-version: "3.10"
|
python-version: "3.10"
|
||||||
cache: "pip"
|
cache: "pip"
|
||||||
cache-dependency-path: "docs/requirements.txt"
|
cache-dependency-path: "docs/requirements.txt"
|
||||||
- uses: Swatinem/rust-cache@v2
|
|
||||||
- name: Build Python
|
- name: Build Python
|
||||||
working-directory: python
|
working-directory: python
|
||||||
run: |
|
run: |
|
||||||
@@ -57,6 +49,7 @@ jobs:
|
|||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
cache-dependency-path: node/package-lock.json
|
cache-dependency-path: node/package-lock.json
|
||||||
|
- uses: Swatinem/rust-cache@v2
|
||||||
- name: Install node dependencies
|
- name: Install node dependencies
|
||||||
working-directory: node
|
working-directory: node
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
6
.github/workflows/java-publish.yml
vendored
6
.github/workflows/java-publish.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
|||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||||
with:
|
with:
|
||||||
toolchain: "1.81.0"
|
toolchain: "1.79.0"
|
||||||
cache-workspaces: "./java/core/lancedb-jni"
|
cache-workspaces: "./java/core/lancedb-jni"
|
||||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||||
# "1" means line tables only, which is useful for panic tracebacks.
|
# "1" means line tables only, which is useful for panic tracebacks.
|
||||||
@@ -97,7 +97,7 @@ jobs:
|
|||||||
- name: Dry run
|
- name: Dry run
|
||||||
if: github.event_name == 'pull_request'
|
if: github.event_name == 'pull_request'
|
||||||
run: |
|
run: |
|
||||||
mvn --batch-mode -DskipTests -Drust.release.build=true package
|
mvn --batch-mode -DskipTests package
|
||||||
- name: Set github
|
- name: Set github
|
||||||
run: |
|
run: |
|
||||||
git config --global user.email "LanceDB Github Runner"
|
git config --global user.email "LanceDB Github Runner"
|
||||||
@@ -108,7 +108,7 @@ jobs:
|
|||||||
echo "use-agent" >> ~/.gnupg/gpg.conf
|
echo "use-agent" >> ~/.gnupg/gpg.conf
|
||||||
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
|
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
|
||||||
export GPG_TTY=$(tty)
|
export GPG_TTY=$(tty)
|
||||||
mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
|
mvn --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
|
||||||
env:
|
env:
|
||||||
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
|
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
|
||||||
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
|
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
|
||||||
|
|||||||
44
.github/workflows/npm-publish.yml
vendored
44
.github/workflows/npm-publish.yml
vendored
@@ -18,7 +18,6 @@ on:
|
|||||||
# This should trigger a dry run (we skip the final publish step)
|
# This should trigger a dry run (we skip the final publish step)
|
||||||
paths:
|
paths:
|
||||||
- .github/workflows/npm-publish.yml
|
- .github/workflows/npm-publish.yml
|
||||||
- Cargo.toml # Change in dependency frequently breaks builds
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.ref }}
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
@@ -131,24 +130,29 @@ jobs:
|
|||||||
set -e &&
|
set -e &&
|
||||||
apt-get update &&
|
apt-get update &&
|
||||||
apt-get install -y protobuf-compiler pkg-config
|
apt-get install -y protobuf-compiler pkg-config
|
||||||
- target: x86_64-unknown-linux-musl
|
|
||||||
# This one seems to need some extra memory
|
# TODO: re-enable x64 musl builds. I could not figure out why, but it
|
||||||
host: ubuntu-2404-8x-x64
|
# consistently made GHA runners non-responsive at the end of build. Example:
|
||||||
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
|
# https://github.com/lancedb/lancedb/actions/runs/13980431071/job/39144319470?pr=2250
|
||||||
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
|
|
||||||
features: fp16kernels
|
# - target: x86_64-unknown-linux-musl
|
||||||
pre_build: |-
|
# # This one seems to need some extra memory
|
||||||
set -e &&
|
# host: ubuntu-2404-8x-x64
|
||||||
apk add protobuf-dev curl &&
|
# # https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
|
||||||
ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
|
# docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
|
||||||
ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so &&
|
# features: ","
|
||||||
CC=gcc &&
|
# pre_build: |-
|
||||||
CXX=g++
|
# set -e &&
|
||||||
|
# apk add protobuf-dev curl &&
|
||||||
|
# ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
|
||||||
|
# ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so
|
||||||
|
|
||||||
- target: aarch64-unknown-linux-gnu
|
- target: aarch64-unknown-linux-gnu
|
||||||
host: ubuntu-2404-8x-x64
|
host: ubuntu-2404-8x-x64
|
||||||
# https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
|
# https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
|
||||||
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-debian-aarch64
|
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-debian-aarch64
|
||||||
features: "fp16kernels"
|
# TODO: enable fp16kernels after https://github.com/lancedb/lance/pull/3559
|
||||||
|
features: ","
|
||||||
pre_build: |-
|
pre_build: |-
|
||||||
set -e &&
|
set -e &&
|
||||||
apt-get update &&
|
apt-get update &&
|
||||||
@@ -166,8 +170,8 @@ jobs:
|
|||||||
set -e &&
|
set -e &&
|
||||||
apk add protobuf-dev &&
|
apk add protobuf-dev &&
|
||||||
rustup target add aarch64-unknown-linux-musl &&
|
rustup target add aarch64-unknown-linux-musl &&
|
||||||
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc &&
|
export CC="/aarch64-linux-musl-cross/bin/aarch64-linux-musl-gcc" &&
|
||||||
export CXX_aarch64_unknown_linux_musl=aarch64-linux-musl-g++
|
export CXX="/aarch64-linux-musl-cross/bin/aarch64-linux-musl-g++"
|
||||||
name: build - ${{ matrix.settings.target }}
|
name: build - ${{ matrix.settings.target }}
|
||||||
runs-on: ${{ matrix.settings.host }}
|
runs-on: ${{ matrix.settings.host }}
|
||||||
defaults:
|
defaults:
|
||||||
@@ -531,12 +535,6 @@ jobs:
|
|||||||
for filename in *.tgz; do
|
for filename in *.tgz; do
|
||||||
npm publish $PUBLISH_ARGS $filename
|
npm publish $PUBLISH_ARGS $filename
|
||||||
done
|
done
|
||||||
- name: Deprecate
|
|
||||||
env:
|
|
||||||
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
|
||||||
# We need to deprecate the old package to avoid confusion.
|
|
||||||
# Each time we publish a new version, it gets undeprecated.
|
|
||||||
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
|
|
||||||
- name: Notify Slack Action
|
- name: Notify Slack Action
|
||||||
uses: ravsamhq/notify-slack-action@2.3.0
|
uses: ravsamhq/notify-slack-action@2.3.0
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
|
|||||||
1
.github/workflows/pypi-publish.yml
vendored
1
.github/workflows/pypi-publish.yml
vendored
@@ -8,7 +8,6 @@ on:
|
|||||||
# This should trigger a dry run (we skip the final publish step)
|
# This should trigger a dry run (we skip the final publish step)
|
||||||
paths:
|
paths:
|
||||||
- .github/workflows/pypi-publish.yml
|
- .github/workflows/pypi-publish.yml
|
||||||
- Cargo.toml # Change in dependency frequently breaks builds
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
linux:
|
linux:
|
||||||
|
|||||||
5
.github/workflows/python.yml
vendored
5
.github/workflows/python.yml
vendored
@@ -136,9 +136,9 @@ jobs:
|
|||||||
- uses: ./.github/workflows/run_tests
|
- uses: ./.github/workflows/run_tests
|
||||||
with:
|
with:
|
||||||
integration: true
|
integration: true
|
||||||
- name: Test without pylance or pandas
|
- name: Test without pylance
|
||||||
run: |
|
run: |
|
||||||
pip uninstall -y pylance pandas
|
pip uninstall -y pylance
|
||||||
pytest -vv python/tests/test_table.py
|
pytest -vv python/tests/test_table.py
|
||||||
# Make sure wheels are not included in the Rust cache
|
# Make sure wheels are not included in the Rust cache
|
||||||
- name: Delete wheels
|
- name: Delete wheels
|
||||||
@@ -228,7 +228,6 @@ jobs:
|
|||||||
- name: Install lancedb
|
- name: Install lancedb
|
||||||
run: |
|
run: |
|
||||||
pip install "pydantic<2"
|
pip install "pydantic<2"
|
||||||
pip install pyarrow==16
|
|
||||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||||
pip install tantivy
|
pip install tantivy
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
|||||||
1106
Cargo.lock
generated
1106
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
30
Cargo.toml
30
Cargo.toml
@@ -21,14 +21,16 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.78.0"
|
rust-version = "1.78.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.27.0", "features" = ["dynamodb"], tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
lance = { "version" = "=0.25.0", "features" = [
|
||||||
lance-io = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
"dynamodb",
|
||||||
lance-index = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
] }
|
||||||
lance-linalg = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
lance-io = { version = "=0.25.0" }
|
||||||
lance-table = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
lance-index = { version = "=0.25.0" }
|
||||||
lance-testing = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
lance-linalg = { version = "=0.25.0" }
|
||||||
lance-datafusion = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
lance-table = { version = "=0.25.0" }
|
||||||
lance-encoding = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
|
lance-testing = { version = "=0.25.0" }
|
||||||
|
lance-datafusion = { version = "=0.25.0" }
|
||||||
|
lance-encoding = { version = "=0.25.0" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "54.1", optional = false }
|
arrow = { version = "54.1", optional = false }
|
||||||
arrow-array = "54.1"
|
arrow-array = "54.1"
|
||||||
@@ -39,12 +41,12 @@ arrow-schema = "54.1"
|
|||||||
arrow-arith = "54.1"
|
arrow-arith = "54.1"
|
||||||
arrow-cast = "54.1"
|
arrow-cast = "54.1"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
datafusion = { version = "46.0", default-features = false }
|
datafusion = { version = "45.0", default-features = false }
|
||||||
datafusion-catalog = "46.0"
|
datafusion-catalog = "45.0"
|
||||||
datafusion-common = { version = "46.0", default-features = false }
|
datafusion-common = { version = "45.0", default-features = false }
|
||||||
datafusion-execution = "46.0"
|
datafusion-execution = "45.0"
|
||||||
datafusion-expr = "46.0"
|
datafusion-expr = "45.0"
|
||||||
datafusion-physical-plan = "46.0"
|
datafusion-physical-plan = "45.0"
|
||||||
env_logger = "0.11"
|
env_logger = "0.11"
|
||||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
LanceDB docs are deployed to https://lancedb.github.io/lancedb/.
|
LanceDB docs are deployed to https://lancedb.github.io/lancedb/.
|
||||||
|
|
||||||
The docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
The docs are built and deployed automatically by [Github Actions](.github/workflows/docs.yml)
|
||||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||||
unreleased features.
|
unreleased features.
|
||||||
|
|
||||||
|
|||||||
@@ -105,8 +105,7 @@ markdown_extensions:
|
|||||||
nav:
|
nav:
|
||||||
- Home:
|
- Home:
|
||||||
- LanceDB: index.md
|
- LanceDB: index.md
|
||||||
- 👉 Quickstart: quickstart.md
|
- 🏃🏼♂️ Quick start: basic.md
|
||||||
- 🏃🏼♂️ Basic Usage: basic.md
|
|
||||||
- 📚 Concepts:
|
- 📚 Concepts:
|
||||||
- Vector search: concepts/vector_search.md
|
- Vector search: concepts/vector_search.md
|
||||||
- Indexing:
|
- Indexing:
|
||||||
@@ -238,9 +237,7 @@ nav:
|
|||||||
- 👾 JavaScript (lancedb): js/globals.md
|
- 👾 JavaScript (lancedb): js/globals.md
|
||||||
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
|
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
|
||||||
|
|
||||||
- Getting Started:
|
- Quick start: basic.md
|
||||||
- Quickstart: quickstart.md
|
|
||||||
- Basic Usage: basic.md
|
|
||||||
- Concepts:
|
- Concepts:
|
||||||
- Vector search: concepts/vector_search.md
|
- Vector search: concepts/vector_search.md
|
||||||
- Indexing:
|
- Indexing:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# Basic Usage
|
# Quick start
|
||||||
|
|
||||||
!!! info "LanceDB can be run in a number of ways:"
|
!!! info "LanceDB can be run in a number of ways:"
|
||||||
|
|
||||||
|
|||||||
@@ -342,7 +342,7 @@ For **read and write access**, LanceDB will need a policy such as:
|
|||||||
"Action": [
|
"Action": [
|
||||||
"s3:PutObject",
|
"s3:PutObject",
|
||||||
"s3:GetObject",
|
"s3:GetObject",
|
||||||
"s3:DeleteObject"
|
"s3:DeleteObject",
|
||||||
],
|
],
|
||||||
"Resource": "arn:aws:s3:::<bucket>/<prefix>/*"
|
"Resource": "arn:aws:s3:::<bucket>/<prefix>/*"
|
||||||
},
|
},
|
||||||
@@ -374,7 +374,7 @@ For **read-only access**, LanceDB will need a policy such as:
|
|||||||
{
|
{
|
||||||
"Effect": "Allow",
|
"Effect": "Allow",
|
||||||
"Action": [
|
"Action": [
|
||||||
"s3:GetObject"
|
"s3:GetObject",
|
||||||
],
|
],
|
||||||
"Resource": "arn:aws:s3:::<bucket>/<prefix>/*"
|
"Resource": "arn:aws:s3:::<bucket>/<prefix>/*"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -765,10 +765,7 @@ This can be used to update zero to all rows depending on how many rows match the
|
|||||||
];
|
];
|
||||||
const tbl = await db.createTable("my_table", data)
|
const tbl = await db.createTable("my_table", data)
|
||||||
|
|
||||||
await tbl.update({
|
await tbl.update({vector: [10, 10]}, { where: "x = 2"})
|
||||||
values: { vector: [10, 10] },
|
|
||||||
where: "x = 2"
|
|
||||||
});
|
|
||||||
```
|
```
|
||||||
|
|
||||||
=== "vectordb (deprecated)"
|
=== "vectordb (deprecated)"
|
||||||
@@ -787,10 +784,7 @@ This can be used to update zero to all rows depending on how many rows match the
|
|||||||
];
|
];
|
||||||
const tbl = await db.createTable("my_table", data)
|
const tbl = await db.createTable("my_table", data)
|
||||||
|
|
||||||
await tbl.update({
|
await tbl.update({ where: "x = 2", values: {vector: [10, 10]} })
|
||||||
where: "x = 2",
|
|
||||||
values: { vector: [10, 10] }
|
|
||||||
});
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Updating using a sql query
|
#### Updating using a sql query
|
||||||
|
|||||||
@@ -1,67 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / BoostQuery
|
|
||||||
|
|
||||||
# Class: BoostQuery
|
|
||||||
|
|
||||||
Represents a full-text query interface.
|
|
||||||
This interface defines the structure and behavior for full-text queries,
|
|
||||||
including methods to retrieve the query type and convert the query to a dictionary format.
|
|
||||||
|
|
||||||
## Implements
|
|
||||||
|
|
||||||
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### new BoostQuery()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
new BoostQuery(
|
|
||||||
positive,
|
|
||||||
negative,
|
|
||||||
options?): BoostQuery
|
|
||||||
```
|
|
||||||
|
|
||||||
Creates an instance of BoostQuery.
|
|
||||||
The boost returns documents that match the positive query,
|
|
||||||
but penalizes those that match the negative query.
|
|
||||||
The penalty is controlled by the `negativeBoost` parameter.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **positive**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
|
||||||
The positive query that boosts the relevance score.
|
|
||||||
|
|
||||||
* **negative**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
|
||||||
The negative query that reduces the relevance score.
|
|
||||||
|
|
||||||
* **options?**
|
|
||||||
Optional parameters for the boost query.
|
|
||||||
- `negativeBoost`: The boost factor for the negative query (default is 0.0).
|
|
||||||
|
|
||||||
* **options.negativeBoost?**: `number`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`BoostQuery`](BoostQuery.md)
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### queryType()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
queryType(): FullTextQueryType
|
|
||||||
```
|
|
||||||
|
|
||||||
The type of the full-text query.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
|
|
||||||
|
|
||||||
#### Implementation of
|
|
||||||
|
|
||||||
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
|
|
||||||
@@ -1,70 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / MatchQuery
|
|
||||||
|
|
||||||
# Class: MatchQuery
|
|
||||||
|
|
||||||
Represents a full-text query interface.
|
|
||||||
This interface defines the structure and behavior for full-text queries,
|
|
||||||
including methods to retrieve the query type and convert the query to a dictionary format.
|
|
||||||
|
|
||||||
## Implements
|
|
||||||
|
|
||||||
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### new MatchQuery()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
new MatchQuery(
|
|
||||||
query,
|
|
||||||
column,
|
|
||||||
options?): MatchQuery
|
|
||||||
```
|
|
||||||
|
|
||||||
Creates an instance of MatchQuery.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **query**: `string`
|
|
||||||
The text query to search for.
|
|
||||||
|
|
||||||
* **column**: `string`
|
|
||||||
The name of the column to search within.
|
|
||||||
|
|
||||||
* **options?**
|
|
||||||
Optional parameters for the match query.
|
|
||||||
- `boost`: The boost factor for the query (default is 1.0).
|
|
||||||
- `fuzziness`: The fuzziness level for the query (default is 0).
|
|
||||||
- `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
|
|
||||||
|
|
||||||
* **options.boost?**: `number`
|
|
||||||
|
|
||||||
* **options.fuzziness?**: `number`
|
|
||||||
|
|
||||||
* **options.maxExpansions?**: `number`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`MatchQuery`](MatchQuery.md)
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### queryType()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
queryType(): FullTextQueryType
|
|
||||||
```
|
|
||||||
|
|
||||||
The type of the full-text query.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
|
|
||||||
|
|
||||||
#### Implementation of
|
|
||||||
|
|
||||||
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
|
|
||||||
@@ -33,20 +33,20 @@ Construct a MergeInsertBuilder. __Internal use only.__
|
|||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
execute(data): Promise<MergeStats>
|
execute(data): Promise<void>
|
||||||
```
|
```
|
||||||
|
|
||||||
Executes the merge insert operation
|
Executes the merge insert operation
|
||||||
|
|
||||||
|
Nothing is returned but the `Table` is updated
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **data**: [`Data`](../type-aliases/Data.md)
|
* **data**: [`Data`](../type-aliases/Data.md)
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<[`MergeStats`](../interfaces/MergeStats.md)>
|
`Promise`<`void`>
|
||||||
|
|
||||||
Statistics about the merge operation: counts of inserted, updated, and deleted rows
|
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
|||||||
@@ -1,64 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / MultiMatchQuery
|
|
||||||
|
|
||||||
# Class: MultiMatchQuery
|
|
||||||
|
|
||||||
Represents a full-text query interface.
|
|
||||||
This interface defines the structure and behavior for full-text queries,
|
|
||||||
including methods to retrieve the query type and convert the query to a dictionary format.
|
|
||||||
|
|
||||||
## Implements
|
|
||||||
|
|
||||||
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### new MultiMatchQuery()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
new MultiMatchQuery(
|
|
||||||
query,
|
|
||||||
columns,
|
|
||||||
options?): MultiMatchQuery
|
|
||||||
```
|
|
||||||
|
|
||||||
Creates an instance of MultiMatchQuery.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **query**: `string`
|
|
||||||
The text query to search for across multiple columns.
|
|
||||||
|
|
||||||
* **columns**: `string`[]
|
|
||||||
An array of column names to search within.
|
|
||||||
|
|
||||||
* **options?**
|
|
||||||
Optional parameters for the multi-match query.
|
|
||||||
- `boosts`: An array of boost factors for each column (default is 1.0 for all).
|
|
||||||
|
|
||||||
* **options.boosts?**: `number`[]
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`MultiMatchQuery`](MultiMatchQuery.md)
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### queryType()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
queryType(): FullTextQueryType
|
|
||||||
```
|
|
||||||
|
|
||||||
The type of the full-text query.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
|
|
||||||
|
|
||||||
#### Implementation of
|
|
||||||
|
|
||||||
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / PhraseQuery
|
|
||||||
|
|
||||||
# Class: PhraseQuery
|
|
||||||
|
|
||||||
Represents a full-text query interface.
|
|
||||||
This interface defines the structure and behavior for full-text queries,
|
|
||||||
including methods to retrieve the query type and convert the query to a dictionary format.
|
|
||||||
|
|
||||||
## Implements
|
|
||||||
|
|
||||||
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### new PhraseQuery()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
new PhraseQuery(query, column): PhraseQuery
|
|
||||||
```
|
|
||||||
|
|
||||||
Creates an instance of `PhraseQuery`.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **query**: `string`
|
|
||||||
The phrase to search for in the specified column.
|
|
||||||
|
|
||||||
* **column**: `string`
|
|
||||||
The name of the column to search within.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`PhraseQuery`](PhraseQuery.md)
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### queryType()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
queryType(): FullTextQueryType
|
|
||||||
```
|
|
||||||
|
|
||||||
The type of the full-text query.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
|
|
||||||
|
|
||||||
#### Implementation of
|
|
||||||
|
|
||||||
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
|
|
||||||
@@ -30,53 +30,6 @@ protected inner: Query | Promise<Query>;
|
|||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
### analyzePlan()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
analyzePlan(): Promise<string>
|
|
||||||
```
|
|
||||||
|
|
||||||
Executes the query and returns the physical query plan annotated with runtime metrics.
|
|
||||||
|
|
||||||
This is useful for debugging and performance analysis, as it shows how the query was executed
|
|
||||||
and includes metrics such as elapsed time, rows processed, and I/O statistics.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`string`>
|
|
||||||
|
|
||||||
A query execution plan with runtime metrics for each step.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```ts
|
|
||||||
import * as lancedb from "@lancedb/lancedb"
|
|
||||||
|
|
||||||
const db = await lancedb.connect("./.lancedb");
|
|
||||||
const table = await db.createTable("my_table", [
|
|
||||||
{ vector: [1.1, 0.9], id: "1" },
|
|
||||||
]);
|
|
||||||
|
|
||||||
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
|
|
||||||
|
|
||||||
Example output (with runtime metrics inlined):
|
|
||||||
AnalyzeExec verbose=true, metrics=[]
|
|
||||||
ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
|
|
||||||
Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
|
|
||||||
CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
|
|
||||||
GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
|
|
||||||
FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
|
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
|
|
||||||
KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
|
|
||||||
LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Inherited from
|
|
||||||
|
|
||||||
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
@@ -206,7 +159,7 @@ fullTextSearch(query, options?): this
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
* **query**: `string`
|
||||||
|
|
||||||
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
||||||
|
|
||||||
@@ -309,7 +262,7 @@ nearestToText(query, columns?): Query
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
* **query**: `string`
|
||||||
|
|
||||||
* **columns?**: `string`[]
|
* **columns?**: `string`[]
|
||||||
|
|
||||||
|
|||||||
@@ -36,49 +36,6 @@ protected inner: NativeQueryType | Promise<NativeQueryType>;
|
|||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
### analyzePlan()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
analyzePlan(): Promise<string>
|
|
||||||
```
|
|
||||||
|
|
||||||
Executes the query and returns the physical query plan annotated with runtime metrics.
|
|
||||||
|
|
||||||
This is useful for debugging and performance analysis, as it shows how the query was executed
|
|
||||||
and includes metrics such as elapsed time, rows processed, and I/O statistics.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`string`>
|
|
||||||
|
|
||||||
A query execution plan with runtime metrics for each step.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```ts
|
|
||||||
import * as lancedb from "@lancedb/lancedb"
|
|
||||||
|
|
||||||
const db = await lancedb.connect("./.lancedb");
|
|
||||||
const table = await db.createTable("my_table", [
|
|
||||||
{ vector: [1.1, 0.9], id: "1" },
|
|
||||||
]);
|
|
||||||
|
|
||||||
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
|
|
||||||
|
|
||||||
Example output (with runtime metrics inlined):
|
|
||||||
AnalyzeExec verbose=true, metrics=[]
|
|
||||||
ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
|
|
||||||
Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
|
|
||||||
CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
|
|
||||||
GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
|
|
||||||
FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
|
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
|
|
||||||
KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
|
|
||||||
LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### execute()
|
### execute()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
@@ -192,7 +149,7 @@ fullTextSearch(query, options?): this
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
* **query**: `string`
|
||||||
|
|
||||||
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
||||||
|
|
||||||
|
|||||||
@@ -117,8 +117,8 @@ wish to return to standard mode, call `checkoutLatest`.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **version**: `string` \| `number`
|
* **version**: `number`
|
||||||
The version to check out; this can be either a version number or a tag
|
The version to checkout
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -454,28 +454,6 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### prewarmIndex()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
abstract prewarmIndex(name): Promise<void>
|
|
||||||
```
|
|
||||||
|
|
||||||
Prewarm an index in the table.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **name**: `string`
|
|
||||||
The name of the index.
|
|
||||||
This will load the index into memory. This may reduce the cold-start time for
|
|
||||||
future queries. If the index does not fit in the cache then this call may be
|
|
||||||
wasteful.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`void`>
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### query()
|
### query()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
@@ -597,7 +575,7 @@ of the given query
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md)
|
||||||
the query, a vector or string
|
the query, a vector or string
|
||||||
|
|
||||||
* **queryType?**: `string`
|
* **queryType?**: `string`
|
||||||
@@ -615,50 +593,6 @@ of the given query
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### stats()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
abstract stats(): Promise<TableStatistics>
|
|
||||||
```
|
|
||||||
|
|
||||||
Returns table and fragment statistics
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<[`TableStatistics`](../interfaces/TableStatistics.md)>
|
|
||||||
|
|
||||||
The table and fragment statistics
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### tags()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
abstract tags(): Promise<Tags>
|
|
||||||
```
|
|
||||||
|
|
||||||
Get a tags manager for this table.
|
|
||||||
|
|
||||||
Tags allow you to label specific versions of a table with a human-readable name.
|
|
||||||
The returned tags manager can be used to list, create, update, or delete tags.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<[`Tags`](Tags.md)>
|
|
||||||
|
|
||||||
A tags manager for this table
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
const tagsManager = await table.tags();
|
|
||||||
await tagsManager.create("v1", 1);
|
|
||||||
const tags = await tagsManager.list();
|
|
||||||
console.log(tags); // { "v1": { version: 1, manifestSize: ... } }
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### toArrow()
|
### toArrow()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
@@ -797,26 +731,3 @@ Retrieve the version of the table
|
|||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<`number`>
|
`Promise`<`number`>
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### waitForIndex()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
abstract waitForIndex(indexNames, timeoutSeconds): Promise<void>
|
|
||||||
```
|
|
||||||
|
|
||||||
Waits for asynchronous indexing to complete on the table.
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **indexNames**: `string`[]
|
|
||||||
The names of the indices to wait for
|
|
||||||
|
|
||||||
* **timeoutSeconds**: `number`
|
|
||||||
The number of seconds to wait before timing out
|
|
||||||
This will raise an error if the indices are not created and fully indexed within the timeout.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`void`>
|
|
||||||
|
|||||||
@@ -1,35 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / TagContents
|
|
||||||
|
|
||||||
# Class: TagContents
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### new TagContents()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
new TagContents(): TagContents
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`TagContents`](TagContents.md)
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### manifestSize
|
|
||||||
|
|
||||||
```ts
|
|
||||||
manifestSize: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### version
|
|
||||||
|
|
||||||
```ts
|
|
||||||
version: number;
|
|
||||||
```
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / Tags
|
|
||||||
|
|
||||||
# Class: Tags
|
|
||||||
|
|
||||||
## Constructors
|
|
||||||
|
|
||||||
### new Tags()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
new Tags(): Tags
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`Tags`](Tags.md)
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### create()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
create(tag, version): Promise<void>
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **tag**: `string`
|
|
||||||
|
|
||||||
* **version**: `number`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`void`>
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### delete()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
delete(tag): Promise<void>
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **tag**: `string`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`void`>
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### getVersion()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
getVersion(tag): Promise<number>
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **tag**: `string`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`number`>
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### list()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
list(): Promise<Record<string, TagContents>>
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`Record`<`string`, [`TagContents`](TagContents.md)>>
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### update()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
update(tag, version): Promise<void>
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Parameters
|
|
||||||
|
|
||||||
* **tag**: `string`
|
|
||||||
|
|
||||||
* **version**: `number`
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`void`>
|
|
||||||
@@ -48,53 +48,6 @@ addQueryVector(vector): VectorQuery
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
### analyzePlan()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
analyzePlan(): Promise<string>
|
|
||||||
```
|
|
||||||
|
|
||||||
Executes the query and returns the physical query plan annotated with runtime metrics.
|
|
||||||
|
|
||||||
This is useful for debugging and performance analysis, as it shows how the query was executed
|
|
||||||
and includes metrics such as elapsed time, rows processed, and I/O statistics.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
`Promise`<`string`>
|
|
||||||
|
|
||||||
A query execution plan with runtime metrics for each step.
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```ts
|
|
||||||
import * as lancedb from "@lancedb/lancedb"
|
|
||||||
|
|
||||||
const db = await lancedb.connect("./.lancedb");
|
|
||||||
const table = await db.createTable("my_table", [
|
|
||||||
{ vector: [1.1, 0.9], id: "1" },
|
|
||||||
]);
|
|
||||||
|
|
||||||
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
|
|
||||||
|
|
||||||
Example output (with runtime metrics inlined):
|
|
||||||
AnalyzeExec verbose=true, metrics=[]
|
|
||||||
ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
|
|
||||||
Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
|
|
||||||
CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
|
|
||||||
GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
|
|
||||||
FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
|
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
|
|
||||||
KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
|
|
||||||
LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Inherited from
|
|
||||||
|
|
||||||
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### bypassVectorIndex()
|
### bypassVectorIndex()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
@@ -347,7 +300,7 @@ fullTextSearch(query, options?): this
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
|
* **query**: `string`
|
||||||
|
|
||||||
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
||||||
|
|
||||||
|
|||||||
@@ -1,46 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / FullTextQueryType
|
|
||||||
|
|
||||||
# Enumeration: FullTextQueryType
|
|
||||||
|
|
||||||
Enum representing the types of full-text queries supported.
|
|
||||||
|
|
||||||
- `Match`: Performs a full-text search for terms in the query string.
|
|
||||||
- `MatchPhrase`: Searches for an exact phrase match in the text.
|
|
||||||
- `Boost`: Boosts the relevance score of specific terms in the query.
|
|
||||||
- `MultiMatch`: Searches across multiple fields for the query terms.
|
|
||||||
|
|
||||||
## Enumeration Members
|
|
||||||
|
|
||||||
### Boost
|
|
||||||
|
|
||||||
```ts
|
|
||||||
Boost: "boost";
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### Match
|
|
||||||
|
|
||||||
```ts
|
|
||||||
Match: "match";
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### MatchPhrase
|
|
||||||
|
|
||||||
```ts
|
|
||||||
MatchPhrase: "match_phrase";
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### MultiMatch
|
|
||||||
|
|
||||||
```ts
|
|
||||||
MultiMatch: "multi_match";
|
|
||||||
```
|
|
||||||
@@ -9,26 +9,16 @@
|
|||||||
- [embedding](namespaces/embedding/README.md)
|
- [embedding](namespaces/embedding/README.md)
|
||||||
- [rerankers](namespaces/rerankers/README.md)
|
- [rerankers](namespaces/rerankers/README.md)
|
||||||
|
|
||||||
## Enumerations
|
|
||||||
|
|
||||||
- [FullTextQueryType](enumerations/FullTextQueryType.md)
|
|
||||||
|
|
||||||
## Classes
|
## Classes
|
||||||
|
|
||||||
- [BoostQuery](classes/BoostQuery.md)
|
|
||||||
- [Connection](classes/Connection.md)
|
- [Connection](classes/Connection.md)
|
||||||
- [Index](classes/Index.md)
|
- [Index](classes/Index.md)
|
||||||
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
|
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
|
||||||
- [MatchQuery](classes/MatchQuery.md)
|
|
||||||
- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
|
- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
|
||||||
- [MultiMatchQuery](classes/MultiMatchQuery.md)
|
|
||||||
- [PhraseQuery](classes/PhraseQuery.md)
|
|
||||||
- [Query](classes/Query.md)
|
- [Query](classes/Query.md)
|
||||||
- [QueryBase](classes/QueryBase.md)
|
- [QueryBase](classes/QueryBase.md)
|
||||||
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
||||||
- [Table](classes/Table.md)
|
- [Table](classes/Table.md)
|
||||||
- [TagContents](classes/TagContents.md)
|
|
||||||
- [Tags](classes/Tags.md)
|
|
||||||
- [VectorColumnOptions](classes/VectorColumnOptions.md)
|
- [VectorColumnOptions](classes/VectorColumnOptions.md)
|
||||||
- [VectorQuery](classes/VectorQuery.md)
|
- [VectorQuery](classes/VectorQuery.md)
|
||||||
|
|
||||||
@@ -42,10 +32,7 @@
|
|||||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||||
- [FragmentStatistics](interfaces/FragmentStatistics.md)
|
|
||||||
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
|
|
||||||
- [FtsOptions](interfaces/FtsOptions.md)
|
- [FtsOptions](interfaces/FtsOptions.md)
|
||||||
- [FullTextQuery](interfaces/FullTextQuery.md)
|
|
||||||
- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
|
- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
|
||||||
- [HnswPqOptions](interfaces/HnswPqOptions.md)
|
- [HnswPqOptions](interfaces/HnswPqOptions.md)
|
||||||
- [HnswSqOptions](interfaces/HnswSqOptions.md)
|
- [HnswSqOptions](interfaces/HnswSqOptions.md)
|
||||||
@@ -54,7 +41,6 @@
|
|||||||
- [IndexStatistics](interfaces/IndexStatistics.md)
|
- [IndexStatistics](interfaces/IndexStatistics.md)
|
||||||
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
||||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||||
- [MergeStats](interfaces/MergeStats.md)
|
|
||||||
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
||||||
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||||
- [OptimizeStats](interfaces/OptimizeStats.md)
|
- [OptimizeStats](interfaces/OptimizeStats.md)
|
||||||
@@ -62,7 +48,6 @@
|
|||||||
- [RemovalStats](interfaces/RemovalStats.md)
|
- [RemovalStats](interfaces/RemovalStats.md)
|
||||||
- [RetryConfig](interfaces/RetryConfig.md)
|
- [RetryConfig](interfaces/RetryConfig.md)
|
||||||
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
||||||
- [TableStatistics](interfaces/TableStatistics.md)
|
|
||||||
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
||||||
- [UpdateOptions](interfaces/UpdateOptions.md)
|
- [UpdateOptions](interfaces/UpdateOptions.md)
|
||||||
- [Version](interfaces/Version.md)
|
- [Version](interfaces/Version.md)
|
||||||
|
|||||||
@@ -1,37 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / FragmentStatistics
|
|
||||||
|
|
||||||
# Interface: FragmentStatistics
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### lengths
|
|
||||||
|
|
||||||
```ts
|
|
||||||
lengths: FragmentSummaryStats;
|
|
||||||
```
|
|
||||||
|
|
||||||
Statistics on the number of rows in the table fragments
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numFragments
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numFragments: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The number of fragments in the table
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numSmallFragments
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numSmallFragments: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The number of uncompacted fragments in the table
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / FragmentSummaryStats
|
|
||||||
|
|
||||||
# Interface: FragmentSummaryStats
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### max
|
|
||||||
|
|
||||||
```ts
|
|
||||||
max: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The number of rows in the fragment with the most rows
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### mean
|
|
||||||
|
|
||||||
```ts
|
|
||||||
mean: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The mean number of rows in the fragments
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### min
|
|
||||||
|
|
||||||
```ts
|
|
||||||
min: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The number of rows in the fragment with the fewest rows
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### p25
|
|
||||||
|
|
||||||
```ts
|
|
||||||
p25: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The 25th percentile of number of rows in the fragments
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### p50
|
|
||||||
|
|
||||||
```ts
|
|
||||||
p50: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The 50th percentile of number of rows in the fragments
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### p75
|
|
||||||
|
|
||||||
```ts
|
|
||||||
p75: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The 75th percentile of number of rows in the fragments
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### p99
|
|
||||||
|
|
||||||
```ts
|
|
||||||
p99: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The 99th percentile of number of rows in the fragments
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / FullTextQuery
|
|
||||||
|
|
||||||
# Interface: FullTextQuery
|
|
||||||
|
|
||||||
Represents a full-text query interface.
|
|
||||||
This interface defines the structure and behavior for full-text queries,
|
|
||||||
including methods to retrieve the query type and convert the query to a dictionary format.
|
|
||||||
|
|
||||||
## Methods
|
|
||||||
|
|
||||||
### queryType()
|
|
||||||
|
|
||||||
```ts
|
|
||||||
queryType(): FullTextQueryType
|
|
||||||
```
|
|
||||||
|
|
||||||
The type of the full-text query.
|
|
||||||
|
|
||||||
#### Returns
|
|
||||||
|
|
||||||
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
|
|
||||||
@@ -39,11 +39,3 @@ and the same name, then an error will be returned. This is true even if
|
|||||||
that index is out of date.
|
that index is out of date.
|
||||||
|
|
||||||
The default is true
|
The default is true
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### waitTimeoutSeconds?
|
|
||||||
|
|
||||||
```ts
|
|
||||||
optional waitTimeoutSeconds: number;
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -1,31 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / MergeStats
|
|
||||||
|
|
||||||
# Interface: MergeStats
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### numDeletedRows
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numDeletedRows: bigint;
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numInsertedRows
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numInsertedRows: bigint;
|
|
||||||
```
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numUpdatedRows
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numUpdatedRows: bigint;
|
|
||||||
```
|
|
||||||
@@ -20,13 +20,3 @@ The maximum number of rows to return in a single batch
|
|||||||
|
|
||||||
Batches may have fewer rows if the underlying data is stored
|
Batches may have fewer rows if the underlying data is stored
|
||||||
in smaller chunks.
|
in smaller chunks.
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### timeoutMs?
|
|
||||||
|
|
||||||
```ts
|
|
||||||
optional timeoutMs: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
Timeout for query execution in milliseconds
|
|
||||||
|
|||||||
@@ -1,47 +0,0 @@
|
|||||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
[@lancedb/lancedb](../globals.md) / TableStatistics
|
|
||||||
|
|
||||||
# Interface: TableStatistics
|
|
||||||
|
|
||||||
## Properties
|
|
||||||
|
|
||||||
### fragmentStats
|
|
||||||
|
|
||||||
```ts
|
|
||||||
fragmentStats: FragmentStatistics;
|
|
||||||
```
|
|
||||||
|
|
||||||
Statistics on table fragments
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numIndices
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numIndices: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The number of indices in the table
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### numRows
|
|
||||||
|
|
||||||
```ts
|
|
||||||
numRows: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The number of rows in the table
|
|
||||||
|
|
||||||
***
|
|
||||||
|
|
||||||
### totalBytes
|
|
||||||
|
|
||||||
```ts
|
|
||||||
totalBytes: number;
|
|
||||||
```
|
|
||||||
|
|
||||||
The total number of bytes in the table
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
|
|
||||||
# Getting Started with LanceDB: A Minimal Vector Search Tutorial
|
|
||||||
|
|
||||||
Let's set up a LanceDB database, insert vector data, and perform a simple vector search. We'll use simple character classes like "knight" and "rogue" to illustrate semantic relevance.
|
|
||||||
|
|
||||||
## 1. Install Dependencies
|
|
||||||
|
|
||||||
Before starting, make sure you have the necessary packages:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install lancedb pandas numpy
|
|
||||||
```
|
|
||||||
|
|
||||||
## 2. Import Required Libraries
|
|
||||||
|
|
||||||
```python
|
|
||||||
import lancedb
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
```
|
|
||||||
|
|
||||||
## 3. Connect to LanceDB
|
|
||||||
|
|
||||||
You can use a local directory to store your database:
|
|
||||||
|
|
||||||
```python
|
|
||||||
db = lancedb.connect("./lancedb")
|
|
||||||
```
|
|
||||||
|
|
||||||
## 4. Create Sample Data
|
|
||||||
|
|
||||||
Add sample text data and corresponding 4D vectors:
|
|
||||||
|
|
||||||
```python
|
|
||||||
data = pd.DataFrame([
|
|
||||||
{"id": "1", "vector": [1.0, 0.0, 0.0, 0.0], "text": "knight"},
|
|
||||||
{"id": "2", "vector": [0.9, 0.1, 0.0, 0.0], "text": "warrior"},
|
|
||||||
{"id": "3", "vector": [0.0, 1.0, 0.0, 0.0], "text": "rogue"},
|
|
||||||
{"id": "4", "vector": [0.0, 0.9, 0.1, 0.0], "text": "thief"},
|
|
||||||
{"id": "5", "vector": [0.5, 0.5, 0.0, 0.0], "text": "ranger"},
|
|
||||||
])
|
|
||||||
```
|
|
||||||
|
|
||||||
## 5. Create a Table in LanceDB
|
|
||||||
|
|
||||||
```python
|
|
||||||
table = db.create_table("rpg_classes", data=data, mode="overwrite")
|
|
||||||
```
|
|
||||||
|
|
||||||
Let's see how the table looks:
|
|
||||||
```python
|
|
||||||
print(data)
|
|
||||||
```
|
|
||||||
|
|
||||||
| id | vector | text |
|
|
||||||
|----|--------|------|
|
|
||||||
| 1 | [1.0, 0.0, 0.0, 0.0] | knight |
|
|
||||||
| 2 | [0.9, 0.1, 0.0, 0.0] | warrior |
|
|
||||||
| 3 | [0.0, 1.0, 0.0, 0.0] | rogue |
|
|
||||||
| 4 | [0.0, 0.9, 0.1, 0.0] | thief |
|
|
||||||
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger |
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 6. Perform a Vector Search
|
|
||||||
|
|
||||||
Search for the most similar character classes to our query vector:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Query as if we are searching for "rogue"
|
|
||||||
results = table.search([0.0, 1.0, 0.0, 0.0]).limit(3).to_pandas()
|
|
||||||
print(results)
|
|
||||||
```
|
|
||||||
|
|
||||||
This will return the top 3 closest classes to the vector, effectively showing how LanceDB can be used for semantic search.
|
|
||||||
|
|
||||||
| id | vector | text | _distance |
|
|
||||||
|------|------------------------|----------|-----------|
|
|
||||||
| 3 | [0.0, 1.0, 0.0, 0.0] | rogue | 0.00 |
|
|
||||||
| 4 | [0.0, 0.9, 0.1, 0.0] | thief | 0.02 |
|
|
||||||
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
|
|
||||||
|
|
||||||
Let's try searching for "knight"
|
|
||||||
|
|
||||||
```python
|
|
||||||
query_vector = [1.0, 0.0, 0.0, 0.0]
|
|
||||||
results = table.search(query_vector).limit(3).to_pandas()
|
|
||||||
print(results)
|
|
||||||
```
|
|
||||||
|
|
||||||
| id | vector | text | _distance |
|
|
||||||
|------|------------------------|----------|-----------|
|
|
||||||
| 1 | [1.0, 0.0, 0.0, 0.0] | knight | 0.00 |
|
|
||||||
| 2 | [0.9, 0.1, 0.0, 0.0] | warrior | 0.02 |
|
|
||||||
| 5 | [0.5, 0.5, 0.0, 0.0] | ranger | 0.50 |
|
|
||||||
|
|
||||||
## Next Steps
|
|
||||||
|
|
||||||
That's it - you just ran your first vector search!
|
|
||||||
|
|
||||||
For more beginner tips, check out the [Basic Usage](basic.md) guide.
|
|
||||||
@@ -35,9 +35,3 @@ print the resolved query plan. You can use the `explain_plan` method to do this:
|
|||||||
* Python Sync: [LanceQueryBuilder.explain_plan][lancedb.query.LanceQueryBuilder.explain_plan]
|
* Python Sync: [LanceQueryBuilder.explain_plan][lancedb.query.LanceQueryBuilder.explain_plan]
|
||||||
* Python Async: [AsyncQueryBase.explain_plan][lancedb.query.AsyncQueryBase.explain_plan]
|
* Python Async: [AsyncQueryBase.explain_plan][lancedb.query.AsyncQueryBase.explain_plan]
|
||||||
* Node @lancedb/lancedb: [LanceQueryBuilder.explainPlan](/lancedb/js/classes/QueryBase/#explainplan)
|
* Node @lancedb/lancedb: [LanceQueryBuilder.explainPlan](/lancedb/js/classes/QueryBase/#explainplan)
|
||||||
|
|
||||||
To understand how a query was actually executed—including metrics like execution time, number of rows processed, I/O stats, and more—use the `analyze_plan` method. This executes the query and returns a physical execution plan annotated with runtime metrics, making it especially helpful for performance tuning and debugging.
|
|
||||||
|
|
||||||
* Python Sync: [LanceQueryBuilder.analyze_plan][lancedb.query.LanceQueryBuilder.analyze_plan]
|
|
||||||
* Python Async: [AsyncQueryBase.analyze_plan][lancedb.query.AsyncQueryBase.analyze_plan]
|
|
||||||
* Node @lancedb/lancedb: [LanceQueryBuilder.analyzePlan](/lancedb/js/classes/QueryBase/#analyzeplan)
|
|
||||||
|
|||||||
@@ -8,16 +8,13 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.19.1-beta.1</version>
|
<version>0.18.2-beta.1</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>lancedb-core</artifactId>
|
<artifactId>lancedb-core</artifactId>
|
||||||
<name>LanceDB Core</name>
|
<name>LanceDB Core</name>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<properties>
|
|
||||||
<rust.release.build>false</rust.release.build>
|
|
||||||
</properties>
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
@@ -71,7 +68,7 @@
|
|||||||
</goals>
|
</goals>
|
||||||
<configuration>
|
<configuration>
|
||||||
<path>lancedb-jni</path>
|
<path>lancedb-jni</path>
|
||||||
<release>${rust.release.build}</release>
|
<release>true</release>
|
||||||
<!-- Copy native libraries to target/classes for runtime access -->
|
<!-- Copy native libraries to target/classes for runtime access -->
|
||||||
<copyTo>${project.build.directory}/classes/nativelib</copyTo>
|
<copyTo>${project.build.directory}/classes/nativelib</copyTo>
|
||||||
<copyWithPlatformDir>true</copyWithPlatformDir>
|
<copyWithPlatformDir>true</copyWithPlatformDir>
|
||||||
|
|||||||
@@ -1,25 +1,16 @@
|
|||||||
/*
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package com.lancedb.lancedb;
|
package com.lancedb.lancedb;
|
||||||
|
|
||||||
import io.questdb.jar.jni.JarJniLoader;
|
import io.questdb.jar.jni.JarJniLoader;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
/** Represents LanceDB database. */
|
/**
|
||||||
|
* Represents LanceDB database.
|
||||||
|
*/
|
||||||
public class Connection implements Closeable {
|
public class Connection implements Closeable {
|
||||||
static {
|
static {
|
||||||
JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
|
JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
|
||||||
@@ -27,11 +18,14 @@ public class Connection implements Closeable {
|
|||||||
|
|
||||||
private long nativeConnectionHandle;
|
private long nativeConnectionHandle;
|
||||||
|
|
||||||
/** Connect to a LanceDB instance. */
|
/**
|
||||||
|
* Connect to a LanceDB instance.
|
||||||
|
*/
|
||||||
public static native Connection connect(String uri);
|
public static native Connection connect(String uri);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the names of all tables in the database. The names are sorted in ascending order.
|
* Get the names of all tables in the database. The names are sorted in
|
||||||
|
* ascending order.
|
||||||
*
|
*
|
||||||
* @return the table names
|
* @return the table names
|
||||||
*/
|
*/
|
||||||
@@ -40,7 +34,8 @@ public class Connection implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
* Get the names of filtered tables in the database. The names are sorted in
|
||||||
|
* ascending order.
|
||||||
*
|
*
|
||||||
* @param limit The number of results to return.
|
* @param limit The number of results to return.
|
||||||
* @return the table names
|
* @return the table names
|
||||||
@@ -50,11 +45,12 @@ public class Connection implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
* Get the names of filtered tables in the database. The names are sorted in
|
||||||
|
* ascending order.
|
||||||
*
|
*
|
||||||
* @param startAfter If present, only return names that come lexicographically after the supplied
|
* @param startAfter If present, only return names that come lexicographically after the supplied
|
||||||
* value. This can be combined with limit to implement pagination by setting this to the last
|
* value. This can be combined with limit to implement pagination
|
||||||
* table name from the previous page.
|
* by setting this to the last table name from the previous page.
|
||||||
* @return the table names
|
* @return the table names
|
||||||
*/
|
*/
|
||||||
public List<String> tableNames(String startAfter) {
|
public List<String> tableNames(String startAfter) {
|
||||||
@@ -62,11 +58,12 @@ public class Connection implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
* Get the names of filtered tables in the database. The names are sorted in
|
||||||
|
* ascending order.
|
||||||
*
|
*
|
||||||
* @param startAfter If present, only return names that come lexicographically after the supplied
|
* @param startAfter If present, only return names that come lexicographically after the supplied
|
||||||
* value. This can be combined with limit to implement pagination by setting this to the last
|
* value. This can be combined with limit to implement pagination
|
||||||
* table name from the previous page.
|
* by setting this to the last table name from the previous page.
|
||||||
* @param limit The number of results to return.
|
* @param limit The number of results to return.
|
||||||
* @return the table names
|
* @return the table names
|
||||||
*/
|
*/
|
||||||
@@ -75,19 +72,22 @@ public class Connection implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
* Get the names of filtered tables in the database. The names are sorted in
|
||||||
|
* ascending order.
|
||||||
*
|
*
|
||||||
* @param startAfter If present, only return names that come lexicographically after the supplied
|
* @param startAfter If present, only return names that come lexicographically after the supplied
|
||||||
* value. This can be combined with limit to implement pagination by setting this to the last
|
* value. This can be combined with limit to implement pagination
|
||||||
* table name from the previous page.
|
* by setting this to the last table name from the previous page.
|
||||||
* @param limit The number of results to return.
|
* @param limit The number of results to return.
|
||||||
* @return the table names
|
* @return the table names
|
||||||
*/
|
*/
|
||||||
public native List<String> tableNames(Optional<String> startAfter, Optional<Integer> limit);
|
public native List<String> tableNames(
|
||||||
|
Optional<String> startAfter, Optional<Integer> limit);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Closes this connection and releases any system resources associated with it. If the connection
|
* Closes this connection and releases any system resources associated with it. If
|
||||||
* is already closed, then invoking this method has no effect.
|
* the connection is
|
||||||
|
* already closed, then invoking this method has no effect.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
@@ -98,7 +98,8 @@ public class Connection implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Native method to release the Lance connection resources associated with the given handle.
|
* Native method to release the Lance connection resources associated with the
|
||||||
|
* given handle.
|
||||||
*
|
*
|
||||||
* @param handle The native handle to the connection resource.
|
* @param handle The native handle to the connection resource.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,35 +1,27 @@
|
|||||||
/*
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package com.lancedb.lancedb;
|
package com.lancedb.lancedb;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.junit.jupiter.api.io.TempDir;
|
|
||||||
|
|
||||||
import java.net.URL;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.net.URL;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
public class ConnectionTest {
|
public class ConnectionTest {
|
||||||
private static final String[] TABLE_NAMES = {
|
private static final String[] TABLE_NAMES = {
|
||||||
"dataset_version", "new_empty_dataset", "test", "write_stream"
|
"dataset_version",
|
||||||
|
"new_empty_dataset",
|
||||||
|
"test",
|
||||||
|
"write_stream"
|
||||||
};
|
};
|
||||||
|
|
||||||
@TempDir static Path tempDir; // Temporary directory for the tests
|
@TempDir
|
||||||
|
static Path tempDir; // Temporary directory for the tests
|
||||||
private static URL lanceDbURL;
|
private static URL lanceDbURL;
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
@@ -61,21 +53,18 @@ public class ConnectionTest {
|
|||||||
@Test
|
@Test
|
||||||
void tableNamesStartAfter() {
|
void tableNamesStartAfter() {
|
||||||
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
||||||
assertTableNamesStartAfter(
|
assertTableNamesStartAfter(conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||||
conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
|
|
||||||
assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
|
assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||||
assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
|
assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
|
||||||
assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
|
assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
|
||||||
assertTableNamesStartAfter(
|
assertTableNamesStartAfter(conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||||
conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
|
|
||||||
assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
|
assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||||
assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
|
assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
|
||||||
assertTableNamesStartAfter(conn, "z_dataset", 0);
|
assertTableNamesStartAfter(conn, "z_dataset", 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void assertTableNamesStartAfter(
|
private void assertTableNamesStartAfter(Connection conn, String startAfter, int expectedSize, String... expectedNames) {
|
||||||
Connection conn, String startAfter, int expectedSize, String... expectedNames) {
|
|
||||||
List<String> tableNames = conn.tableNames(startAfter);
|
List<String> tableNames = conn.tableNames(startAfter);
|
||||||
assertEquals(expectedSize, tableNames.size());
|
assertEquals(expectedSize, tableNames.size());
|
||||||
for (int i = 0; i < expectedNames.length; i++) {
|
for (int i = 0; i < expectedNames.length; i++) {
|
||||||
@@ -85,7 +74,7 @@ public class ConnectionTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void tableNamesLimit() {
|
void tableNamesLimit() {
|
||||||
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
||||||
for (int i = 0; i <= TABLE_NAMES.length; i++) {
|
for (int i = 0; i <= TABLE_NAMES.length; i++) {
|
||||||
List<String> tableNames = conn.tableNames(i);
|
List<String> tableNames = conn.tableNames(i);
|
||||||
assertEquals(i, tableNames.size());
|
assertEquals(i, tableNames.size());
|
||||||
|
|||||||
76
java/pom.xml
76
java/pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.19.1-beta.1</version>
|
<version>0.18.2-beta.1</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
@@ -29,25 +29,6 @@
|
|||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<arrow.version>15.0.0</arrow.version>
|
<arrow.version>15.0.0</arrow.version>
|
||||||
<spotless.skip>false</spotless.skip>
|
|
||||||
<spotless.version>2.30.0</spotless.version>
|
|
||||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
|
||||||
<spotless.delimiter>package</spotless.delimiter>
|
|
||||||
<spotless.license.header>
|
|
||||||
/*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
</spotless.license.header>
|
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<modules>
|
<modules>
|
||||||
@@ -146,8 +127,7 @@
|
|||||||
<configuration>
|
<configuration>
|
||||||
<configLocation>google_checks.xml</configLocation>
|
<configLocation>google_checks.xml</configLocation>
|
||||||
<consoleOutput>true</consoleOutput>
|
<consoleOutput>true</consoleOutput>
|
||||||
<failsOnError>false</failsOnError>
|
<failsOnError>true</failsOnError>
|
||||||
<failOnViolation>false</failOnViolation>
|
|
||||||
<violationSeverity>warning</violationSeverity>
|
<violationSeverity>warning</violationSeverity>
|
||||||
<linkXRef>false</linkXRef>
|
<linkXRef>false</linkXRef>
|
||||||
</configuration>
|
</configuration>
|
||||||
@@ -161,10 +141,6 @@
|
|||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
|
||||||
<groupId>com.diffplug.spotless</groupId>
|
|
||||||
<artifactId>spotless-maven-plugin</artifactId>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
</plugins>
|
||||||
<pluginManagement>
|
<pluginManagement>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -203,54 +179,6 @@
|
|||||||
<artifactId>maven-install-plugin</artifactId>
|
<artifactId>maven-install-plugin</artifactId>
|
||||||
<version>2.5.2</version>
|
<version>2.5.2</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
|
||||||
<groupId>com.diffplug.spotless</groupId>
|
|
||||||
<artifactId>spotless-maven-plugin</artifactId>
|
|
||||||
<version>${spotless.version}</version>
|
|
||||||
<configuration>
|
|
||||||
<skip>${spotless.skip}</skip>
|
|
||||||
<upToDateChecking>
|
|
||||||
<enabled>true</enabled>
|
|
||||||
</upToDateChecking>
|
|
||||||
<java>
|
|
||||||
<includes>
|
|
||||||
<include>src/main/java/**/*.java</include>
|
|
||||||
<include>src/test/java/**/*.java</include>
|
|
||||||
</includes>
|
|
||||||
<googleJavaFormat>
|
|
||||||
<version>${spotless.java.googlejavaformat.version}</version>
|
|
||||||
<style>GOOGLE</style>
|
|
||||||
</googleJavaFormat>
|
|
||||||
|
|
||||||
<importOrder>
|
|
||||||
<order>com.lancedb.lance,,javax,java,\#</order>
|
|
||||||
</importOrder>
|
|
||||||
|
|
||||||
<removeUnusedImports />
|
|
||||||
</java>
|
|
||||||
<scala>
|
|
||||||
<includes>
|
|
||||||
<include>src/main/scala/**/*.scala</include>
|
|
||||||
<include>src/main/scala-*/**/*.scala</include>
|
|
||||||
<include>src/test/scala/**/*.scala</include>
|
|
||||||
<include>src/test/scala-*/**/*.scala</include>
|
|
||||||
</includes>
|
|
||||||
</scala>
|
|
||||||
<licenseHeader>
|
|
||||||
<content>${spotless.license.header}</content>
|
|
||||||
<delimiter>${spotless.delimiter}</delimiter>
|
|
||||||
</licenseHeader>
|
|
||||||
</configuration>
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<id>spotless-check</id>
|
|
||||||
<phase>validate</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>apply</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
</build>
|
</build>
|
||||||
|
|||||||
51
node/package-lock.json
generated
51
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,11 +52,11 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.19.1-beta.1",
|
"@lancedb/vectordb-darwin-arm64": "0.18.2-beta.0",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.19.1-beta.1",
|
"@lancedb/vectordb-darwin-x64": "0.18.2-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.1",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.18.2-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.1",
|
"@lancedb/vectordb-linux-x64-gnu": "0.18.2-beta.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.1"
|
"@lancedb/vectordb-win32-x64-msvc": "0.18.2-beta.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
@@ -327,9 +327,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.1-beta.1.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.18.2-beta.0.tgz",
|
||||||
"integrity": "sha512-Epvel0pF5TM6MtIWQ2KhqezqSSHTL3Wr7a2rGAwz6X/XY23i6DbMPpPs0HyeIDzDrhxNfE3cz3S+SiCA6xpR0g==",
|
"integrity": "sha512-FzIcElkS6R5I5kU1S5m7yLVTB1Duv1XcmZQtVmYl/JjNlfxS1WTtMzdzMqSBFohDcgU2Tkc5+1FpK1B94dUUbg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -340,9 +340,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.1-beta.1.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.18.2-beta.0.tgz",
|
||||||
"integrity": "sha512-hOiUSlIoISbiXytp46hToi/r6sF5pImAsfbzCsIq8ExDV4TPa8fjbhcIT80vxxOwc2mpSSK4HsVJYod95RSbEQ==",
|
"integrity": "sha512-jv+XludfLNBDm1DjdqyghwDMtd4E+ygwycQpkpK72wyZSh6Qytrgq+4dNi/zCZ3UChFLbKbIxrVxv9yENQn2Pg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -353,9 +353,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.1-beta.1.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.18.2-beta.0.tgz",
|
||||||
"integrity": "sha512-/1JhGVDEngwrlM8o2TNW8G6nJ9U/VgHKAORmj/cTA7O30helJIoo9jfvUAUy+vZ4VoEwRXQbMI+gaYTg0l3MTg==",
|
"integrity": "sha512-8/fBpbNYhhpetf/pZv0DyPnQkeAbsiICMyCoRiNu5auvQK4AsGF1XvLWrDi68u9F0GysBKvuatYuGqa/yh+Anw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -366,9 +366,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.1-beta.1.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.18.2-beta.0.tgz",
|
||||||
"integrity": "sha512-zNRGSSUt8nTJMmll4NdxhQjwxR8Rezq3T4dsRoiDts5ienMam5HFjYiZ3FkDZQo16rgq2BcbFuH1G8u1chywlg==",
|
"integrity": "sha512-7a1Kc/2V2ff4HlLzXyXVdK0Z0VIFUt50v2SBRdlcycJ0NLW9ZqV+9UjB/NAOwMXVgYd7d3rKjACGkQzkpvcyeg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -379,9 +379,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.1-beta.1.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.18.2-beta.0.tgz",
|
||||||
"integrity": "sha512-yV550AJGlsIFdm1KoHQPJ1TZx121ZXCIdebBtBZj3wOObIhyB/i0kZAtGvwjkmr7EYyfzt1EHZzbjSGVdehIAA==",
|
"integrity": "sha512-EeCiSf2RtJMESnkIca28GI6rAStYj2q9sVIyNCXpmIZSkJVpfQ3iswHGAbHrEfaPl0J1Re9cnRHLLuqkumwiIQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -1184,10 +1184,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/axios": {
|
"node_modules/axios": {
|
||||||
"version": "1.8.4",
|
"version": "1.7.7",
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
|
||||||
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
"integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==",
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"follow-redirects": "^1.15.6",
|
"follow-redirects": "^1.15.6",
|
||||||
"form-data": "^4.0.0",
|
"form-data": "^4.0.0",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"private": false,
|
"private": false,
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
@@ -89,10 +89,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.19.1-beta.1",
|
"@lancedb/vectordb-darwin-x64": "0.18.2-beta.1",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.19.1-beta.1",
|
"@lancedb/vectordb-darwin-arm64": "0.18.2-beta.1",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.1",
|
"@lancedb/vectordb-linux-x64-gnu": "0.18.2-beta.1",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.1",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.18.2-beta.1",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.1"
|
"@lancedb/vectordb-win32-x64-msvc": "0.18.2-beta.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.19.1-beta.1"
|
version = "0.18.2-beta.1"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
@@ -28,9 +28,6 @@ napi-derive = "2.16.4"
|
|||||||
lzma-sys = { version = "*", features = ["static"] }
|
lzma-sys = { version = "*", features = ["static"] }
|
||||||
log.workspace = true
|
log.workspace = true
|
||||||
|
|
||||||
# Workaround for build failure until we can fix it.
|
|
||||||
aws-lc-sys = "=0.28.0"
|
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
napi-build = "2.1"
|
napi-build = "2.1"
|
||||||
|
|
||||||
|
|||||||
@@ -374,71 +374,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(table2.numRows).toBe(4);
|
expect(table2.numRows).toBe(4);
|
||||||
expect(table2.schema).toEqual(schema);
|
expect(table2.schema).toEqual(schema);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should correctly retain values in nested struct fields", async function () {
|
|
||||||
// Define test data with nested struct
|
|
||||||
const testData = [
|
|
||||||
{
|
|
||||||
id: "doc1",
|
|
||||||
vector: [1, 2, 3],
|
|
||||||
metadata: {
|
|
||||||
filePath: "/path/to/file1.ts",
|
|
||||||
startLine: 10,
|
|
||||||
endLine: 20,
|
|
||||||
text: "function test() { return true; }",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
id: "doc2",
|
|
||||||
vector: [4, 5, 6],
|
|
||||||
metadata: {
|
|
||||||
filePath: "/path/to/file2.ts",
|
|
||||||
startLine: 30,
|
|
||||||
endLine: 40,
|
|
||||||
text: "function test2() { return false; }",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
// Create Arrow table from the data
|
|
||||||
const table = makeArrowTable(testData);
|
|
||||||
|
|
||||||
// Verify schema has the nested struct fields
|
|
||||||
const metadataField = table.schema.fields.find(
|
|
||||||
(f) => f.name === "metadata",
|
|
||||||
);
|
|
||||||
expect(metadataField).toBeDefined();
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: accessing fields in different Arrow versions
|
|
||||||
const childNames = metadataField?.type.children.map((c: any) => c.name);
|
|
||||||
expect(childNames).toEqual([
|
|
||||||
"filePath",
|
|
||||||
"startLine",
|
|
||||||
"endLine",
|
|
||||||
"text",
|
|
||||||
]);
|
|
||||||
|
|
||||||
// Convert to buffer and back (simulating storage and retrieval)
|
|
||||||
const buf = await fromTableToBuffer(table);
|
|
||||||
const retrievedTable = tableFromIPC(buf);
|
|
||||||
|
|
||||||
// Verify the retrieved table has the same structure
|
|
||||||
const rows = [];
|
|
||||||
for (let i = 0; i < retrievedTable.numRows; i++) {
|
|
||||||
rows.push(retrievedTable.get(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check values in the first row
|
|
||||||
const firstRow = rows[0];
|
|
||||||
expect(firstRow.id).toBe("doc1");
|
|
||||||
expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]);
|
|
||||||
|
|
||||||
// Verify metadata values are preserved (this is where the bug is)
|
|
||||||
expect(firstRow.metadata).toBeDefined();
|
|
||||||
expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts");
|
|
||||||
expect(firstRow.metadata.startLine).toBe(10);
|
|
||||||
expect(firstRow.metadata.endLine).toBe(20);
|
|
||||||
expect(firstRow.metadata.text).toBe("function test() { return true; }");
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
class DummyEmbedding extends EmbeddingFunction<string> {
|
class DummyEmbedding extends EmbeddingFunction<string> {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import * as arrow16 from "apache-arrow-16";
|
|||||||
import * as arrow17 from "apache-arrow-17";
|
import * as arrow17 from "apache-arrow-17";
|
||||||
import * as arrow18 from "apache-arrow-18";
|
import * as arrow18 from "apache-arrow-18";
|
||||||
|
|
||||||
import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
|
import { Table, connect } from "../lancedb";
|
||||||
import {
|
import {
|
||||||
Table as ArrowTable,
|
Table as ArrowTable,
|
||||||
Field,
|
Field,
|
||||||
@@ -33,7 +33,6 @@ import {
|
|||||||
register,
|
register,
|
||||||
} from "../lancedb/embedding";
|
} from "../lancedb/embedding";
|
||||||
import { Index } from "../lancedb/indices";
|
import { Index } from "../lancedb/indices";
|
||||||
import { instanceOfFullTextQuery } from "../lancedb/query";
|
|
||||||
|
|
||||||
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||||
"Given a table",
|
"Given a table",
|
||||||
@@ -71,29 +70,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
await expect(table.countRows()).resolves.toBe(3);
|
await expect(table.countRows()).resolves.toBe(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should show table stats", async () => {
|
|
||||||
await table.add([{ id: 1 }, { id: 2 }]);
|
|
||||||
await table.add([{ id: 1 }]);
|
|
||||||
await expect(table.stats()).resolves.toEqual({
|
|
||||||
fragmentStats: {
|
|
||||||
lengths: {
|
|
||||||
max: 2,
|
|
||||||
mean: 1,
|
|
||||||
min: 1,
|
|
||||||
p25: 1,
|
|
||||||
p50: 2,
|
|
||||||
p75: 2,
|
|
||||||
p99: 2,
|
|
||||||
},
|
|
||||||
numFragments: 2,
|
|
||||||
numSmallFragments: 2,
|
|
||||||
},
|
|
||||||
numIndices: 0,
|
|
||||||
numRows: 3,
|
|
||||||
totalBytes: 24,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("should overwrite data if asked", async () => {
|
it("should overwrite data if asked", async () => {
|
||||||
await table.add([{ id: 1 }, { id: 2 }]);
|
await table.add([{ id: 1 }, { id: 2 }]);
|
||||||
await table.add([{ id: 1 }], { mode: "overwrite" });
|
await table.add([{ id: 1 }], { mode: "overwrite" });
|
||||||
@@ -338,16 +314,11 @@ describe("merge insert", () => {
|
|||||||
{ a: 3, b: "y" },
|
{ a: 3, b: "y" },
|
||||||
{ a: 4, b: "z" },
|
{ a: 4, b: "z" },
|
||||||
];
|
];
|
||||||
const stats = await table
|
await table
|
||||||
.mergeInsert("a")
|
.mergeInsert("a")
|
||||||
.whenMatchedUpdateAll()
|
.whenMatchedUpdateAll()
|
||||||
.whenNotMatchedInsertAll()
|
.whenNotMatchedInsertAll()
|
||||||
.execute(newData);
|
.execute(newData);
|
||||||
|
|
||||||
expect(stats.numInsertedRows).toBe(1n);
|
|
||||||
expect(stats.numUpdatedRows).toBe(2n);
|
|
||||||
expect(stats.numDeletedRows).toBe(0n);
|
|
||||||
|
|
||||||
const expected = [
|
const expected = [
|
||||||
{ a: 1, b: "a" },
|
{ a: 1, b: "a" },
|
||||||
{ a: 2, b: "x" },
|
{ a: 2, b: "x" },
|
||||||
@@ -535,15 +506,6 @@ describe("When creating an index", () => {
|
|||||||
expect(indices2.length).toBe(0);
|
expect(indices2.length).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should wait for index readiness", async () => {
|
|
||||||
// Create an index and then wait for it to be ready
|
|
||||||
await tbl.createIndex("vec");
|
|
||||||
const indices = await tbl.listIndices();
|
|
||||||
expect(indices.length).toBeGreaterThan(0);
|
|
||||||
const idxName = indices[0].name;
|
|
||||||
await expect(tbl.waitForIndex([idxName], 5)).resolves.toBeUndefined();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("should search with distance range", async () => {
|
it("should search with distance range", async () => {
|
||||||
await tbl.createIndex("vec");
|
await tbl.createIndex("vec");
|
||||||
|
|
||||||
@@ -671,23 +633,6 @@ describe("When creating an index", () => {
|
|||||||
expect(plan2).not.toMatch("LanceScan");
|
expect(plan2).not.toMatch("LanceScan");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should be able to run analyze plan", async () => {
|
|
||||||
await tbl.createIndex("vec");
|
|
||||||
await tbl.add([
|
|
||||||
{
|
|
||||||
id: 300,
|
|
||||||
vec: Array(32)
|
|
||||||
.fill(1)
|
|
||||||
.map(() => Math.random()),
|
|
||||||
tags: [],
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
|
|
||||||
const plan = await tbl.query().nearestTo(queryVec).analyzePlan();
|
|
||||||
expect(plan).toMatch("AnalyzeExec");
|
|
||||||
expect(plan).toMatch("metrics=");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("should be able to query with row id", async () => {
|
it("should be able to query with row id", async () => {
|
||||||
const results = await tbl
|
const results = await tbl
|
||||||
.query()
|
.query()
|
||||||
@@ -861,7 +806,6 @@ describe("When creating an index", () => {
|
|||||||
// Only build index over v1
|
// Only build index over v1
|
||||||
await tbl.createIndex("vec", {
|
await tbl.createIndex("vec", {
|
||||||
config: Index.ivfPq({ numPartitions: 2, numSubVectors: 2 }),
|
config: Index.ivfPq({ numPartitions: 2, numSubVectors: 2 }),
|
||||||
waitTimeoutSeconds: 30,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const rst = await tbl
|
const rst = await tbl
|
||||||
@@ -906,44 +850,6 @@ describe("When creating an index", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("When querying a table", () => {
|
|
||||||
let tmpDir: tmp.DirResult;
|
|
||||||
beforeEach(() => {
|
|
||||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
|
||||||
});
|
|
||||||
afterEach(() => tmpDir.removeCallback());
|
|
||||||
|
|
||||||
it("should throw an error when timeout is reached", async () => {
|
|
||||||
const db = await connect(tmpDir.name);
|
|
||||||
const data = makeArrowTable([
|
|
||||||
{ text: "a", vector: [0.1, 0.2] },
|
|
||||||
{ text: "b", vector: [0.3, 0.4] },
|
|
||||||
]);
|
|
||||||
const table = await db.createTable("test", data);
|
|
||||||
await table.createIndex("text", { config: Index.fts() });
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
table.query().where("text != 'a'").toArray({ timeoutMs: 0 }),
|
|
||||||
).rejects.toThrow("Query timeout");
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
table.query().nearestTo([0.0, 0.0]).toArrow({ timeoutMs: 0 }),
|
|
||||||
).rejects.toThrow("Query timeout");
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
table.search("a", "fts").toArray({ timeoutMs: 0 }),
|
|
||||||
).rejects.toThrow("Query timeout");
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
table
|
|
||||||
.query()
|
|
||||||
.nearestToText("a")
|
|
||||||
.nearestTo([0.0, 0.0])
|
|
||||||
.toArrow({ timeoutMs: 0 }),
|
|
||||||
).rejects.toThrow("Query timeout");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("Read consistency interval", () => {
|
describe("Read consistency interval", () => {
|
||||||
let tmpDir: tmp.DirResult;
|
let tmpDir: tmp.DirResult;
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
@@ -1206,73 +1112,6 @@ describe("when dealing with versioning", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("when dealing with tags", () => {
|
|
||||||
let tmpDir: tmp.DirResult;
|
|
||||||
beforeEach(() => {
|
|
||||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
|
||||||
});
|
|
||||||
afterEach(() => {
|
|
||||||
tmpDir.removeCallback();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("can manage tags", async () => {
|
|
||||||
const conn = await connect(tmpDir.name, {
|
|
||||||
readConsistencyInterval: 0,
|
|
||||||
});
|
|
||||||
|
|
||||||
const table = await conn.createTable("my_table", [
|
|
||||||
{ id: 1n, vector: [0.1, 0.2] },
|
|
||||||
]);
|
|
||||||
expect(await table.version()).toBe(1);
|
|
||||||
|
|
||||||
await table.add([{ id: 2n, vector: [0.3, 0.4] }]);
|
|
||||||
expect(await table.version()).toBe(2);
|
|
||||||
|
|
||||||
const tagsManager = await table.tags();
|
|
||||||
|
|
||||||
const initialTags = await tagsManager.list();
|
|
||||||
expect(Object.keys(initialTags).length).toBe(0);
|
|
||||||
|
|
||||||
const tag1 = "tag1";
|
|
||||||
await tagsManager.create(tag1, 1);
|
|
||||||
expect(await tagsManager.getVersion(tag1)).toBe(1);
|
|
||||||
|
|
||||||
const tagsAfterFirst = await tagsManager.list();
|
|
||||||
expect(Object.keys(tagsAfterFirst).length).toBe(1);
|
|
||||||
expect(tagsAfterFirst).toHaveProperty(tag1);
|
|
||||||
expect(tagsAfterFirst[tag1].version).toBe(1);
|
|
||||||
|
|
||||||
await tagsManager.create("tag2", 2);
|
|
||||||
expect(await tagsManager.getVersion("tag2")).toBe(2);
|
|
||||||
|
|
||||||
const tagsAfterSecond = await tagsManager.list();
|
|
||||||
expect(Object.keys(tagsAfterSecond).length).toBe(2);
|
|
||||||
expect(tagsAfterSecond).toHaveProperty(tag1);
|
|
||||||
expect(tagsAfterSecond[tag1].version).toBe(1);
|
|
||||||
expect(tagsAfterSecond).toHaveProperty("tag2");
|
|
||||||
expect(tagsAfterSecond["tag2"].version).toBe(2);
|
|
||||||
|
|
||||||
await table.add([{ id: 3n, vector: [0.5, 0.6] }]);
|
|
||||||
await tagsManager.update(tag1, 3);
|
|
||||||
expect(await tagsManager.getVersion(tag1)).toBe(3);
|
|
||||||
|
|
||||||
await tagsManager.delete("tag2");
|
|
||||||
const tagsAfterDelete = await tagsManager.list();
|
|
||||||
expect(Object.keys(tagsAfterDelete).length).toBe(1);
|
|
||||||
expect(tagsAfterDelete).toHaveProperty(tag1);
|
|
||||||
expect(tagsAfterDelete[tag1].version).toBe(3);
|
|
||||||
|
|
||||||
await table.add([{ id: 4n, vector: [0.7, 0.8] }]);
|
|
||||||
expect(await table.version()).toBe(4);
|
|
||||||
|
|
||||||
await table.checkout(tag1);
|
|
||||||
expect(await table.version()).toBe(3);
|
|
||||||
|
|
||||||
await table.checkoutLatest();
|
|
||||||
expect(await table.version()).toBe(4);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("when optimizing a dataset", () => {
|
describe("when optimizing a dataset", () => {
|
||||||
let tmpDir: tmp.DirResult;
|
let tmpDir: tmp.DirResult;
|
||||||
let table: Table;
|
let table: Table;
|
||||||
@@ -1408,56 +1247,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
|
|
||||||
const results = await table.search("hello").toArray();
|
const results = await table.search("hello").toArray();
|
||||||
expect(results[0].text).toBe(data[0].text);
|
expect(results[0].text).toBe(data[0].text);
|
||||||
|
|
||||||
const query = new MatchQuery("goodbye", "text");
|
|
||||||
expect(instanceOfFullTextQuery(query)).toBe(true);
|
|
||||||
const results2 = await table
|
|
||||||
.search(new MatchQuery("goodbye", "text"))
|
|
||||||
.toArray();
|
|
||||||
expect(results2[0].text).toBe(data[1].text);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("prewarm full text search index", async () => {
|
|
||||||
const db = await connect(tmpDir.name);
|
|
||||||
const data = [
|
|
||||||
{ text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
|
|
||||||
{ text: ["lance database"], vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
|
|
||||||
{ text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
|
|
||||||
{ text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
|
|
||||||
];
|
|
||||||
const table = await db.createTable("test", data);
|
|
||||||
await table.createIndex("text", {
|
|
||||||
config: Index.fts(),
|
|
||||||
});
|
|
||||||
|
|
||||||
// For the moment, we just confirm we can call prewarmIndex without error
|
|
||||||
// and still search it afterwards
|
|
||||||
await table.prewarmIndex("text_idx");
|
|
||||||
|
|
||||||
const results = await table.search("lance").toArray();
|
|
||||||
expect(results.length).toBe(3);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("full text index on list", async () => {
|
|
||||||
const db = await connect(tmpDir.name);
|
|
||||||
const data = [
|
|
||||||
{ text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
|
|
||||||
{ text: ["lance database"], vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
|
|
||||||
{ text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
|
|
||||||
{ text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
|
|
||||||
];
|
|
||||||
const table = await db.createTable("test", data);
|
|
||||||
await table.createIndex("text", {
|
|
||||||
config: Index.fts(),
|
|
||||||
});
|
|
||||||
|
|
||||||
const results = await table.search("lance").toArray();
|
|
||||||
expect(results.length).toBe(3);
|
|
||||||
|
|
||||||
const results2 = await table.search('"lance database"').toArray();
|
|
||||||
expect(results2.length).toBe(2);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test("full text search without positions", async () => {
|
test("full text search without positions", async () => {
|
||||||
@@ -1510,43 +1299,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(results.length).toBe(2);
|
expect(results.length).toBe(2);
|
||||||
const phraseResults = await table.search('"hello world"').toArray();
|
const phraseResults = await table.search('"hello world"').toArray();
|
||||||
expect(phraseResults.length).toBe(1);
|
expect(phraseResults.length).toBe(1);
|
||||||
const phraseResults2 = await table
|
|
||||||
.search(new PhraseQuery("hello world", "text"))
|
|
||||||
.toArray();
|
|
||||||
expect(phraseResults2.length).toBe(1);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("full text search fuzzy query", async () => {
|
|
||||||
const db = await connect(tmpDir.name);
|
|
||||||
const data = [
|
|
||||||
{ text: "fa", vector: [0.1, 0.2, 0.3] },
|
|
||||||
{ text: "fo", vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: "fob", vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: "focus", vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: "foo", vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: "food", vector: [0.4, 0.5, 0.6] },
|
|
||||||
{ text: "foul", vector: [0.4, 0.5, 0.6] },
|
|
||||||
];
|
|
||||||
const table = await db.createTable("test", data);
|
|
||||||
await table.createIndex("text", {
|
|
||||||
config: Index.fts(),
|
|
||||||
});
|
|
||||||
|
|
||||||
const results = await table
|
|
||||||
.search(new MatchQuery("foo", "text"))
|
|
||||||
.toArray();
|
|
||||||
expect(results.length).toBe(1);
|
|
||||||
expect(results[0].text).toBe("foo");
|
|
||||||
|
|
||||||
const fuzzyResults = await table
|
|
||||||
.search(new MatchQuery("foo", "text", { fuzziness: 1 }))
|
|
||||||
.toArray();
|
|
||||||
expect(fuzzyResults.length).toBe(4);
|
|
||||||
const resultSet = new Set(fuzzyResults.map((r) => r.text));
|
|
||||||
expect(resultSet.has("foo")).toBe(true);
|
|
||||||
expect(resultSet.has("fob")).toBe(true);
|
|
||||||
expect(resultSet.has("fo")).toBe(true);
|
|
||||||
expect(resultSet.has("food")).toBe(true);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test.each([
|
test.each([
|
||||||
@@ -1594,30 +1346,6 @@ describe("when calling explainPlan", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("when calling analyzePlan", () => {
|
|
||||||
let tmpDir: tmp.DirResult;
|
|
||||||
let table: Table;
|
|
||||||
let queryVec: number[];
|
|
||||||
beforeEach(async () => {
|
|
||||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
|
||||||
const con = await connect(tmpDir.name);
|
|
||||||
table = await con.createTable("vectors", [{ id: 1, vector: [1.1, 0.9] }]);
|
|
||||||
});
|
|
||||||
|
|
||||||
afterEach(() => {
|
|
||||||
tmpDir.removeCallback();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("retrieves runtime metrics", async () => {
|
|
||||||
queryVec = Array(2)
|
|
||||||
.fill(1)
|
|
||||||
.map(() => Math.random());
|
|
||||||
const plan = await table.query().nearestTo(queryVec).analyzePlan();
|
|
||||||
console.log("Query Plan:\n", plan); // <--- Print the plan
|
|
||||||
expect(plan).toMatch("AnalyzeExec");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("column name options", () => {
|
describe("column name options", () => {
|
||||||
let tmpDir: tmp.DirResult;
|
let tmpDir: tmp.DirResult;
|
||||||
let table: Table;
|
let table: Table;
|
||||||
|
|||||||
@@ -639,9 +639,8 @@ function transposeData(
|
|||||||
): Vector {
|
): Vector {
|
||||||
if (field.type instanceof Struct) {
|
if (field.type instanceof Struct) {
|
||||||
const childFields = field.type.children;
|
const childFields = field.type.children;
|
||||||
const fullPath = [...path, field.name];
|
|
||||||
const childVectors = childFields.map((child) => {
|
const childVectors = childFields.map((child) => {
|
||||||
return transposeData(data, child, fullPath);
|
return transposeData(data, child, [...path, child.name]);
|
||||||
});
|
});
|
||||||
const structData = makeData({
|
const structData = makeData({
|
||||||
type: field.type,
|
type: field.type,
|
||||||
@@ -653,14 +652,7 @@ function transposeData(
|
|||||||
const values = data.map((datum) => {
|
const values = data.map((datum) => {
|
||||||
let current: unknown = datum;
|
let current: unknown = datum;
|
||||||
for (const key of valuesPath) {
|
for (const key of valuesPath) {
|
||||||
if (current == null) {
|
if (isObject(current) && Object.hasOwn(current, key)) {
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (
|
|
||||||
isObject(current) &&
|
|
||||||
(Object.hasOwn(current, key) || key in current)
|
|
||||||
) {
|
|
||||||
current = current[key];
|
current = current[key];
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@@ -23,12 +23,6 @@ export {
|
|||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
CompactionStats,
|
CompactionStats,
|
||||||
RemovalStats,
|
RemovalStats,
|
||||||
TableStatistics,
|
|
||||||
FragmentStatistics,
|
|
||||||
FragmentSummaryStats,
|
|
||||||
Tags,
|
|
||||||
TagContents,
|
|
||||||
MergeStats,
|
|
||||||
} from "./native.js";
|
} from "./native.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
@@ -53,12 +47,6 @@ export {
|
|||||||
QueryExecutionOptions,
|
QueryExecutionOptions,
|
||||||
FullTextSearchOptions,
|
FullTextSearchOptions,
|
||||||
RecordBatchIterator,
|
RecordBatchIterator,
|
||||||
FullTextQuery,
|
|
||||||
MatchQuery,
|
|
||||||
PhraseQuery,
|
|
||||||
BoostQuery,
|
|
||||||
MultiMatchQuery,
|
|
||||||
FullTextQueryType,
|
|
||||||
} from "./query";
|
} from "./query";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
|
|||||||
@@ -681,6 +681,4 @@ export interface IndexOptions {
|
|||||||
* The default is true
|
* The default is true
|
||||||
*/
|
*/
|
||||||
replace?: boolean;
|
replace?: boolean;
|
||||||
|
|
||||||
waitTimeoutSeconds?: number;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
import { Data, Schema, fromDataToBuffer } from "./arrow";
|
import { Data, Schema, fromDataToBuffer } from "./arrow";
|
||||||
import { MergeStats, NativeMergeInsertBuilder } from "./native";
|
import { NativeMergeInsertBuilder } from "./native";
|
||||||
|
|
||||||
/** A builder used to create and run a merge insert operation */
|
/** A builder used to create and run a merge insert operation */
|
||||||
export class MergeInsertBuilder {
|
export class MergeInsertBuilder {
|
||||||
@@ -73,9 +73,9 @@ export class MergeInsertBuilder {
|
|||||||
/**
|
/**
|
||||||
* Executes the merge insert operation
|
* Executes the merge insert operation
|
||||||
*
|
*
|
||||||
* @returns Statistics about the merge operation: counts of inserted, updated, and deleted rows
|
* Nothing is returned but the `Table` is updated
|
||||||
*/
|
*/
|
||||||
async execute(data: Data): Promise<MergeStats> {
|
async execute(data: Data): Promise<void> {
|
||||||
let schema: Schema;
|
let schema: Schema;
|
||||||
if (this.#schema instanceof Promise) {
|
if (this.#schema instanceof Promise) {
|
||||||
schema = await this.#schema;
|
schema = await this.#schema;
|
||||||
@@ -84,6 +84,6 @@ export class MergeInsertBuilder {
|
|||||||
schema = this.#schema;
|
schema = this.#schema;
|
||||||
}
|
}
|
||||||
const buffer = await fromDataToBuffer(data, undefined, schema);
|
const buffer = await fromDataToBuffer(data, undefined, schema);
|
||||||
return await this.#native.execute(buffer);
|
await this.#native.execute(buffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,14 +11,12 @@ import {
|
|||||||
} from "./arrow";
|
} from "./arrow";
|
||||||
import { type IvfPqOptions } from "./indices";
|
import { type IvfPqOptions } from "./indices";
|
||||||
import {
|
import {
|
||||||
JsFullTextQuery,
|
|
||||||
RecordBatchIterator as NativeBatchIterator,
|
RecordBatchIterator as NativeBatchIterator,
|
||||||
Query as NativeQuery,
|
Query as NativeQuery,
|
||||||
Table as NativeTable,
|
Table as NativeTable,
|
||||||
VectorQuery as NativeVectorQuery,
|
VectorQuery as NativeVectorQuery,
|
||||||
} from "./native";
|
} from "./native";
|
||||||
import { Reranker } from "./rerankers";
|
import { Reranker } from "./rerankers";
|
||||||
|
|
||||||
export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
||||||
private promisedInner?: Promise<NativeBatchIterator>;
|
private promisedInner?: Promise<NativeBatchIterator>;
|
||||||
private inner?: NativeBatchIterator;
|
private inner?: NativeBatchIterator;
|
||||||
@@ -64,7 +62,7 @@ class RecordBatchIterable<
|
|||||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||||
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
|
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
|
||||||
return new RecordBatchIterator(
|
return new RecordBatchIterator(
|
||||||
this.inner.execute(this.options?.maxBatchLength, this.options?.timeoutMs),
|
this.inner.execute(this.options?.maxBatchLength),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -80,11 +78,6 @@ export interface QueryExecutionOptions {
|
|||||||
* in smaller chunks.
|
* in smaller chunks.
|
||||||
*/
|
*/
|
||||||
maxBatchLength?: number;
|
maxBatchLength?: number;
|
||||||
|
|
||||||
/**
|
|
||||||
* Timeout for query execution in milliseconds
|
|
||||||
*/
|
|
||||||
timeoutMs?: number;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -159,7 +152,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
|
|||||||
}
|
}
|
||||||
|
|
||||||
fullTextSearch(
|
fullTextSearch(
|
||||||
query: string | FullTextQuery,
|
query: string,
|
||||||
options?: Partial<FullTextSearchOptions>,
|
options?: Partial<FullTextSearchOptions>,
|
||||||
): this {
|
): this {
|
||||||
let columns: string[] | null = null;
|
let columns: string[] | null = null;
|
||||||
@@ -171,16 +164,9 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.doCall((inner: NativeQueryType) => {
|
this.doCall((inner: NativeQueryType) =>
|
||||||
if (typeof query === "string") {
|
inner.fullTextSearch(query, columns),
|
||||||
inner.fullTextSearch({
|
);
|
||||||
query: query,
|
|
||||||
columns: columns,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
inner.fullTextSearch({ query: query.inner });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,11 +273,9 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
|
|||||||
options?: Partial<QueryExecutionOptions>,
|
options?: Partial<QueryExecutionOptions>,
|
||||||
): Promise<NativeBatchIterator> {
|
): Promise<NativeBatchIterator> {
|
||||||
if (this.inner instanceof Promise) {
|
if (this.inner instanceof Promise) {
|
||||||
return this.inner.then((inner) =>
|
return this.inner.then((inner) => inner.execute(options?.maxBatchLength));
|
||||||
inner.execute(options?.maxBatchLength, options?.timeoutMs),
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
return this.inner.execute(options?.maxBatchLength, options?.timeoutMs);
|
return this.inner.execute(options?.maxBatchLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -364,43 +348,6 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
|
|||||||
return this.inner.explainPlan(verbose);
|
return this.inner.explainPlan(verbose);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes the query and returns the physical query plan annotated with runtime metrics.
|
|
||||||
*
|
|
||||||
* This is useful for debugging and performance analysis, as it shows how the query was executed
|
|
||||||
* and includes metrics such as elapsed time, rows processed, and I/O statistics.
|
|
||||||
*
|
|
||||||
* @example
|
|
||||||
* import * as lancedb from "@lancedb/lancedb"
|
|
||||||
*
|
|
||||||
* const db = await lancedb.connect("./.lancedb");
|
|
||||||
* const table = await db.createTable("my_table", [
|
|
||||||
* { vector: [1.1, 0.9], id: "1" },
|
|
||||||
* ]);
|
|
||||||
*
|
|
||||||
* const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
|
|
||||||
*
|
|
||||||
* Example output (with runtime metrics inlined):
|
|
||||||
* AnalyzeExec verbose=true, metrics=[]
|
|
||||||
* ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
|
|
||||||
* Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
|
|
||||||
* CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
|
|
||||||
* GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
|
|
||||||
* FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
|
|
||||||
* SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
|
|
||||||
* KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
|
|
||||||
* LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
|
|
||||||
*
|
|
||||||
* @returns A query execution plan with runtime metrics for each step.
|
|
||||||
*/
|
|
||||||
async analyzePlan(): Promise<string> {
|
|
||||||
if (this.inner instanceof Promise) {
|
|
||||||
return this.inner.then((inner) => inner.analyzePlan());
|
|
||||||
} else {
|
|
||||||
return this.inner.analyzePlan();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -734,177 +681,8 @@ export class Query extends QueryBase<NativeQuery> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nearestToText(query: string | FullTextQuery, columns?: string[]): Query {
|
nearestToText(query: string, columns?: string[]): Query {
|
||||||
this.doCall((inner) => {
|
this.doCall((inner) => inner.fullTextSearch(query, columns));
|
||||||
if (typeof query === "string") {
|
|
||||||
inner.fullTextSearch({
|
|
||||||
query: query,
|
|
||||||
columns: columns,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
inner.fullTextSearch({ query: query.inner });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Enum representing the types of full-text queries supported.
|
|
||||||
*
|
|
||||||
* - `Match`: Performs a full-text search for terms in the query string.
|
|
||||||
* - `MatchPhrase`: Searches for an exact phrase match in the text.
|
|
||||||
* - `Boost`: Boosts the relevance score of specific terms in the query.
|
|
||||||
* - `MultiMatch`: Searches across multiple fields for the query terms.
|
|
||||||
*/
|
|
||||||
export enum FullTextQueryType {
|
|
||||||
Match = "match",
|
|
||||||
MatchPhrase = "match_phrase",
|
|
||||||
Boost = "boost",
|
|
||||||
MultiMatch = "multi_match",
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents a full-text query interface.
|
|
||||||
* This interface defines the structure and behavior for full-text queries,
|
|
||||||
* including methods to retrieve the query type and convert the query to a dictionary format.
|
|
||||||
*/
|
|
||||||
export interface FullTextQuery {
|
|
||||||
/**
|
|
||||||
* Returns the inner query object.
|
|
||||||
* This is the underlying query object used by the database engine.
|
|
||||||
* @ignore
|
|
||||||
*/
|
|
||||||
inner: JsFullTextQuery;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The type of the full-text query.
|
|
||||||
*/
|
|
||||||
queryType(): FullTextQueryType;
|
|
||||||
}
|
|
||||||
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: we want any here
|
|
||||||
export function instanceOfFullTextQuery(obj: any): obj is FullTextQuery {
|
|
||||||
return obj != null && obj.inner instanceof JsFullTextQuery;
|
|
||||||
}
|
|
||||||
|
|
||||||
export class MatchQuery implements FullTextQuery {
|
|
||||||
/** @ignore */
|
|
||||||
public readonly inner: JsFullTextQuery;
|
|
||||||
/**
|
|
||||||
* Creates an instance of MatchQuery.
|
|
||||||
*
|
|
||||||
* @param query - The text query to search for.
|
|
||||||
* @param column - The name of the column to search within.
|
|
||||||
* @param options - Optional parameters for the match query.
|
|
||||||
* - `boost`: The boost factor for the query (default is 1.0).
|
|
||||||
* - `fuzziness`: The fuzziness level for the query (default is 0).
|
|
||||||
* - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
|
|
||||||
*/
|
|
||||||
constructor(
|
|
||||||
query: string,
|
|
||||||
column: string,
|
|
||||||
options?: {
|
|
||||||
boost?: number;
|
|
||||||
fuzziness?: number;
|
|
||||||
maxExpansions?: number;
|
|
||||||
},
|
|
||||||
) {
|
|
||||||
let fuzziness = options?.fuzziness;
|
|
||||||
if (fuzziness === undefined) {
|
|
||||||
fuzziness = 0;
|
|
||||||
}
|
|
||||||
this.inner = JsFullTextQuery.matchQuery(
|
|
||||||
query,
|
|
||||||
column,
|
|
||||||
options?.boost ?? 1.0,
|
|
||||||
fuzziness,
|
|
||||||
options?.maxExpansions ?? 50,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
queryType(): FullTextQueryType {
|
|
||||||
return FullTextQueryType.Match;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export class PhraseQuery implements FullTextQuery {
|
|
||||||
/** @ignore */
|
|
||||||
public readonly inner: JsFullTextQuery;
|
|
||||||
/**
|
|
||||||
* Creates an instance of `PhraseQuery`.
|
|
||||||
*
|
|
||||||
* @param query - The phrase to search for in the specified column.
|
|
||||||
* @param column - The name of the column to search within.
|
|
||||||
*/
|
|
||||||
constructor(query: string, column: string) {
|
|
||||||
this.inner = JsFullTextQuery.phraseQuery(query, column);
|
|
||||||
}
|
|
||||||
|
|
||||||
queryType(): FullTextQueryType {
|
|
||||||
return FullTextQueryType.MatchPhrase;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export class BoostQuery implements FullTextQuery {
|
|
||||||
/** @ignore */
|
|
||||||
public readonly inner: JsFullTextQuery;
|
|
||||||
/**
|
|
||||||
* Creates an instance of BoostQuery.
|
|
||||||
* The boost returns documents that match the positive query,
|
|
||||||
* but penalizes those that match the negative query.
|
|
||||||
* the penalty is controlled by the `negativeBoost` parameter.
|
|
||||||
*
|
|
||||||
* @param positive - The positive query that boosts the relevance score.
|
|
||||||
* @param negative - The negative query that reduces the relevance score.
|
|
||||||
* @param options - Optional parameters for the boost query.
|
|
||||||
* - `negativeBoost`: The boost factor for the negative query (default is 0.0).
|
|
||||||
*/
|
|
||||||
constructor(
|
|
||||||
positive: FullTextQuery,
|
|
||||||
negative: FullTextQuery,
|
|
||||||
options?: {
|
|
||||||
negativeBoost?: number;
|
|
||||||
},
|
|
||||||
) {
|
|
||||||
this.inner = JsFullTextQuery.boostQuery(
|
|
||||||
positive.inner,
|
|
||||||
negative.inner,
|
|
||||||
options?.negativeBoost,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
queryType(): FullTextQueryType {
|
|
||||||
return FullTextQueryType.Boost;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export class MultiMatchQuery implements FullTextQuery {
|
|
||||||
/** @ignore */
|
|
||||||
public readonly inner: JsFullTextQuery;
|
|
||||||
/**
|
|
||||||
* Creates an instance of MultiMatchQuery.
|
|
||||||
*
|
|
||||||
* @param query - The text query to search for across multiple columns.
|
|
||||||
* @param columns - An array of column names to search within.
|
|
||||||
* @param options - Optional parameters for the multi-match query.
|
|
||||||
* - `boosts`: An array of boost factors for each column (default is 1.0 for all).
|
|
||||||
*/
|
|
||||||
constructor(
|
|
||||||
query: string,
|
|
||||||
columns: string[],
|
|
||||||
options?: {
|
|
||||||
boosts?: number[];
|
|
||||||
},
|
|
||||||
) {
|
|
||||||
this.inner = JsFullTextQuery.multiMatchQuery(
|
|
||||||
query,
|
|
||||||
columns,
|
|
||||||
options?.boosts,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
queryType(): FullTextQueryType {
|
|
||||||
return FullTextQueryType.MultiMatch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -20,16 +20,9 @@ import {
|
|||||||
IndexConfig,
|
IndexConfig,
|
||||||
IndexStatistics,
|
IndexStatistics,
|
||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
TableStatistics,
|
|
||||||
Tags,
|
|
||||||
Table as _NativeTable,
|
Table as _NativeTable,
|
||||||
} from "./native";
|
} from "./native";
|
||||||
import {
|
import { Query, VectorQuery } from "./query";
|
||||||
FullTextQuery,
|
|
||||||
Query,
|
|
||||||
VectorQuery,
|
|
||||||
instanceOfFullTextQuery,
|
|
||||||
} from "./query";
|
|
||||||
import { sanitizeType } from "./sanitize";
|
import { sanitizeType } from "./sanitize";
|
||||||
import { IntoSql, toSQL } from "./util";
|
import { IntoSql, toSQL } from "./util";
|
||||||
export { IndexConfig } from "./native";
|
export { IndexConfig } from "./native";
|
||||||
@@ -237,30 +230,6 @@ export abstract class Table {
|
|||||||
*/
|
*/
|
||||||
abstract dropIndex(name: string): Promise<void>;
|
abstract dropIndex(name: string): Promise<void>;
|
||||||
|
|
||||||
/**
|
|
||||||
* Prewarm an index in the table.
|
|
||||||
*
|
|
||||||
* @param name The name of the index.
|
|
||||||
*
|
|
||||||
* This will load the index into memory. This may reduce the cold-start time for
|
|
||||||
* future queries. If the index does not fit in the cache then this call may be
|
|
||||||
* wasteful.
|
|
||||||
*/
|
|
||||||
abstract prewarmIndex(name: string): Promise<void>;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Waits for asynchronous indexing to complete on the table.
|
|
||||||
*
|
|
||||||
* @param indexNames The name of the indices to wait for
|
|
||||||
* @param timeoutSeconds The number of seconds to wait before timing out
|
|
||||||
*
|
|
||||||
* This will raise an error if the indices are not created and fully indexed within the timeout.
|
|
||||||
*/
|
|
||||||
abstract waitForIndex(
|
|
||||||
indexNames: string[],
|
|
||||||
timeoutSeconds: number,
|
|
||||||
): Promise<void>;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a {@link Query} Builder.
|
* Create a {@link Query} Builder.
|
||||||
*
|
*
|
||||||
@@ -325,7 +294,7 @@ export abstract class Table {
|
|||||||
* if the query is a string and no embedding function is defined, it will be treated as a full text search query
|
* if the query is a string and no embedding function is defined, it will be treated as a full text search query
|
||||||
*/
|
*/
|
||||||
abstract search(
|
abstract search(
|
||||||
query: string | IntoVector | FullTextQuery,
|
query: string | IntoVector,
|
||||||
queryType?: string,
|
queryType?: string,
|
||||||
ftsColumns?: string | string[],
|
ftsColumns?: string | string[],
|
||||||
): VectorQuery | Query;
|
): VectorQuery | Query;
|
||||||
@@ -376,7 +345,7 @@ export abstract class Table {
|
|||||||
*
|
*
|
||||||
* Calling this method will set the table into time-travel mode. If you
|
* Calling this method will set the table into time-travel mode. If you
|
||||||
* wish to return to standard mode, call `checkoutLatest`.
|
* wish to return to standard mode, call `checkoutLatest`.
|
||||||
* @param {number | string} version The version to checkout, could be version number or tag
|
* @param {number} version The version to checkout
|
||||||
* @example
|
* @example
|
||||||
* ```typescript
|
* ```typescript
|
||||||
* import * as lancedb from "@lancedb/lancedb"
|
* import * as lancedb from "@lancedb/lancedb"
|
||||||
@@ -392,8 +361,7 @@ export abstract class Table {
|
|||||||
* console.log(await table.version()); // 2
|
* console.log(await table.version()); // 2
|
||||||
* ```
|
* ```
|
||||||
*/
|
*/
|
||||||
abstract checkout(version: number | string): Promise<void>;
|
abstract checkout(version: number): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checkout the latest version of the table. _This is an in-place operation._
|
* Checkout the latest version of the table. _This is an in-place operation._
|
||||||
*
|
*
|
||||||
@@ -407,23 +375,6 @@ export abstract class Table {
|
|||||||
*/
|
*/
|
||||||
abstract listVersions(): Promise<Version[]>;
|
abstract listVersions(): Promise<Version[]>;
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a tags manager for this table.
|
|
||||||
*
|
|
||||||
* Tags allow you to label specific versions of a table with a human-readable name.
|
|
||||||
* The returned tags manager can be used to list, create, update, or delete tags.
|
|
||||||
*
|
|
||||||
* @returns {Tags} A tags manager for this table
|
|
||||||
* @example
|
|
||||||
* ```typescript
|
|
||||||
* const tagsManager = await table.tags();
|
|
||||||
* await tagsManager.create("v1", 1);
|
|
||||||
* const tags = await tagsManager.list();
|
|
||||||
* console.log(tags); // { "v1": { version: 1, manifestSize: ... } }
|
|
||||||
* ```
|
|
||||||
*/
|
|
||||||
abstract tags(): Promise<Tags>;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Restore the table to the currently checked out version
|
* Restore the table to the currently checked out version
|
||||||
*
|
*
|
||||||
@@ -483,13 +434,6 @@ export abstract class Table {
|
|||||||
* Use {@link Table.listIndices} to find the names of the indices.
|
* Use {@link Table.listIndices} to find the names of the indices.
|
||||||
*/
|
*/
|
||||||
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
|
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
|
||||||
|
|
||||||
/** Returns table and fragment statistics
|
|
||||||
*
|
|
||||||
* @returns {TableStatistics} The table and fragment statistics
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
abstract stats(): Promise<TableStatistics>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export class LocalTable extends Table {
|
export class LocalTable extends Table {
|
||||||
@@ -609,39 +553,23 @@ export class LocalTable extends Table {
|
|||||||
// Bit of a hack to get around the fact that TS has no package-scope.
|
// Bit of a hack to get around the fact that TS has no package-scope.
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||||
const nativeIndex = (options?.config as any)?.inner;
|
const nativeIndex = (options?.config as any)?.inner;
|
||||||
await this.inner.createIndex(
|
await this.inner.createIndex(nativeIndex, column, options?.replace);
|
||||||
nativeIndex,
|
|
||||||
column,
|
|
||||||
options?.replace,
|
|
||||||
options?.waitTimeoutSeconds,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async dropIndex(name: string): Promise<void> {
|
async dropIndex(name: string): Promise<void> {
|
||||||
await this.inner.dropIndex(name);
|
await this.inner.dropIndex(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
async prewarmIndex(name: string): Promise<void> {
|
|
||||||
await this.inner.prewarmIndex(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
async waitForIndex(
|
|
||||||
indexNames: string[],
|
|
||||||
timeoutSeconds: number,
|
|
||||||
): Promise<void> {
|
|
||||||
await this.inner.waitForIndex(indexNames, timeoutSeconds);
|
|
||||||
}
|
|
||||||
|
|
||||||
query(): Query {
|
query(): Query {
|
||||||
return new Query(this.inner);
|
return new Query(this.inner);
|
||||||
}
|
}
|
||||||
|
|
||||||
search(
|
search(
|
||||||
query: string | IntoVector | FullTextQuery,
|
query: string | IntoVector,
|
||||||
queryType: string = "auto",
|
queryType: string = "auto",
|
||||||
ftsColumns?: string | string[],
|
ftsColumns?: string | string[],
|
||||||
): VectorQuery | Query {
|
): VectorQuery | Query {
|
||||||
if (typeof query !== "string" && !instanceOfFullTextQuery(query)) {
|
if (typeof query !== "string") {
|
||||||
if (queryType === "fts") {
|
if (queryType === "fts") {
|
||||||
throw new Error("Cannot perform full text search on a vector query");
|
throw new Error("Cannot perform full text search on a vector query");
|
||||||
}
|
}
|
||||||
@@ -657,10 +585,7 @@ export class LocalTable extends Table {
|
|||||||
|
|
||||||
// The query type is auto or vector
|
// The query type is auto or vector
|
||||||
// fall back to full text search if no embedding functions are defined and the query is a string
|
// fall back to full text search if no embedding functions are defined and the query is a string
|
||||||
if (
|
if (queryType === "auto" && getRegistry().length() === 0) {
|
||||||
queryType === "auto" &&
|
|
||||||
(getRegistry().length() === 0 || instanceOfFullTextQuery(query))
|
|
||||||
) {
|
|
||||||
return this.query().fullTextSearch(query, {
|
return this.query().fullTextSearch(query, {
|
||||||
columns: ftsColumns,
|
columns: ftsColumns,
|
||||||
});
|
});
|
||||||
@@ -726,11 +651,8 @@ export class LocalTable extends Table {
|
|||||||
return await this.inner.version();
|
return await this.inner.version();
|
||||||
}
|
}
|
||||||
|
|
||||||
async checkout(version: number | string): Promise<void> {
|
async checkout(version: number): Promise<void> {
|
||||||
if (typeof version === "string") {
|
await this.inner.checkout(version);
|
||||||
return this.inner.checkoutTag(version);
|
|
||||||
}
|
|
||||||
return this.inner.checkout(version);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async checkoutLatest(): Promise<void> {
|
async checkoutLatest(): Promise<void> {
|
||||||
@@ -749,10 +671,6 @@ export class LocalTable extends Table {
|
|||||||
await this.inner.restore();
|
await this.inner.restore();
|
||||||
}
|
}
|
||||||
|
|
||||||
async tags(): Promise<Tags> {
|
|
||||||
return await this.inner.tags();
|
|
||||||
}
|
|
||||||
|
|
||||||
async optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats> {
|
async optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats> {
|
||||||
let cleanupOlderThanMs;
|
let cleanupOlderThanMs;
|
||||||
if (
|
if (
|
||||||
@@ -783,11 +701,6 @@ export class LocalTable extends Table {
|
|||||||
}
|
}
|
||||||
return stats;
|
return stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
async stats(): Promise<TableStatistics> {
|
|
||||||
return await this.inner.stats();
|
|
||||||
}
|
|
||||||
|
|
||||||
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
||||||
on = Array.isArray(on) ? on : [on];
|
on = Array.isArray(on) ? on : [on];
|
||||||
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
|
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
250
nodejs/package-lock.json
generated
250
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -2304,20 +2304,89 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/code-frame": {
|
"node_modules/@babel/code-frame": {
|
||||||
"version": "7.26.2",
|
"version": "7.23.5",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.23.5.tgz",
|
||||||
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
|
"integrity": "sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/helper-validator-identifier": "^7.25.9",
|
"@babel/highlight": "^7.23.4",
|
||||||
"js-tokens": "^4.0.0",
|
"chalk": "^2.4.2"
|
||||||
"picocolors": "^1.0.0"
|
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/ansi-styles": {
|
||||||
|
"version": "3.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
|
||||||
|
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"color-convert": "^1.9.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/chalk": {
|
||||||
|
"version": "2.4.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
|
||||||
|
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"ansi-styles": "^3.2.1",
|
||||||
|
"escape-string-regexp": "^1.0.5",
|
||||||
|
"supports-color": "^5.3.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/color-convert": {
|
||||||
|
"version": "1.9.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
|
||||||
|
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"color-name": "1.1.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/color-name": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/escape-string-regexp": {
|
||||||
|
"version": "1.0.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
|
||||||
|
"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==",
|
||||||
|
"dev": true,
|
||||||
|
"engines": {
|
||||||
|
"node": ">=0.8.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/has-flag": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==",
|
||||||
|
"dev": true,
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/code-frame/node_modules/supports-color": {
|
||||||
|
"version": "5.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
|
||||||
|
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"has-flag": "^3.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@babel/compat-data": {
|
"node_modules/@babel/compat-data": {
|
||||||
"version": "7.23.5",
|
"version": "7.23.5",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.23.5.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.23.5.tgz",
|
||||||
@@ -2520,21 +2589,19 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/helper-string-parser": {
|
"node_modules/@babel/helper-string-parser": {
|
||||||
"version": "7.25.9",
|
"version": "7.23.4",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.23.4.tgz",
|
||||||
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
|
"integrity": "sha512-803gmbQdqwdf4olxrX4AJyFBV/RTr3rSmOj0rKwesmzlfhYNDEs+/iOcznzpNWlJlIlTJC2QfPFcHB6DlzdVLQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/helper-validator-identifier": {
|
"node_modules/@babel/helper-validator-identifier": {
|
||||||
"version": "7.25.9",
|
"version": "7.22.20",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz",
|
||||||
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
|
"integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
@@ -2549,28 +2616,109 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/helpers": {
|
"node_modules/@babel/helpers": {
|
||||||
"version": "7.27.0",
|
"version": "7.23.8",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.27.0.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.23.8.tgz",
|
||||||
"integrity": "sha512-U5eyP/CTFPuNE3qk+WZMxFkp/4zUzdceQlfzf7DdGdhp+Fezd7HD+i8Y24ZuTMKX3wQBld449jijbGq6OdGNQg==",
|
"integrity": "sha512-KDqYz4PiOWvDFrdHLPhKtCThtIcKVy6avWD2oG4GEvyQ+XDZwHD4YQd+H2vNMnq2rkdxsDkU82T+Vk8U/WXHRQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/template": "^7.27.0",
|
"@babel/template": "^7.22.15",
|
||||||
"@babel/types": "^7.27.0"
|
"@babel/traverse": "^7.23.7",
|
||||||
|
"@babel/types": "^7.23.6"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/parser": {
|
"node_modules/@babel/highlight": {
|
||||||
"version": "7.27.0",
|
"version": "7.23.4",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.0.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.23.4.tgz",
|
||||||
"integrity": "sha512-iaepho73/2Pz7w2eMS0Q5f83+0RKI7i4xmiYeBmDzfRVbQtTOG7Ts0S4HzJVsTMGI9keU8rNfuZr8DKfSt7Yyg==",
|
"integrity": "sha512-acGdbYSfp2WheJoJm/EBBBLh/ID8KDc64ISZ9DYtBmC8/Q204PZJLHyzeB5qMzJ5trcOkybd78M4x2KWsUq++A==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/types": "^7.27.0"
|
"@babel/helper-validator-identifier": "^7.22.20",
|
||||||
|
"chalk": "^2.4.2",
|
||||||
|
"js-tokens": "^4.0.0"
|
||||||
},
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6.9.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/ansi-styles": {
|
||||||
|
"version": "3.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
|
||||||
|
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"color-convert": "^1.9.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/chalk": {
|
||||||
|
"version": "2.4.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
|
||||||
|
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"ansi-styles": "^3.2.1",
|
||||||
|
"escape-string-regexp": "^1.0.5",
|
||||||
|
"supports-color": "^5.3.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/color-convert": {
|
||||||
|
"version": "1.9.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
|
||||||
|
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"color-name": "1.1.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/color-name": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/escape-string-regexp": {
|
||||||
|
"version": "1.0.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
|
||||||
|
"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==",
|
||||||
|
"dev": true,
|
||||||
|
"engines": {
|
||||||
|
"node": ">=0.8.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/has-flag": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==",
|
||||||
|
"dev": true,
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/highlight/node_modules/supports-color": {
|
||||||
|
"version": "5.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
|
||||||
|
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"has-flag": "^3.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@babel/parser": {
|
||||||
|
"version": "7.23.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.6.tgz",
|
||||||
|
"integrity": "sha512-Z2uID7YJ7oNvAI20O9X0bblw7Qqs8Q2hFy0R9tAfnfLkp5MW0UH9eUvnDSnFwKZ0AvgS1ucqR4KzvVHgnke1VQ==",
|
||||||
|
"dev": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"parser": "bin/babel-parser.js"
|
"parser": "bin/babel-parser.js"
|
||||||
},
|
},
|
||||||
@@ -2756,15 +2904,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/template": {
|
"node_modules/@babel/template": {
|
||||||
"version": "7.27.0",
|
"version": "7.22.15",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.0.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.22.15.tgz",
|
||||||
"integrity": "sha512-2ncevenBqXI6qRMukPlXwHKHchC7RyMuu4xv5JBXRfOGVcTy1mXCD12qrp7Jsoxll1EV3+9sE4GugBVRjT2jFA==",
|
"integrity": "sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/code-frame": "^7.26.2",
|
"@babel/code-frame": "^7.22.13",
|
||||||
"@babel/parser": "^7.27.0",
|
"@babel/parser": "^7.22.15",
|
||||||
"@babel/types": "^7.27.0"
|
"@babel/types": "^7.22.15"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
@@ -2801,14 +2948,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/types": {
|
"node_modules/@babel/types": {
|
||||||
"version": "7.27.0",
|
"version": "7.23.6",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.0.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.6.tgz",
|
||||||
"integrity": "sha512-H45s8fVLYjbhFH62dIJ3WtmJ6RSPt/3DRO0ZcT2SUiYiQyz3BLVb9ADEnLl91m74aQPS3AzzeajZHYOalWe3bg==",
|
"integrity": "sha512-+uarb83brBzPKN38NX1MkB6vb6+mwvR6amUulqAE7ccQw1pEl+bCia9TbdG1lsnFP7lZySvUn37CHyXQdfTwzg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/helper-string-parser": "^7.25.9",
|
"@babel/helper-string-parser": "^7.23.4",
|
||||||
"@babel/helper-validator-identifier": "^7.25.9"
|
"@babel/helper-validator-identifier": "^7.22.20",
|
||||||
|
"to-fast-properties": "^2.0.0"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
@@ -5403,11 +5550,10 @@
|
|||||||
"devOptional": true
|
"devOptional": true
|
||||||
},
|
},
|
||||||
"node_modules/axios": {
|
"node_modules/axios": {
|
||||||
"version": "1.8.4",
|
"version": "1.7.7",
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
|
||||||
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
"integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"follow-redirects": "^1.15.6",
|
"follow-redirects": "^1.15.6",
|
||||||
"form-data": "^4.0.0",
|
"form-data": "^4.0.0",
|
||||||
@@ -7723,8 +7869,7 @@
|
|||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||||
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
|
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
|
||||||
"dev": true,
|
"dev": true
|
||||||
"license": "MIT"
|
|
||||||
},
|
},
|
||||||
"node_modules/js-yaml": {
|
"node_modules/js-yaml": {
|
||||||
"version": "3.14.1",
|
"version": "3.14.1",
|
||||||
@@ -9215,6 +9360,15 @@
|
|||||||
"integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
|
"integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/to-fast-properties": {
|
||||||
|
"version": "2.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz",
|
||||||
|
"integrity": "sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==",
|
||||||
|
"dev": true,
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/to-regex-range": {
|
"node_modules/to-regex-range": {
|
||||||
"version": "5.0.1",
|
"version": "5.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.19.1-beta.1",
|
"version": "0.18.2-beta.1",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
@@ -29,7 +29,6 @@
|
|||||||
"aarch64-apple-darwin",
|
"aarch64-apple-darwin",
|
||||||
"x86_64-unknown-linux-gnu",
|
"x86_64-unknown-linux-gnu",
|
||||||
"aarch64-unknown-linux-gnu",
|
"aarch64-unknown-linux-gnu",
|
||||||
"x86_64-unknown-linux-musl",
|
|
||||||
"aarch64-unknown-linux-musl",
|
"aarch64-unknown-linux-musl",
|
||||||
"x86_64-pc-windows-msvc",
|
"x86_64-pc-windows-msvc",
|
||||||
"aarch64-pc-windows-msvc"
|
"aarch64-pc-windows-msvc"
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ impl NativeMergeInsertBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeStats> {
|
pub async fn execute(&self, buf: Buffer) -> napi::Result<()> {
|
||||||
let data = ipc_file_to_batches(buf.to_vec())
|
let data = ipc_file_to_batches(buf.to_vec())
|
||||||
.and_then(IntoArrow::into_arrow)
|
.and_then(IntoArrow::into_arrow)
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
@@ -46,14 +46,12 @@ impl NativeMergeInsertBuilder {
|
|||||||
|
|
||||||
let this = self.clone();
|
let this = self.clone();
|
||||||
|
|
||||||
let stats = this.inner.execute(data).await.map_err(|e| {
|
this.inner.execute(data).await.map_err(|e| {
|
||||||
napi::Error::from_reason(format!(
|
napi::Error::from_reason(format!(
|
||||||
"Failed to execute merge insert: {}",
|
"Failed to execute merge insert: {}",
|
||||||
convert_error(&e)
|
convert_error(&e)
|
||||||
))
|
))
|
||||||
})?;
|
})
|
||||||
|
|
||||||
Ok(stats.into())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -62,20 +60,3 @@ impl From<MergeInsertBuilder> for NativeMergeInsertBuilder {
|
|||||||
Self { inner }
|
Self { inner }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(object)]
|
|
||||||
pub struct MergeStats {
|
|
||||||
pub num_inserted_rows: BigInt,
|
|
||||||
pub num_updated_rows: BigInt,
|
|
||||||
pub num_deleted_rows: BigInt,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<lancedb::table::MergeStats> for MergeStats {
|
|
||||||
fn from(stats: lancedb::table::MergeStats) -> Self {
|
|
||||||
Self {
|
|
||||||
num_inserted_rows: stats.num_inserted_rows.into(),
|
|
||||||
num_updated_rows: stats.num_updated_rows.into(),
|
|
||||||
num_deleted_rows: stats.num_deleted_rows.into(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -3,9 +3,7 @@
|
|||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use lancedb::index::scalar::{
|
use lancedb::index::scalar::FullTextSearchQuery;
|
||||||
BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, PhraseQuery,
|
|
||||||
};
|
|
||||||
use lancedb::query::ExecutableQuery;
|
use lancedb::query::ExecutableQuery;
|
||||||
use lancedb::query::Query as LanceDbQuery;
|
use lancedb::query::Query as LanceDbQuery;
|
||||||
use lancedb::query::QueryBase;
|
use lancedb::query::QueryBase;
|
||||||
@@ -40,10 +38,9 @@ impl Query {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
|
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
|
||||||
let query = parse_fts_query(query)?;
|
let query = FullTextSearchQuery::new(query).columns(columns);
|
||||||
self.inner = self.inner.clone().full_text_search(query);
|
self.inner = self.inner.clone().full_text_search(query);
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
@@ -90,15 +87,11 @@ impl Query {
|
|||||||
pub async fn execute(
|
pub async fn execute(
|
||||||
&self,
|
&self,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
timeout_ms: Option<u32>,
|
|
||||||
) -> napi::Result<RecordBatchIterator> {
|
) -> napi::Result<RecordBatchIterator> {
|
||||||
let mut execution_opts = QueryExecutionOptions::default();
|
let mut execution_opts = QueryExecutionOptions::default();
|
||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
execution_opts.max_batch_length = max_batch_length;
|
execution_opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
if let Some(timeout_ms) = timeout_ms {
|
|
||||||
execution_opts.timeout = Some(std::time::Duration::from_millis(timeout_ms as u64))
|
|
||||||
}
|
|
||||||
let inner_stream = self
|
let inner_stream = self
|
||||||
.inner
|
.inner
|
||||||
.execute_with_options(execution_opts)
|
.execute_with_options(execution_opts)
|
||||||
@@ -121,16 +114,6 @@ impl Query {
|
|||||||
))
|
))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn analyze_plan(&self) -> napi::Result<String> {
|
|
||||||
self.inner.analyze_plan().await.map_err(|e| {
|
|
||||||
napi::Error::from_reason(format!(
|
|
||||||
"Failed to execute analyze plan: {}",
|
|
||||||
convert_error(&e)
|
|
||||||
))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
@@ -202,10 +185,9 @@ impl VectorQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
|
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
|
||||||
let query = parse_fts_query(query)?;
|
let query = FullTextSearchQuery::new(query).columns(columns);
|
||||||
self.inner = self.inner.clone().full_text_search(query);
|
self.inner = self.inner.clone().full_text_search(query);
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
@@ -250,15 +232,11 @@ impl VectorQuery {
|
|||||||
pub async fn execute(
|
pub async fn execute(
|
||||||
&self,
|
&self,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
timeout_ms: Option<u32>,
|
|
||||||
) -> napi::Result<RecordBatchIterator> {
|
) -> napi::Result<RecordBatchIterator> {
|
||||||
let mut execution_opts = QueryExecutionOptions::default();
|
let mut execution_opts = QueryExecutionOptions::default();
|
||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
execution_opts.max_batch_length = max_batch_length;
|
execution_opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
if let Some(timeout_ms) = timeout_ms {
|
|
||||||
execution_opts.timeout = Some(std::time::Duration::from_millis(timeout_ms as u64))
|
|
||||||
}
|
|
||||||
let inner_stream = self
|
let inner_stream = self
|
||||||
.inner
|
.inner
|
||||||
.execute_with_options(execution_opts)
|
.execute_with_options(execution_opts)
|
||||||
@@ -281,127 +259,4 @@ impl VectorQuery {
|
|||||||
))
|
))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn analyze_plan(&self) -> napi::Result<String> {
|
|
||||||
self.inner.analyze_plan().await.map_err(|e| {
|
|
||||||
napi::Error::from_reason(format!(
|
|
||||||
"Failed to execute analyze plan: {}",
|
|
||||||
convert_error(&e)
|
|
||||||
))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct JsFullTextQuery {
|
|
||||||
pub(crate) inner: FtsQuery,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
impl JsFullTextQuery {
|
|
||||||
#[napi(factory)]
|
|
||||||
pub fn match_query(
|
|
||||||
query: String,
|
|
||||||
column: String,
|
|
||||||
boost: f64,
|
|
||||||
fuzziness: Option<u32>,
|
|
||||||
max_expansions: u32,
|
|
||||||
) -> napi::Result<Self> {
|
|
||||||
Ok(Self {
|
|
||||||
inner: MatchQuery::new(query)
|
|
||||||
.with_column(Some(column))
|
|
||||||
.with_boost(boost as f32)
|
|
||||||
.with_fuzziness(fuzziness)
|
|
||||||
.with_max_expansions(max_expansions as usize)
|
|
||||||
.into(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(factory)]
|
|
||||||
pub fn phrase_query(query: String, column: String) -> napi::Result<Self> {
|
|
||||||
Ok(Self {
|
|
||||||
inner: PhraseQuery::new(query).with_column(Some(column)).into(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(factory)]
|
|
||||||
#[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
|
|
||||||
pub fn boost_query(
|
|
||||||
positive: &JsFullTextQuery,
|
|
||||||
negative: &JsFullTextQuery,
|
|
||||||
negative_boost: Option<f64>,
|
|
||||||
) -> napi::Result<Self> {
|
|
||||||
Ok(Self {
|
|
||||||
inner: BoostQuery::new(
|
|
||||||
positive.inner.clone(),
|
|
||||||
negative.inner.clone(),
|
|
||||||
negative_boost.map(|v| v as f32),
|
|
||||||
)
|
|
||||||
.into(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(factory)]
|
|
||||||
pub fn multi_match_query(
|
|
||||||
query: String,
|
|
||||||
columns: Vec<String>,
|
|
||||||
boosts: Option<Vec<f64>>,
|
|
||||||
) -> napi::Result<Self> {
|
|
||||||
let q = match boosts {
|
|
||||||
Some(boosts) => MultiMatchQuery::try_new(query, columns)
|
|
||||||
.and_then(|q| q.try_with_boosts(boosts.into_iter().map(|v| v as f32).collect())),
|
|
||||||
None => MultiMatchQuery::try_new(query, columns),
|
|
||||||
}
|
|
||||||
.map_err(|e| {
|
|
||||||
napi::Error::from_reason(format!("Failed to create multi match query: {}", e))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
Ok(Self { inner: q.into() })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_fts_query(query: napi::JsObject) -> napi::Result<FullTextSearchQuery> {
|
|
||||||
if let Ok(Some(query)) = query.get::<_, &JsFullTextQuery>("query") {
|
|
||||||
Ok(FullTextSearchQuery::new_query(query.inner.clone()))
|
|
||||||
} else if let Ok(Some(query_text)) = query.get::<_, String>("query") {
|
|
||||||
let mut query_text = query_text;
|
|
||||||
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
|
|
||||||
|
|
||||||
let is_phrase =
|
|
||||||
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
|
|
||||||
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
|
|
||||||
|
|
||||||
if is_phrase {
|
|
||||||
// Remove the surrounding quotes for phrase queries
|
|
||||||
query_text = query_text[1..query_text.len() - 1].to_string();
|
|
||||||
}
|
|
||||||
|
|
||||||
let query: FtsQuery = match (is_phrase, is_multi_match) {
|
|
||||||
(false, _) => MatchQuery::new(query_text).into(),
|
|
||||||
(true, false) => PhraseQuery::new(query_text).into(),
|
|
||||||
(true, true) => {
|
|
||||||
return Err(napi::Error::from_reason(
|
|
||||||
"Phrase queries cannot be used with multiple columns.",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let mut query = FullTextSearchQuery::new_query(query);
|
|
||||||
if let Some(cols) = columns {
|
|
||||||
if !cols.is_empty() {
|
|
||||||
query = query.with_columns(&cols).map_err(|e| {
|
|
||||||
napi::Error::from_reason(format!(
|
|
||||||
"Failed to set full text search columns: {}",
|
|
||||||
e
|
|
||||||
))
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(query)
|
|
||||||
} else {
|
|
||||||
Err(napi::Error::from_reason(
|
|
||||||
"Invalid full text search query object".to_string(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -111,7 +111,6 @@ impl Table {
|
|||||||
index: Option<&Index>,
|
index: Option<&Index>,
|
||||||
column: String,
|
column: String,
|
||||||
replace: Option<bool>,
|
replace: Option<bool>,
|
||||||
wait_timeout_s: Option<i64>,
|
|
||||||
) -> napi::Result<()> {
|
) -> napi::Result<()> {
|
||||||
let lancedb_index = if let Some(index) = index {
|
let lancedb_index = if let Some(index) = index {
|
||||||
index.consume()?
|
index.consume()?
|
||||||
@@ -122,10 +121,6 @@ impl Table {
|
|||||||
if let Some(replace) = replace {
|
if let Some(replace) = replace {
|
||||||
builder = builder.replace(replace);
|
builder = builder.replace(replace);
|
||||||
}
|
}
|
||||||
if let Some(timeout) = wait_timeout_s {
|
|
||||||
builder =
|
|
||||||
builder.wait_timeout(std::time::Duration::from_secs(timeout.try_into().unwrap()));
|
|
||||||
}
|
|
||||||
builder.execute().await.default_error()
|
builder.execute().await.default_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -137,32 +132,6 @@ impl Table {
|
|||||||
.default_error()
|
.default_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn prewarm_index(&self, index_name: String) -> napi::Result<()> {
|
|
||||||
self.inner_ref()?
|
|
||||||
.prewarm_index(&index_name)
|
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn wait_for_index(&self, index_names: Vec<String>, timeout_s: i64) -> Result<()> {
|
|
||||||
let timeout = std::time::Duration::from_secs(timeout_s.try_into().unwrap());
|
|
||||||
let index_names: Vec<&str> = index_names.iter().map(|s| s.as_str()).collect();
|
|
||||||
let slice: &[&str] = &index_names;
|
|
||||||
|
|
||||||
self.inner_ref()?
|
|
||||||
.wait_for_index(slice, timeout)
|
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn stats(&self) -> Result<TableStatistics> {
|
|
||||||
let stats = self.inner_ref()?.stats().await.default_error()?;
|
|
||||||
Ok(stats.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn update(
|
pub async fn update(
|
||||||
&self,
|
&self,
|
||||||
@@ -255,14 +224,6 @@ impl Table {
|
|||||||
.default_error()
|
.default_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn checkout_tag(&self, tag: String) -> napi::Result<()> {
|
|
||||||
self.inner_ref()?
|
|
||||||
.checkout_tag(tag.as_str())
|
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn checkout_latest(&self) -> napi::Result<()> {
|
pub async fn checkout_latest(&self) -> napi::Result<()> {
|
||||||
self.inner_ref()?.checkout_latest().await.default_error()
|
self.inner_ref()?.checkout_latest().await.default_error()
|
||||||
@@ -295,13 +256,6 @@ impl Table {
|
|||||||
self.inner_ref()?.restore().await.default_error()
|
self.inner_ref()?.restore().await.default_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
|
||||||
pub async fn tags(&self) -> napi::Result<Tags> {
|
|
||||||
Ok(Tags {
|
|
||||||
inner: self.inner_ref()?.clone(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn optimize(
|
pub async fn optimize(
|
||||||
&self,
|
&self,
|
||||||
@@ -561,158 +515,9 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(object)]
|
|
||||||
pub struct TableStatistics {
|
|
||||||
/// The total number of bytes in the table
|
|
||||||
pub total_bytes: i64,
|
|
||||||
|
|
||||||
/// The number of rows in the table
|
|
||||||
pub num_rows: i64,
|
|
||||||
|
|
||||||
/// The number of indices in the table
|
|
||||||
pub num_indices: i64,
|
|
||||||
|
|
||||||
/// Statistics on table fragments
|
|
||||||
pub fragment_stats: FragmentStatistics,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(object)]
|
|
||||||
pub struct FragmentStatistics {
|
|
||||||
/// The number of fragments in the table
|
|
||||||
pub num_fragments: i64,
|
|
||||||
|
|
||||||
/// The number of uncompacted fragments in the table
|
|
||||||
pub num_small_fragments: i64,
|
|
||||||
|
|
||||||
/// Statistics on the number of rows in the table fragments
|
|
||||||
pub lengths: FragmentSummaryStats,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(object)]
|
|
||||||
pub struct FragmentSummaryStats {
|
|
||||||
/// The number of rows in the fragment with the fewest rows
|
|
||||||
pub min: i64,
|
|
||||||
|
|
||||||
/// The number of rows in the fragment with the most rows
|
|
||||||
pub max: i64,
|
|
||||||
|
|
||||||
/// The mean number of rows in the fragments
|
|
||||||
pub mean: i64,
|
|
||||||
|
|
||||||
/// The 25th percentile of number of rows in the fragments
|
|
||||||
pub p25: i64,
|
|
||||||
|
|
||||||
/// The 50th percentile of number of rows in the fragments
|
|
||||||
pub p50: i64,
|
|
||||||
|
|
||||||
/// The 75th percentile of number of rows in the fragments
|
|
||||||
pub p75: i64,
|
|
||||||
|
|
||||||
/// The 99th percentile of number of rows in the fragments
|
|
||||||
pub p99: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<lancedb::table::TableStatistics> for TableStatistics {
|
|
||||||
fn from(v: lancedb::table::TableStatistics) -> Self {
|
|
||||||
Self {
|
|
||||||
total_bytes: v.total_bytes as i64,
|
|
||||||
num_rows: v.num_rows as i64,
|
|
||||||
num_indices: v.num_indices as i64,
|
|
||||||
fragment_stats: FragmentStatistics {
|
|
||||||
num_fragments: v.fragment_stats.num_fragments as i64,
|
|
||||||
num_small_fragments: v.fragment_stats.num_small_fragments as i64,
|
|
||||||
lengths: FragmentSummaryStats {
|
|
||||||
min: v.fragment_stats.lengths.min as i64,
|
|
||||||
max: v.fragment_stats.lengths.max as i64,
|
|
||||||
mean: v.fragment_stats.lengths.mean as i64,
|
|
||||||
p25: v.fragment_stats.lengths.p25 as i64,
|
|
||||||
p50: v.fragment_stats.lengths.p50 as i64,
|
|
||||||
p75: v.fragment_stats.lengths.p75 as i64,
|
|
||||||
p99: v.fragment_stats.lengths.p99 as i64,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(object)]
|
#[napi(object)]
|
||||||
pub struct Version {
|
pub struct Version {
|
||||||
pub version: i64,
|
pub version: i64,
|
||||||
pub timestamp: i64,
|
pub timestamp: i64,
|
||||||
pub metadata: HashMap<String, String>,
|
pub metadata: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
|
||||||
pub struct TagContents {
|
|
||||||
pub version: i64,
|
|
||||||
pub manifest_size: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
pub struct Tags {
|
|
||||||
inner: LanceDbTable,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
impl Tags {
|
|
||||||
#[napi]
|
|
||||||
pub async fn list(&self) -> napi::Result<HashMap<String, TagContents>> {
|
|
||||||
let rust_tags = self.inner.tags().await.default_error()?;
|
|
||||||
let tag_list = rust_tags.as_ref().list().await.default_error()?;
|
|
||||||
let tag_contents = tag_list
|
|
||||||
.into_iter()
|
|
||||||
.map(|(k, v)| {
|
|
||||||
(
|
|
||||||
k,
|
|
||||||
TagContents {
|
|
||||||
version: v.version as i64,
|
|
||||||
manifest_size: v.manifest_size as i64,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
Ok(tag_contents)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
pub async fn get_version(&self, tag: String) -> napi::Result<i64> {
|
|
||||||
let rust_tags = self.inner.tags().await.default_error()?;
|
|
||||||
rust_tags
|
|
||||||
.as_ref()
|
|
||||||
.get_version(tag.as_str())
|
|
||||||
.await
|
|
||||||
.map(|v| v as i64)
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
pub async unsafe fn create(&mut self, tag: String, version: i64) -> napi::Result<()> {
|
|
||||||
let mut rust_tags = self.inner.tags().await.default_error()?;
|
|
||||||
rust_tags
|
|
||||||
.as_mut()
|
|
||||||
.create(tag.as_str(), version as u64)
|
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
pub async unsafe fn delete(&mut self, tag: String) -> napi::Result<()> {
|
|
||||||
let mut rust_tags = self.inner.tags().await.default_error()?;
|
|
||||||
rust_tags
|
|
||||||
.as_mut()
|
|
||||||
.delete(tag.as_str())
|
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi]
|
|
||||||
pub async unsafe fn update(&mut self, tag: String, version: i64) -> napi::Result<()> {
|
|
||||||
let mut rust_tags = self.inner.tags().await.default_error()?;
|
|
||||||
rust_tags
|
|
||||||
.as_mut()
|
|
||||||
.update(tag.as_str(), version as u64)
|
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.22.1-beta.1"
|
current_version = "0.21.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.22.1-beta.1"
|
version = "0.21.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -4,12 +4,11 @@ name = "lancedb"
|
|||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"numpy",
|
|
||||||
"overrides>=0.7",
|
|
||||||
"packaging",
|
|
||||||
"pyarrow>=16",
|
|
||||||
"pydantic>=1.10",
|
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
|
"pyarrow>=14",
|
||||||
|
"pydantic>=1.10",
|
||||||
|
"packaging",
|
||||||
|
"overrides>=0.7",
|
||||||
]
|
]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||||
@@ -43,9 +42,6 @@ classifiers = [
|
|||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
pylance = [
|
|
||||||
"pylance>=0.25",
|
|
||||||
]
|
|
||||||
tests = [
|
tests = [
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
"boto3",
|
"boto3",
|
||||||
@@ -58,8 +54,7 @@ tests = [
|
|||||||
"polars>=0.19, <=1.3.0",
|
"polars>=0.19, <=1.3.0",
|
||||||
"tantivy",
|
"tantivy",
|
||||||
"pyarrow-stubs",
|
"pyarrow-stubs",
|
||||||
"pylance>=0.25",
|
"pylance>=0.23.2",
|
||||||
"requests",
|
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"ruff",
|
"ruff",
|
||||||
@@ -77,7 +72,6 @@ embeddings = [
|
|||||||
"pillow",
|
"pillow",
|
||||||
"open-clip-torch",
|
"open-clip-torch",
|
||||||
"cohere",
|
"cohere",
|
||||||
"colpali-engine>=0.3.10",
|
|
||||||
"huggingface_hub",
|
"huggingface_hub",
|
||||||
"InstructorEmbedding",
|
"InstructorEmbedding",
|
||||||
"google.generativeai",
|
"google.generativeai",
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
from datetime import timedelta
|
from typing import Dict, List, Optional, Tuple, Any, Union, Literal
|
||||||
from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
|
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
@@ -47,13 +46,12 @@ class Table:
|
|||||||
): ...
|
): ...
|
||||||
async def list_versions(self) -> List[Dict[str, Any]]: ...
|
async def list_versions(self) -> List[Dict[str, Any]]: ...
|
||||||
async def version(self) -> int: ...
|
async def version(self) -> int: ...
|
||||||
async def checkout(self, version: Union[int, str]): ...
|
async def checkout(self, version: int): ...
|
||||||
async def checkout_latest(self): ...
|
async def checkout_latest(self): ...
|
||||||
async def restore(self, version: Optional[int] = None): ...
|
async def restore(self): ...
|
||||||
async def list_indices(self) -> list[IndexConfig]: ...
|
async def list_indices(self) -> list[IndexConfig]: ...
|
||||||
async def delete(self, filter: str): ...
|
async def delete(self, filter: str): ...
|
||||||
async def add_columns(self, columns: list[tuple[str, str]]) -> None: ...
|
async def add_columns(self, columns: list[tuple[str, str]]) -> None: ...
|
||||||
async def add_columns_with_schema(self, schema: pa.Schema) -> None: ...
|
|
||||||
async def alter_columns(self, columns: list[dict[str, Any]]) -> None: ...
|
async def alter_columns(self, columns: list[dict[str, Any]]) -> None: ...
|
||||||
async def optimize(
|
async def optimize(
|
||||||
self,
|
self,
|
||||||
@@ -61,18 +59,9 @@ class Table:
|
|||||||
cleanup_since_ms: Optional[int] = None,
|
cleanup_since_ms: Optional[int] = None,
|
||||||
delete_unverified: Optional[bool] = None,
|
delete_unverified: Optional[bool] = None,
|
||||||
) -> OptimizeStats: ...
|
) -> OptimizeStats: ...
|
||||||
@property
|
|
||||||
def tags(self) -> Tags: ...
|
|
||||||
def query(self) -> Query: ...
|
def query(self) -> Query: ...
|
||||||
def vector_search(self) -> VectorQuery: ...
|
def vector_search(self) -> VectorQuery: ...
|
||||||
|
|
||||||
class Tags:
|
|
||||||
async def list(self) -> Dict[str, Tag]: ...
|
|
||||||
async def get_version(self, tag: str) -> int: ...
|
|
||||||
async def create(self, tag: str, version: int): ...
|
|
||||||
async def delete(self, tag: str): ...
|
|
||||||
async def update(self, tag: str, version: int): ...
|
|
||||||
|
|
||||||
class IndexConfig:
|
class IndexConfig:
|
||||||
index_type: str
|
index_type: str
|
||||||
columns: List[str]
|
columns: List[str]
|
||||||
@@ -104,11 +93,7 @@ class Query:
|
|||||||
def postfilter(self): ...
|
def postfilter(self): ...
|
||||||
def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
|
def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
|
||||||
def nearest_to_text(self, query: dict) -> FTSQuery: ...
|
def nearest_to_text(self, query: dict) -> FTSQuery: ...
|
||||||
async def execute(
|
async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
|
||||||
self, max_batch_length: Optional[int], timeout: Optional[timedelta]
|
|
||||||
) -> RecordBatchStream: ...
|
|
||||||
async def explain_plan(self, verbose: Optional[bool]) -> str: ...
|
|
||||||
async def analyze_plan(self) -> str: ...
|
|
||||||
def to_query_request(self) -> PyQueryRequest: ...
|
def to_query_request(self) -> PyQueryRequest: ...
|
||||||
|
|
||||||
class FTSQuery:
|
class FTSQuery:
|
||||||
@@ -122,9 +107,8 @@ class FTSQuery:
|
|||||||
def get_query(self) -> str: ...
|
def get_query(self) -> str: ...
|
||||||
def add_query_vector(self, query_vec: pa.Array) -> None: ...
|
def add_query_vector(self, query_vec: pa.Array) -> None: ...
|
||||||
def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
|
def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
|
||||||
async def execute(
|
async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
|
||||||
self, max_batch_length: Optional[int], timeout: Optional[timedelta]
|
async def explain_plan(self) -> str: ...
|
||||||
) -> RecordBatchStream: ...
|
|
||||||
def to_query_request(self) -> PyQueryRequest: ...
|
def to_query_request(self) -> PyQueryRequest: ...
|
||||||
|
|
||||||
class VectorQuery:
|
class VectorQuery:
|
||||||
@@ -204,7 +188,3 @@ class RemovalStats:
|
|||||||
class OptimizeStats:
|
class OptimizeStats:
|
||||||
compaction: CompactionStats
|
compaction: CompactionStats
|
||||||
prune: RemovalStats
|
prune: RemovalStats
|
||||||
|
|
||||||
class Tag(TypedDict):
|
|
||||||
version: int
|
|
||||||
manifest_size: int
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import numpy as np
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pyarrow.dataset
|
import pyarrow.dataset
|
||||||
|
|
||||||
from .dependencies import _check_for_pandas, pandas as pd
|
from .dependencies import pandas as pd
|
||||||
|
|
||||||
DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
|
DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
|
||||||
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
|
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
|
||||||
@@ -63,7 +63,7 @@ def data_to_reader(
|
|||||||
data: DATA, schema: Optional[pa.Schema] = None
|
data: DATA, schema: Optional[pa.Schema] = None
|
||||||
) -> pa.RecordBatchReader:
|
) -> pa.RecordBatchReader:
|
||||||
"""Convert various types of input into a RecordBatchReader"""
|
"""Convert various types of input into a RecordBatchReader"""
|
||||||
if _check_for_pandas(data) and isinstance(data, pd.DataFrame):
|
if pd is not None and isinstance(data, pd.DataFrame):
|
||||||
return pa.Table.from_pandas(data, schema=schema).to_reader()
|
return pa.Table.from_pandas(data, schema=schema).to_reader()
|
||||||
elif isinstance(data, pa.Table):
|
elif isinstance(data, pa.Table):
|
||||||
return data.to_reader()
|
return data.to_reader()
|
||||||
|
|||||||
@@ -19,4 +19,3 @@ from .imagebind import ImageBindEmbeddings
|
|||||||
from .jinaai import JinaEmbeddings
|
from .jinaai import JinaEmbeddings
|
||||||
from .watsonx import WatsonxEmbeddings
|
from .watsonx import WatsonxEmbeddings
|
||||||
from .voyageai import VoyageAIEmbeddingFunction
|
from .voyageai import VoyageAIEmbeddingFunction
|
||||||
from .colpali import ColPaliEmbeddings
|
|
||||||
|
|||||||
@@ -1,255 +0,0 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
||||||
|
|
||||||
|
|
||||||
from functools import lru_cache
|
|
||||||
from typing import List, Union, Optional, Any
|
|
||||||
import numpy as np
|
|
||||||
import io
|
|
||||||
|
|
||||||
from ..util import attempt_import_or_raise
|
|
||||||
from .base import EmbeddingFunction
|
|
||||||
from .registry import register
|
|
||||||
from .utils import TEXT, IMAGES, is_flash_attn_2_available
|
|
||||||
|
|
||||||
|
|
||||||
@register("colpali")
|
|
||||||
class ColPaliEmbeddings(EmbeddingFunction):
|
|
||||||
"""
|
|
||||||
An embedding function that uses the ColPali engine for
|
|
||||||
multimodal multi-vector embeddings.
|
|
||||||
|
|
||||||
This embedding function supports ColQwen2.5 models, producing multivector outputs
|
|
||||||
for both text and image inputs. The output embeddings are lists of vectors, each
|
|
||||||
vector being 128-dimensional by default, represented as List[List[float]].
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
model_name : str
|
|
||||||
The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
|
|
||||||
device : str
|
|
||||||
The device for inference (default "cuda:0").
|
|
||||||
dtype : str
|
|
||||||
Data type for model weights (default "bfloat16").
|
|
||||||
use_token_pooling : bool
|
|
||||||
Whether to use token pooling to reduce embedding size (default True).
|
|
||||||
pool_factor : int
|
|
||||||
Factor to reduce sequence length if token pooling is enabled (default 2).
|
|
||||||
quantization_config : Optional[BitsAndBytesConfig]
|
|
||||||
Quantization configuration for the model. (default None, bitsandbytes needed)
|
|
||||||
batch_size : int
|
|
||||||
Batch size for processing inputs (default 2).
|
|
||||||
"""
|
|
||||||
|
|
||||||
model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
|
|
||||||
device: str = "auto"
|
|
||||||
dtype: str = "bfloat16"
|
|
||||||
use_token_pooling: bool = True
|
|
||||||
pool_factor: int = 2
|
|
||||||
quantization_config: Optional[Any] = None
|
|
||||||
batch_size: int = 2
|
|
||||||
|
|
||||||
_model = None
|
|
||||||
_processor = None
|
|
||||||
_token_pooler = None
|
|
||||||
_vector_dim = None
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
(
|
|
||||||
self._model,
|
|
||||||
self._processor,
|
|
||||||
self._token_pooler,
|
|
||||||
) = self._load_model(
|
|
||||||
self.model_name,
|
|
||||||
self.dtype,
|
|
||||||
self.device,
|
|
||||||
self.use_token_pooling,
|
|
||||||
self.quantization_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
@lru_cache(maxsize=1)
|
|
||||||
def _load_model(
|
|
||||||
model_name: str,
|
|
||||||
dtype: str,
|
|
||||||
device: str,
|
|
||||||
use_token_pooling: bool,
|
|
||||||
quantization_config: Optional[Any],
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Initialize and cache the ColPali model, processor, and token pooler.
|
|
||||||
"""
|
|
||||||
torch = attempt_import_or_raise("torch", "torch")
|
|
||||||
transformers = attempt_import_or_raise("transformers", "transformers")
|
|
||||||
colpali_engine = attempt_import_or_raise("colpali_engine", "colpali_engine")
|
|
||||||
from colpali_engine.compression.token_pooling import HierarchicalTokenPooler
|
|
||||||
|
|
||||||
if quantization_config is not None:
|
|
||||||
if not isinstance(quantization_config, transformers.BitsAndBytesConfig):
|
|
||||||
raise ValueError("quantization_config must be a BitsAndBytesConfig")
|
|
||||||
|
|
||||||
if dtype == "bfloat16":
|
|
||||||
torch_dtype = torch.bfloat16
|
|
||||||
elif dtype == "float16":
|
|
||||||
torch_dtype = torch.float16
|
|
||||||
elif dtype == "float64":
|
|
||||||
torch_dtype = torch.float64
|
|
||||||
else:
|
|
||||||
torch_dtype = torch.float32
|
|
||||||
|
|
||||||
model = colpali_engine.models.ColQwen2_5.from_pretrained(
|
|
||||||
model_name,
|
|
||||||
torch_dtype=torch_dtype,
|
|
||||||
device_map=device,
|
|
||||||
quantization_config=quantization_config
|
|
||||||
if quantization_config is not None
|
|
||||||
else None,
|
|
||||||
attn_implementation="flash_attention_2"
|
|
||||||
if is_flash_attn_2_available()
|
|
||||||
else None,
|
|
||||||
).eval()
|
|
||||||
processor = colpali_engine.models.ColQwen2_5_Processor.from_pretrained(
|
|
||||||
model_name
|
|
||||||
)
|
|
||||||
token_pooler = HierarchicalTokenPooler() if use_token_pooling else None
|
|
||||||
return model, processor, token_pooler
|
|
||||||
|
|
||||||
def ndims(self):
|
|
||||||
"""
|
|
||||||
Return the dimension of a vector in the multivector output (e.g., 128).
|
|
||||||
"""
|
|
||||||
torch = attempt_import_or_raise("torch", "torch")
|
|
||||||
if self._vector_dim is None:
|
|
||||||
dummy_query = "test"
|
|
||||||
batch_queries = self._processor.process_queries([dummy_query]).to(
|
|
||||||
self._model.device
|
|
||||||
)
|
|
||||||
with torch.no_grad():
|
|
||||||
query_embeddings = self._model(**batch_queries)
|
|
||||||
|
|
||||||
if self.use_token_pooling and self._token_pooler is not None:
|
|
||||||
query_embeddings = self._token_pooler.pool_embeddings(
|
|
||||||
query_embeddings,
|
|
||||||
pool_factor=self.pool_factor,
|
|
||||||
padding=True,
|
|
||||||
padding_side=self._processor.tokenizer.padding_side,
|
|
||||||
)
|
|
||||||
|
|
||||||
self._vector_dim = query_embeddings[0].shape[-1]
|
|
||||||
return self._vector_dim
|
|
||||||
|
|
||||||
def _process_embeddings(self, embeddings):
|
|
||||||
"""
|
|
||||||
Format model embeddings into List[List[float]].
|
|
||||||
Use token pooling if enabled.
|
|
||||||
"""
|
|
||||||
torch = attempt_import_or_raise("torch", "torch")
|
|
||||||
if self.use_token_pooling and self._token_pooler is not None:
|
|
||||||
embeddings = self._token_pooler.pool_embeddings(
|
|
||||||
embeddings,
|
|
||||||
pool_factor=self.pool_factor,
|
|
||||||
padding=True,
|
|
||||||
padding_side=self._processor.tokenizer.padding_side,
|
|
||||||
)
|
|
||||||
|
|
||||||
if isinstance(embeddings, torch.Tensor):
|
|
||||||
tensors = embeddings.detach().cpu()
|
|
||||||
if tensors.dtype == torch.bfloat16:
|
|
||||||
tensors = tensors.to(torch.float32)
|
|
||||||
return (
|
|
||||||
tensors.numpy()
|
|
||||||
.astype(np.float64 if self.dtype == "float64" else np.float32)
|
|
||||||
.tolist()
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
|
|
||||||
def generate_text_embeddings(self, text: TEXT) -> List[List[List[float]]]:
|
|
||||||
"""
|
|
||||||
Generate embeddings for text input.
|
|
||||||
"""
|
|
||||||
torch = attempt_import_or_raise("torch", "torch")
|
|
||||||
text = self.sanitize_input(text)
|
|
||||||
all_embeddings = []
|
|
||||||
|
|
||||||
for i in range(0, len(text), self.batch_size):
|
|
||||||
batch_text = text[i : i + self.batch_size]
|
|
||||||
batch_queries = self._processor.process_queries(batch_text).to(
|
|
||||||
self._model.device
|
|
||||||
)
|
|
||||||
with torch.no_grad():
|
|
||||||
query_embeddings = self._model(**batch_queries)
|
|
||||||
all_embeddings.extend(self._process_embeddings(query_embeddings))
|
|
||||||
return all_embeddings
|
|
||||||
|
|
||||||
def _prepare_images(self, images: IMAGES) -> List:
|
|
||||||
"""
|
|
||||||
Convert image inputs to PIL Images.
|
|
||||||
"""
|
|
||||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
|
||||||
requests = attempt_import_or_raise("requests", "requests")
|
|
||||||
images = self.sanitize_input(images)
|
|
||||||
pil_images = []
|
|
||||||
try:
|
|
||||||
for image in images:
|
|
||||||
if isinstance(image, str):
|
|
||||||
if image.startswith(("http://", "https://")):
|
|
||||||
response = requests.get(image, timeout=10)
|
|
||||||
response.raise_for_status()
|
|
||||||
pil_images.append(PIL.Image.open(io.BytesIO(response.content)))
|
|
||||||
else:
|
|
||||||
with PIL.Image.open(image) as im:
|
|
||||||
pil_images.append(im.copy())
|
|
||||||
elif isinstance(image, bytes):
|
|
||||||
pil_images.append(PIL.Image.open(io.BytesIO(image)))
|
|
||||||
else:
|
|
||||||
# Assume it's a PIL Image; will raise if invalid
|
|
||||||
pil_images.append(image)
|
|
||||||
except Exception as e:
|
|
||||||
raise ValueError(f"Failed to process image: {e}")
|
|
||||||
|
|
||||||
return pil_images
|
|
||||||
|
|
||||||
def generate_image_embeddings(self, images: IMAGES) -> List[List[List[float]]]:
|
|
||||||
"""
|
|
||||||
Generate embeddings for a batch of images.
|
|
||||||
"""
|
|
||||||
torch = attempt_import_or_raise("torch", "torch")
|
|
||||||
pil_images = self._prepare_images(images)
|
|
||||||
all_embeddings = []
|
|
||||||
|
|
||||||
for i in range(0, len(pil_images), self.batch_size):
|
|
||||||
batch_images = pil_images[i : i + self.batch_size]
|
|
||||||
batch_images = self._processor.process_images(batch_images).to(
|
|
||||||
self._model.device
|
|
||||||
)
|
|
||||||
with torch.no_grad():
|
|
||||||
image_embeddings = self._model(**batch_images)
|
|
||||||
all_embeddings.extend(self._process_embeddings(image_embeddings))
|
|
||||||
return all_embeddings
|
|
||||||
|
|
||||||
def compute_query_embeddings(
|
|
||||||
self, query: Union[str, IMAGES], *args, **kwargs
|
|
||||||
) -> List[List[List[float]]]:
|
|
||||||
"""
|
|
||||||
Compute embeddings for a single user query (text only).
|
|
||||||
"""
|
|
||||||
if not isinstance(query, str):
|
|
||||||
raise ValueError(
|
|
||||||
"Query must be a string, image to image search is not supported"
|
|
||||||
)
|
|
||||||
return self.generate_text_embeddings([query])
|
|
||||||
|
|
||||||
def compute_source_embeddings(
|
|
||||||
self, images: IMAGES, *args, **kwargs
|
|
||||||
) -> List[List[List[float]]]:
|
|
||||||
"""
|
|
||||||
Compute embeddings for a batch of source images.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
images : Union[str, bytes, List, pa.Array, pa.ChunkedArray, np.ndarray]
|
|
||||||
Batch of images (paths, URLs, bytes, or PIL Images).
|
|
||||||
"""
|
|
||||||
images = self.sanitize_input(images)
|
|
||||||
return self.generate_image_embeddings(images)
|
|
||||||
@@ -18,7 +18,6 @@ import numpy as np
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
from ..dependencies import pandas as pd
|
from ..dependencies import pandas as pd
|
||||||
from ..util import attempt_import_or_raise
|
|
||||||
|
|
||||||
|
|
||||||
# ruff: noqa: PERF203
|
# ruff: noqa: PERF203
|
||||||
@@ -276,12 +275,3 @@ def url_retrieve(url: str):
|
|||||||
def api_key_not_found_help(provider):
|
def api_key_not_found_help(provider):
|
||||||
logging.error("Could not find API key for %s", provider)
|
logging.error("Could not find API key for %s", provider)
|
||||||
raise ValueError(f"Please set the {provider.upper()}_API_KEY environment variable.")
|
raise ValueError(f"Please set the {provider.upper()}_API_KEY environment variable.")
|
||||||
|
|
||||||
|
|
||||||
def is_flash_attn_2_available():
|
|
||||||
try:
|
|
||||||
attempt_import_or_raise("flash_attn", "flash_attn")
|
|
||||||
|
|
||||||
return True
|
|
||||||
except ImportError:
|
|
||||||
return False
|
|
||||||
|
|||||||
@@ -1,12 +1,9 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
import base64
|
|
||||||
import os
|
|
||||||
from typing import ClassVar, TYPE_CHECKING, List, Union, Any
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from urllib.parse import urlparse
|
import os
|
||||||
from io import BytesIO
|
from typing import ClassVar, TYPE_CHECKING, List, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -14,100 +11,12 @@ import pyarrow as pa
|
|||||||
from ..util import attempt_import_or_raise
|
from ..util import attempt_import_or_raise
|
||||||
from .base import EmbeddingFunction
|
from .base import EmbeddingFunction
|
||||||
from .registry import register
|
from .registry import register
|
||||||
from .utils import api_key_not_found_help, IMAGES, TEXT
|
from .utils import api_key_not_found_help, IMAGES
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import PIL
|
import PIL
|
||||||
|
|
||||||
|
|
||||||
def is_valid_url(text):
|
|
||||||
try:
|
|
||||||
parsed = urlparse(text)
|
|
||||||
return bool(parsed.scheme) and bool(parsed.netloc)
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def transform_input(input_data: Union[str, bytes, Path]):
|
|
||||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
|
||||||
if isinstance(input_data, str):
|
|
||||||
if is_valid_url(input_data):
|
|
||||||
content = {"type": "image_url", "image_url": input_data}
|
|
||||||
else:
|
|
||||||
content = {"type": "text", "text": input_data}
|
|
||||||
elif isinstance(input_data, PIL.Image.Image):
|
|
||||||
buffered = BytesIO()
|
|
||||||
input_data.save(buffered, format="JPEG")
|
|
||||||
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
||||||
content = {
|
|
||||||
"type": "image_base64",
|
|
||||||
"image_base64": "data:image/jpeg;base64," + img_str,
|
|
||||||
}
|
|
||||||
elif isinstance(input_data, bytes):
|
|
||||||
img = PIL.Image.open(BytesIO(input_data))
|
|
||||||
buffered = BytesIO()
|
|
||||||
img.save(buffered, format="JPEG")
|
|
||||||
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
||||||
content = {
|
|
||||||
"type": "image_base64",
|
|
||||||
"image_base64": "data:image/jpeg;base64," + img_str,
|
|
||||||
}
|
|
||||||
elif isinstance(input_data, Path):
|
|
||||||
img = PIL.Image.open(input_data)
|
|
||||||
buffered = BytesIO()
|
|
||||||
img.save(buffered, format="JPEG")
|
|
||||||
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
||||||
content = {
|
|
||||||
"type": "image_base64",
|
|
||||||
"image_base64": "data:image/jpeg;base64," + img_str,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
raise ValueError("Each input should be either str, bytes, Path or Image.")
|
|
||||||
|
|
||||||
return {"content": [content]}
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
|
|
||||||
"""
|
|
||||||
Sanitize the input to the embedding function.
|
|
||||||
"""
|
|
||||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
|
||||||
if isinstance(inputs, (str, bytes, Path, PIL.Image.Image)):
|
|
||||||
inputs = [inputs]
|
|
||||||
elif isinstance(inputs, pa.Array):
|
|
||||||
inputs = inputs.to_pylist()
|
|
||||||
elif isinstance(inputs, pa.ChunkedArray):
|
|
||||||
inputs = inputs.combine_chunks().to_pylist()
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
f"Input type {type(inputs)} not allowed with multimodal model."
|
|
||||||
)
|
|
||||||
|
|
||||||
if not all(isinstance(x, (str, bytes, Path, PIL.Image.Image)) for x in inputs):
|
|
||||||
raise ValueError("Each input should be either str, bytes, Path or Image.")
|
|
||||||
|
|
||||||
return [transform_input(i) for i in inputs]
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_text_input(inputs: TEXT) -> List[str]:
|
|
||||||
"""
|
|
||||||
Sanitize the input to the embedding function.
|
|
||||||
"""
|
|
||||||
if isinstance(inputs, str):
|
|
||||||
inputs = [inputs]
|
|
||||||
elif isinstance(inputs, pa.Array):
|
|
||||||
inputs = inputs.to_pylist()
|
|
||||||
elif isinstance(inputs, pa.ChunkedArray):
|
|
||||||
inputs = inputs.combine_chunks().to_pylist()
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Input type {type(inputs)} not allowed with text model.")
|
|
||||||
|
|
||||||
if not all(isinstance(x, str) for x in inputs):
|
|
||||||
raise ValueError("Each input should be str.")
|
|
||||||
|
|
||||||
return inputs
|
|
||||||
|
|
||||||
|
|
||||||
@register("voyageai")
|
@register("voyageai")
|
||||||
class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||||
"""
|
"""
|
||||||
@@ -165,11 +74,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
|||||||
]
|
]
|
||||||
multimodal_embedding_models: list = ["voyage-multimodal-3"]
|
multimodal_embedding_models: list = ["voyage-multimodal-3"]
|
||||||
|
|
||||||
def _is_multimodal_model(self, model_name: str):
|
|
||||||
return (
|
|
||||||
model_name in self.multimodal_embedding_models or "multimodal" in model_name
|
|
||||||
)
|
|
||||||
|
|
||||||
def ndims(self):
|
def ndims(self):
|
||||||
if self.name == "voyage-3-lite":
|
if self.name == "voyage-3-lite":
|
||||||
return 512
|
return 512
|
||||||
@@ -181,12 +85,55 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
|||||||
"voyage-finance-2",
|
"voyage-finance-2",
|
||||||
"voyage-multilingual-2",
|
"voyage-multilingual-2",
|
||||||
"voyage-law-2",
|
"voyage-law-2",
|
||||||
"voyage-multimodal-3",
|
|
||||||
]:
|
]:
|
||||||
return 1024
|
return 1024
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Model {self.name} not supported")
|
raise ValueError(f"Model {self.name} not supported")
|
||||||
|
|
||||||
|
def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
|
||||||
|
"""
|
||||||
|
Sanitize the input to the embedding function.
|
||||||
|
"""
|
||||||
|
if isinstance(images, (str, bytes)):
|
||||||
|
images = [images]
|
||||||
|
elif isinstance(images, pa.Array):
|
||||||
|
images = images.to_pylist()
|
||||||
|
elif isinstance(images, pa.ChunkedArray):
|
||||||
|
images = images.combine_chunks().to_pylist()
|
||||||
|
return images
|
||||||
|
|
||||||
|
def generate_text_embeddings(self, text: str, **kwargs) -> np.ndarray:
|
||||||
|
"""
|
||||||
|
Get the embeddings for the given texts
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
texts: list[str] or np.ndarray (of str)
|
||||||
|
The texts to embed
|
||||||
|
input_type: Optional[str]
|
||||||
|
|
||||||
|
truncation: Optional[bool]
|
||||||
|
"""
|
||||||
|
client = VoyageAIEmbeddingFunction._get_client()
|
||||||
|
if self.name in self.text_embedding_models:
|
||||||
|
rs = client.embed(texts=[text], model=self.name, **kwargs)
|
||||||
|
elif self.name in self.multimodal_embedding_models:
|
||||||
|
rs = client.multimodal_embed(inputs=[[text]], model=self.name, **kwargs)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Model {self.name} not supported to generate text embeddings"
|
||||||
|
)
|
||||||
|
|
||||||
|
return rs.embeddings[0]
|
||||||
|
|
||||||
|
def generate_image_embedding(
|
||||||
|
self, image: "PIL.Image.Image", **kwargs
|
||||||
|
) -> np.ndarray:
|
||||||
|
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
|
||||||
|
inputs=[[image]], model=self.name, **kwargs
|
||||||
|
)
|
||||||
|
return rs.embeddings[0]
|
||||||
|
|
||||||
def compute_query_embeddings(
|
def compute_query_embeddings(
|
||||||
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
|
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
|
||||||
) -> List[np.ndarray]:
|
) -> List[np.ndarray]:
|
||||||
@@ -197,52 +144,23 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
|||||||
----------
|
----------
|
||||||
query : Union[str, PIL.Image.Image]
|
query : Union[str, PIL.Image.Image]
|
||||||
The query to embed. A query can be either text or an image.
|
The query to embed. A query can be either text or an image.
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
List[np.array]: the list of embeddings
|
|
||||||
"""
|
"""
|
||||||
client = VoyageAIEmbeddingFunction._get_client()
|
if isinstance(query, str):
|
||||||
if self._is_multimodal_model(self.name):
|
return [self.generate_text_embeddings(query, input_type="query")]
|
||||||
result = client.multimodal_embed(
|
|
||||||
inputs=[[query]], model=self.name, input_type="query", **kwargs
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
result = client.embed(
|
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||||
texts=[query], model=self.name, input_type="query", **kwargs
|
if isinstance(query, PIL.Image.Image):
|
||||||
)
|
return [self.generate_image_embedding(query, input_type="query")]
|
||||||
|
else:
|
||||||
return [result.embeddings[0]]
|
raise TypeError("Only text PIL images supported as query")
|
||||||
|
|
||||||
def compute_source_embeddings(
|
def compute_source_embeddings(
|
||||||
self, inputs: Union[TEXT, IMAGES], *args, **kwargs
|
self, images: IMAGES, *args, **kwargs
|
||||||
) -> List[np.array]:
|
) -> List[np.array]:
|
||||||
"""
|
images = self.sanitize_input(images)
|
||||||
Compute the embeddings for the inputs
|
return [
|
||||||
|
self.generate_image_embedding(img, input_type="document") for img in images
|
||||||
Parameters
|
]
|
||||||
----------
|
|
||||||
inputs : Union[TEXT, IMAGES]
|
|
||||||
The inputs to embed. The input can be either str, bytes, Path (to an image),
|
|
||||||
PIL.Image or list of these.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
List[np.array]: the list of embeddings
|
|
||||||
"""
|
|
||||||
client = VoyageAIEmbeddingFunction._get_client()
|
|
||||||
if self._is_multimodal_model(self.name):
|
|
||||||
inputs = sanitize_multimodal_input(inputs)
|
|
||||||
result = client.multimodal_embed(
|
|
||||||
inputs=inputs, model=self.name, input_type="document", **kwargs
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
inputs = sanitize_text_input(inputs)
|
|
||||||
result = client.embed(
|
|
||||||
texts=inputs, model=self.name, input_type="document", **kwargs
|
|
||||||
)
|
|
||||||
|
|
||||||
return result.embeddings
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_client():
|
def _get_client():
|
||||||
|
|||||||
@@ -152,104 +152,6 @@ def Vector(
|
|||||||
return FixedSizeList
|
return FixedSizeList
|
||||||
|
|
||||||
|
|
||||||
def MultiVector(
|
|
||||||
dim: int, value_type: pa.DataType = pa.float32(), nullable: bool = True
|
|
||||||
) -> Type:
|
|
||||||
"""Pydantic MultiVector Type for multi-vector embeddings.
|
|
||||||
|
|
||||||
This type represents a list of vectors, each with the same dimension.
|
|
||||||
Useful for models that produce multiple embeddings per input, like ColPali.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
dim : int
|
|
||||||
The dimension of each vector in the multi-vector.
|
|
||||||
value_type : pyarrow.DataType, optional
|
|
||||||
The value type of the vectors, by default pa.float32()
|
|
||||||
nullable : bool, optional
|
|
||||||
Whether the multi-vector is nullable, by default it is True.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
|
|
||||||
>>> import pydantic
|
|
||||||
>>> from lancedb.pydantic import MultiVector
|
|
||||||
...
|
|
||||||
>>> class MyModel(pydantic.BaseModel):
|
|
||||||
... id: int
|
|
||||||
... text: str
|
|
||||||
... embeddings: MultiVector(128) # List of 128-dimensional vectors
|
|
||||||
>>> schema = pydantic_to_schema(MyModel)
|
|
||||||
>>> assert schema == pa.schema([
|
|
||||||
... pa.field("id", pa.int64(), False),
|
|
||||||
... pa.field("text", pa.utf8(), False),
|
|
||||||
... pa.field("embeddings", pa.list_(pa.list_(pa.float32(), 128)))
|
|
||||||
... ])
|
|
||||||
"""
|
|
||||||
|
|
||||||
class MultiVectorList(list, FixedSizeListMixin):
|
|
||||||
def __repr__(self):
|
|
||||||
return f"MultiVector(dim={dim})"
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def nullable() -> bool:
|
|
||||||
return nullable
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def dim() -> int:
|
|
||||||
return dim
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def value_arrow_type() -> pa.DataType:
|
|
||||||
return value_type
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def is_multi_vector() -> bool:
|
|
||||||
return True
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def __get_pydantic_core_schema__(
|
|
||||||
cls, _source_type: Any, _handler: pydantic.GetCoreSchemaHandler
|
|
||||||
) -> CoreSchema:
|
|
||||||
return core_schema.no_info_after_validator_function(
|
|
||||||
cls,
|
|
||||||
core_schema.list_schema(
|
|
||||||
items_schema=core_schema.list_schema(
|
|
||||||
min_length=dim,
|
|
||||||
max_length=dim,
|
|
||||||
items_schema=core_schema.float_schema(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def __get_validators__(cls) -> Generator[Callable, None, None]:
|
|
||||||
yield cls.validate
|
|
||||||
|
|
||||||
# For pydantic v1
|
|
||||||
@classmethod
|
|
||||||
def validate(cls, v):
|
|
||||||
if not isinstance(v, (list, range)):
|
|
||||||
raise TypeError("A list of vectors is needed")
|
|
||||||
for vec in v:
|
|
||||||
if not isinstance(vec, (list, range, np.ndarray)) or len(vec) != dim:
|
|
||||||
raise TypeError(f"Each vector must be a list of {dim} numbers")
|
|
||||||
return cls(v)
|
|
||||||
|
|
||||||
if PYDANTIC_VERSION.major < 2:
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def __modify_schema__(cls, field_schema: Dict[str, Any]):
|
|
||||||
field_schema["items"] = {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "number"},
|
|
||||||
"minItems": dim,
|
|
||||||
"maxItems": dim,
|
|
||||||
}
|
|
||||||
|
|
||||||
return MultiVectorList
|
|
||||||
|
|
||||||
|
|
||||||
def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
|
def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
|
||||||
"""Convert a field with native Python type to Arrow data type.
|
"""Convert a field with native Python type to Arrow data type.
|
||||||
|
|
||||||
@@ -304,9 +206,6 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
|
|||||||
fields = _pydantic_model_to_fields(tp)
|
fields = _pydantic_model_to_fields(tp)
|
||||||
return pa.struct(fields)
|
return pa.struct(fields)
|
||||||
if issubclass(tp, FixedSizeListMixin):
|
if issubclass(tp, FixedSizeListMixin):
|
||||||
if getattr(tp, "is_multi_vector", lambda: False)():
|
|
||||||
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
|
|
||||||
# For regular Vector
|
|
||||||
return pa.list_(tp.value_arrow_type(), tp.dim())
|
return pa.list_(tp.value_arrow_type(), tp.dim())
|
||||||
return _py_type_to_arrow_type(tp, field)
|
return _py_type_to_arrow_type(tp, field)
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@ from lancedb.merge import LanceMergeInsertBuilder
|
|||||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||||
|
|
||||||
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder
|
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder
|
||||||
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
|
from ..table import AsyncTable, IndexStatistics, Query, Table
|
||||||
|
|
||||||
|
|
||||||
class RemoteTable(Table):
|
class RemoteTable(Table):
|
||||||
@@ -54,10 +54,6 @@ class RemoteTable(Table):
|
|||||||
"""Get the current version of the table"""
|
"""Get the current version of the table"""
|
||||||
return LOOP.run(self._table.version())
|
return LOOP.run(self._table.version())
|
||||||
|
|
||||||
@property
|
|
||||||
def tags(self) -> Tags:
|
|
||||||
return Tags(self._table)
|
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def embedding_functions(self) -> Dict[str, EmbeddingFunctionConfig]:
|
def embedding_functions(self) -> Dict[str, EmbeddingFunctionConfig]:
|
||||||
"""
|
"""
|
||||||
@@ -85,15 +81,12 @@ class RemoteTable(Table):
|
|||||||
"""to_pandas() is not yet supported on LanceDB cloud."""
|
"""to_pandas() is not yet supported on LanceDB cloud."""
|
||||||
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
||||||
|
|
||||||
def checkout(self, version: Union[int, str]):
|
def checkout(self, version: int):
|
||||||
return LOOP.run(self._table.checkout(version))
|
return LOOP.run(self._table.checkout(version))
|
||||||
|
|
||||||
def checkout_latest(self):
|
def checkout_latest(self):
|
||||||
return LOOP.run(self._table.checkout_latest())
|
return LOOP.run(self._table.checkout_latest())
|
||||||
|
|
||||||
def restore(self, version: Optional[int] = None):
|
|
||||||
return LOOP.run(self._table.restore(version))
|
|
||||||
|
|
||||||
def list_indices(self) -> Iterable[IndexConfig]:
|
def list_indices(self) -> Iterable[IndexConfig]:
|
||||||
"""List all the indices on the table"""
|
"""List all the indices on the table"""
|
||||||
return LOOP.run(self._table.list_indices())
|
return LOOP.run(self._table.list_indices())
|
||||||
@@ -108,7 +101,6 @@ class RemoteTable(Table):
|
|||||||
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
|
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
|
||||||
*,
|
*,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
wait_timeout: timedelta = None,
|
|
||||||
):
|
):
|
||||||
"""Creates a scalar index
|
"""Creates a scalar index
|
||||||
Parameters
|
Parameters
|
||||||
@@ -131,18 +123,13 @@ class RemoteTable(Table):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown index type: {index_type}")
|
raise ValueError(f"Unknown index type: {index_type}")
|
||||||
|
|
||||||
LOOP.run(
|
LOOP.run(self._table.create_index(column, config=config, replace=replace))
|
||||||
self._table.create_index(
|
|
||||||
column, config=config, replace=replace, wait_timeout=wait_timeout
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_fts_index(
|
def create_fts_index(
|
||||||
self,
|
self,
|
||||||
column: str,
|
column: str,
|
||||||
*,
|
*,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
wait_timeout: timedelta = None,
|
|
||||||
with_position: bool = True,
|
with_position: bool = True,
|
||||||
# tokenizer configs:
|
# tokenizer configs:
|
||||||
base_tokenizer: str = "simple",
|
base_tokenizer: str = "simple",
|
||||||
@@ -163,11 +150,7 @@ class RemoteTable(Table):
|
|||||||
remove_stop_words=remove_stop_words,
|
remove_stop_words=remove_stop_words,
|
||||||
ascii_folding=ascii_folding,
|
ascii_folding=ascii_folding,
|
||||||
)
|
)
|
||||||
LOOP.run(
|
LOOP.run(self._table.create_index(column, config=config, replace=replace))
|
||||||
self._table.create_index(
|
|
||||||
column, config=config, replace=replace, wait_timeout=wait_timeout
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_index(
|
def create_index(
|
||||||
self,
|
self,
|
||||||
@@ -179,7 +162,6 @@ class RemoteTable(Table):
|
|||||||
replace: Optional[bool] = None,
|
replace: Optional[bool] = None,
|
||||||
accelerator: Optional[str] = None,
|
accelerator: Optional[str] = None,
|
||||||
index_type="vector",
|
index_type="vector",
|
||||||
wait_timeout: Optional[timedelta] = None,
|
|
||||||
):
|
):
|
||||||
"""Create an index on the table.
|
"""Create an index on the table.
|
||||||
Currently, the only parameters that matter are
|
Currently, the only parameters that matter are
|
||||||
@@ -251,11 +233,7 @@ class RemoteTable(Table):
|
|||||||
" 'IVF_FLAT', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
|
" 'IVF_FLAT', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
|
||||||
)
|
)
|
||||||
|
|
||||||
LOOP.run(
|
LOOP.run(self._table.create_index(vector_column_name, config=config))
|
||||||
self._table.create_index(
|
|
||||||
vector_column_name, config=config, wait_timeout=wait_timeout
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def add(
|
def add(
|
||||||
self,
|
self,
|
||||||
@@ -374,15 +352,9 @@ class RemoteTable(Table):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _execute_query(
|
def _execute_query(
|
||||||
self,
|
self, query: Query, batch_size: Optional[int] = None
|
||||||
query: Query,
|
|
||||||
*,
|
|
||||||
batch_size: Optional[int] = None,
|
|
||||||
timeout: Optional[timedelta] = None,
|
|
||||||
) -> pa.RecordBatchReader:
|
) -> pa.RecordBatchReader:
|
||||||
async_iter = LOOP.run(
|
async_iter = LOOP.run(self._table._execute_query(query, batch_size=batch_size))
|
||||||
self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
|
|
||||||
)
|
|
||||||
|
|
||||||
def iter_sync():
|
def iter_sync():
|
||||||
try:
|
try:
|
||||||
@@ -393,12 +365,6 @@ class RemoteTable(Table):
|
|||||||
|
|
||||||
return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())
|
return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())
|
||||||
|
|
||||||
def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str:
|
|
||||||
return LOOP.run(self._table._explain_plan(query, verbose))
|
|
||||||
|
|
||||||
def _analyze_plan(self, query: Query) -> str:
|
|
||||||
return LOOP.run(self._table._analyze_plan(query))
|
|
||||||
|
|
||||||
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
|
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
|
||||||
"""Returns a [`LanceMergeInsertBuilder`][lancedb.merge.LanceMergeInsertBuilder]
|
"""Returns a [`LanceMergeInsertBuilder`][lancedb.merge.LanceMergeInsertBuilder]
|
||||||
that can be used to create a "merge insert" operation.
|
that can be used to create a "merge insert" operation.
|
||||||
@@ -573,14 +539,6 @@ class RemoteTable(Table):
|
|||||||
def drop_index(self, index_name: str):
|
def drop_index(self, index_name: str):
|
||||||
return LOOP.run(self._table.drop_index(index_name))
|
return LOOP.run(self._table.drop_index(index_name))
|
||||||
|
|
||||||
def wait_for_index(
|
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
|
||||||
):
|
|
||||||
return LOOP.run(self._table.wait_for_index(index_names, timeout))
|
|
||||||
|
|
||||||
def stats(self):
|
|
||||||
return LOOP.run(self._table.stats())
|
|
||||||
|
|
||||||
def uses_v2_manifest_paths(self) -> bool:
|
def uses_v2_manifest_paths(self) -> bool:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"uses_v2_manifest_paths() is not supported on the LanceDB Cloud"
|
"uses_v2_manifest_paths() is not supported on the LanceDB Cloud"
|
||||||
|
|||||||
@@ -47,9 +47,6 @@ class AnswerdotaiRerankers(Reranker):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
result_set = self._handle_empty_results(result_set)
|
|
||||||
if len(result_set) == 0:
|
|
||||||
return result_set
|
|
||||||
docs = result_set[self.column].to_pylist()
|
docs = result_set[self.column].to_pylist()
|
||||||
doc_ids = list(range(len(docs)))
|
doc_ids = list(range(len(docs)))
|
||||||
result = self.reranker.rank(query, docs, doc_ids=doc_ids)
|
result = self.reranker.rank(query, docs, doc_ids=doc_ids)
|
||||||
@@ -86,6 +83,7 @@ class AnswerdotaiRerankers(Reranker):
|
|||||||
vector_results = self._rerank(vector_results, query)
|
vector_results = self._rerank(vector_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
vector_results = vector_results.drop_columns(["_distance"])
|
vector_results = vector_results.drop_columns(["_distance"])
|
||||||
|
|
||||||
vector_results = vector_results.sort_by([("_relevance_score", "descending")])
|
vector_results = vector_results.sort_by([("_relevance_score", "descending")])
|
||||||
return vector_results
|
return vector_results
|
||||||
|
|
||||||
@@ -93,5 +91,7 @@ class AnswerdotaiRerankers(Reranker):
|
|||||||
fts_results = self._rerank(fts_results, query)
|
fts_results = self._rerank(fts_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
fts_results = fts_results.drop_columns(["_score"])
|
fts_results = fts_results.drop_columns(["_score"])
|
||||||
|
|
||||||
fts_results = fts_results.sort_by([("_relevance_score", "descending")])
|
fts_results = fts_results.sort_by([("_relevance_score", "descending")])
|
||||||
|
|
||||||
return fts_results
|
return fts_results
|
||||||
|
|||||||
@@ -65,16 +65,6 @@ class Reranker(ABC):
|
|||||||
f"{self.__class__.__name__} does not implement rerank_vector"
|
f"{self.__class__.__name__} does not implement rerank_vector"
|
||||||
)
|
)
|
||||||
|
|
||||||
def _handle_empty_results(self, results: pa.Table):
|
|
||||||
"""
|
|
||||||
Helper method to handle empty FTS results consistently
|
|
||||||
"""
|
|
||||||
if len(results) > 0:
|
|
||||||
return results
|
|
||||||
return results.append_column(
|
|
||||||
"_relevance_score", pa.array([], type=pa.float32())
|
|
||||||
)
|
|
||||||
|
|
||||||
def rerank_fts(
|
def rerank_fts(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
|
|||||||
@@ -62,9 +62,6 @@ class CohereReranker(Reranker):
|
|||||||
return cohere.Client(os.environ.get("COHERE_API_KEY") or self.api_key)
|
return cohere.Client(os.environ.get("COHERE_API_KEY") or self.api_key)
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
result_set = self._handle_empty_results(result_set)
|
|
||||||
if len(result_set) == 0:
|
|
||||||
return result_set
|
|
||||||
docs = result_set[self.column].to_pylist()
|
docs = result_set[self.column].to_pylist()
|
||||||
response = self._client.rerank(
|
response = self._client.rerank(
|
||||||
query=query,
|
query=query,
|
||||||
@@ -102,14 +99,24 @@ class CohereReranker(Reranker):
|
|||||||
)
|
)
|
||||||
return combined_results
|
return combined_results
|
||||||
|
|
||||||
def rerank_vector(self, query: str, vector_results: pa.Table):
|
def rerank_vector(
|
||||||
vector_results = self._rerank(vector_results, query)
|
self,
|
||||||
|
query: str,
|
||||||
|
vector_results: pa.Table,
|
||||||
|
):
|
||||||
|
result_set = self._rerank(vector_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
vector_results = vector_results.drop_columns(["_distance"])
|
result_set = result_set.drop_columns(["_distance"])
|
||||||
return vector_results
|
|
||||||
|
|
||||||
def rerank_fts(self, query: str, fts_results: pa.Table):
|
return result_set
|
||||||
fts_results = self._rerank(fts_results, query)
|
|
||||||
|
def rerank_fts(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
fts_results: pa.Table,
|
||||||
|
):
|
||||||
|
result_set = self._rerank(fts_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
fts_results = fts_results.drop_columns(["_score"])
|
result_set = result_set.drop_columns(["_score"])
|
||||||
return fts_results
|
|
||||||
|
return result_set
|
||||||
|
|||||||
@@ -63,9 +63,6 @@ class CrossEncoderReranker(Reranker):
|
|||||||
return cross_encoder
|
return cross_encoder
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
result_set = self._handle_empty_results(result_set)
|
|
||||||
if len(result_set) == 0:
|
|
||||||
return result_set
|
|
||||||
passages = result_set[self.column].to_pylist()
|
passages = result_set[self.column].to_pylist()
|
||||||
cross_inp = [[query, passage] for passage in passages]
|
cross_inp = [[query, passage] for passage in passages]
|
||||||
cross_scores = self.model.predict(cross_inp)
|
cross_scores = self.model.predict(cross_inp)
|
||||||
@@ -96,7 +93,11 @@ class CrossEncoderReranker(Reranker):
|
|||||||
|
|
||||||
return combined_results
|
return combined_results
|
||||||
|
|
||||||
def rerank_vector(self, query: str, vector_results: pa.Table):
|
def rerank_vector(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
vector_results: pa.Table,
|
||||||
|
):
|
||||||
vector_results = self._rerank(vector_results, query)
|
vector_results = self._rerank(vector_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
vector_results = vector_results.drop_columns(["_distance"])
|
vector_results = vector_results.drop_columns(["_distance"])
|
||||||
@@ -104,7 +105,11 @@ class CrossEncoderReranker(Reranker):
|
|||||||
vector_results = vector_results.sort_by([("_relevance_score", "descending")])
|
vector_results = vector_results.sort_by([("_relevance_score", "descending")])
|
||||||
return vector_results
|
return vector_results
|
||||||
|
|
||||||
def rerank_fts(self, query: str, fts_results: pa.Table):
|
def rerank_fts(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
fts_results: pa.Table,
|
||||||
|
):
|
||||||
fts_results = self._rerank(fts_results, query)
|
fts_results = self._rerank(fts_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
fts_results = fts_results.drop_columns(["_score"])
|
fts_results = fts_results.drop_columns(["_score"])
|
||||||
|
|||||||
@@ -62,9 +62,6 @@ class JinaReranker(Reranker):
|
|||||||
return self._session
|
return self._session
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
result_set = self._handle_empty_results(result_set)
|
|
||||||
if len(result_set) == 0:
|
|
||||||
return result_set
|
|
||||||
docs = result_set[self.column].to_pylist()
|
docs = result_set[self.column].to_pylist()
|
||||||
response = self._client.post( # type: ignore
|
response = self._client.post( # type: ignore
|
||||||
API_URL,
|
API_URL,
|
||||||
@@ -107,14 +104,24 @@ class JinaReranker(Reranker):
|
|||||||
)
|
)
|
||||||
return combined_results
|
return combined_results
|
||||||
|
|
||||||
def rerank_vector(self, query: str, vector_results: pa.Table):
|
def rerank_vector(
|
||||||
vector_results = self._rerank(vector_results, query)
|
self,
|
||||||
|
query: str,
|
||||||
|
vector_results: pa.Table,
|
||||||
|
):
|
||||||
|
result_set = self._rerank(vector_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
vector_results = vector_results.drop_columns(["_distance"])
|
result_set = result_set.drop_columns(["_distance"])
|
||||||
return vector_results
|
|
||||||
|
|
||||||
def rerank_fts(self, query: str, fts_results: pa.Table):
|
return result_set
|
||||||
fts_results = self._rerank(fts_results, query)
|
|
||||||
|
def rerank_fts(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
fts_results: pa.Table,
|
||||||
|
):
|
||||||
|
result_set = self._rerank(fts_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
fts_results = fts_results.drop_columns(["_score"])
|
result_set = result_set.drop_columns(["_score"])
|
||||||
return fts_results
|
|
||||||
|
return result_set
|
||||||
|
|||||||
@@ -44,9 +44,6 @@ class OpenaiReranker(Reranker):
|
|||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
result_set = self._handle_empty_results(result_set)
|
|
||||||
if len(result_set) == 0:
|
|
||||||
return result_set
|
|
||||||
docs = result_set[self.column].to_pylist()
|
docs = result_set[self.column].to_pylist()
|
||||||
response = self._client.chat.completions.create(
|
response = self._client.chat.completions.create(
|
||||||
model=self.model_name,
|
model=self.model_name,
|
||||||
@@ -107,14 +104,18 @@ class OpenaiReranker(Reranker):
|
|||||||
vector_results = self._rerank(vector_results, query)
|
vector_results = self._rerank(vector_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
vector_results = vector_results.drop_columns(["_distance"])
|
vector_results = vector_results.drop_columns(["_distance"])
|
||||||
|
|
||||||
vector_results = vector_results.sort_by([("_relevance_score", "descending")])
|
vector_results = vector_results.sort_by([("_relevance_score", "descending")])
|
||||||
|
|
||||||
return vector_results
|
return vector_results
|
||||||
|
|
||||||
def rerank_fts(self, query: str, fts_results: pa.Table):
|
def rerank_fts(self, query: str, fts_results: pa.Table):
|
||||||
fts_results = self._rerank(fts_results, query)
|
fts_results = self._rerank(fts_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
fts_results = fts_results.drop_columns(["_score"])
|
fts_results = fts_results.drop_columns(["_score"])
|
||||||
|
|
||||||
fts_results = fts_results.sort_by([("_relevance_score", "descending")])
|
fts_results = fts_results.sort_by([("_relevance_score", "descending")])
|
||||||
|
|
||||||
return fts_results
|
return fts_results
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
|
|||||||
@@ -63,9 +63,6 @@ class VoyageAIReranker(Reranker):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
result_set = self._handle_empty_results(result_set)
|
|
||||||
if len(result_set) == 0:
|
|
||||||
return result_set
|
|
||||||
docs = result_set[self.column].to_pylist()
|
docs = result_set[self.column].to_pylist()
|
||||||
response = self._client.rerank(
|
response = self._client.rerank(
|
||||||
query=query,
|
query=query,
|
||||||
@@ -104,14 +101,24 @@ class VoyageAIReranker(Reranker):
|
|||||||
)
|
)
|
||||||
return combined_results
|
return combined_results
|
||||||
|
|
||||||
def rerank_vector(self, query: str, vector_results: pa.Table):
|
def rerank_vector(
|
||||||
vector_results = self._rerank(vector_results, query)
|
self,
|
||||||
|
query: str,
|
||||||
|
vector_results: pa.Table,
|
||||||
|
):
|
||||||
|
result_set = self._rerank(vector_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
vector_results = vector_results.drop_columns(["_distance"])
|
result_set = result_set.drop_columns(["_distance"])
|
||||||
return vector_results
|
|
||||||
|
|
||||||
def rerank_fts(self, query: str, fts_results: pa.Table):
|
return result_set
|
||||||
fts_results = self._rerank(fts_results, query)
|
|
||||||
|
def rerank_fts(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
fts_results: pa.Table,
|
||||||
|
):
|
||||||
|
result_set = self._rerank(fts_results, query)
|
||||||
if self.score == "relevance":
|
if self.score == "relevance":
|
||||||
fts_results = fts_results.drop_columns(["_score"])
|
result_set = result_set.drop_columns(["_score"])
|
||||||
return fts_results
|
|
||||||
|
return result_set
|
||||||
|
|||||||
@@ -52,7 +52,6 @@ from .query import (
|
|||||||
AsyncHybridQuery,
|
AsyncHybridQuery,
|
||||||
AsyncQuery,
|
AsyncQuery,
|
||||||
AsyncVectorQuery,
|
AsyncVectorQuery,
|
||||||
FullTextQuery,
|
|
||||||
LanceEmptyQueryBuilder,
|
LanceEmptyQueryBuilder,
|
||||||
LanceFtsQueryBuilder,
|
LanceFtsQueryBuilder,
|
||||||
LanceHybridQueryBuilder,
|
LanceHybridQueryBuilder,
|
||||||
@@ -77,7 +76,6 @@ if TYPE_CHECKING:
|
|||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
CleanupStats,
|
CleanupStats,
|
||||||
CompactionStats,
|
CompactionStats,
|
||||||
Tag,
|
|
||||||
)
|
)
|
||||||
from .db import LanceDBConnection
|
from .db import LanceDBConnection
|
||||||
from .index import IndexConfig
|
from .index import IndexConfig
|
||||||
@@ -583,35 +581,6 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@property
|
|
||||||
@abstractmethod
|
|
||||||
def tags(self) -> Tags:
|
|
||||||
"""Tag management for the table.
|
|
||||||
|
|
||||||
Similar to Git, tags are a way to add metadata to a specific version of the
|
|
||||||
table.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Tagged versions are exempted from the :py:meth:`cleanup_old_versions()`
|
|
||||||
process.
|
|
||||||
|
|
||||||
To remove a version that has been tagged, you must first
|
|
||||||
:py:meth:`~Tags.delete` the associated tag.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
table = db.open_table("my_table")
|
|
||||||
table.tags.create("v2-prod-20250203", 10)
|
|
||||||
|
|
||||||
tags = table.tags.list()
|
|
||||||
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def embedding_functions(self) -> Dict[str, EmbeddingFunctionConfig]:
|
def embedding_functions(self) -> Dict[str, EmbeddingFunctionConfig]:
|
||||||
@@ -661,7 +630,6 @@ class Table(ABC):
|
|||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
*,
|
*,
|
||||||
index_type: VectorIndexType = "IVF_PQ",
|
index_type: VectorIndexType = "IVF_PQ",
|
||||||
wait_timeout: Optional[timedelta] = None,
|
|
||||||
num_bits: int = 8,
|
num_bits: int = 8,
|
||||||
max_iterations: int = 50,
|
max_iterations: int = 50,
|
||||||
sample_rate: int = 256,
|
sample_rate: int = 256,
|
||||||
@@ -697,8 +665,6 @@ class Table(ABC):
|
|||||||
num_bits: int
|
num_bits: int
|
||||||
The number of bits to encode sub-vectors. Only used with the IVF_PQ index.
|
The number of bits to encode sub-vectors. Only used with the IVF_PQ index.
|
||||||
Only 4 and 8 are supported.
|
Only 4 and 8 are supported.
|
||||||
wait_timeout: timedelta, optional
|
|
||||||
The timeout to wait if indexing is asynchronous.
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -722,30 +688,6 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def wait_for_index(
|
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Wait for indexing to complete for the given index names.
|
|
||||||
This will poll the table until all the indices are fully indexed,
|
|
||||||
or raise a timeout exception if the timeout is reached.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
index_names: str
|
|
||||||
The name of the indices to poll
|
|
||||||
timeout: timedelta
|
|
||||||
Timeout to wait for asynchronous indexing. The default is 5 minutes.
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def stats(self) -> TableStatistics:
|
|
||||||
"""
|
|
||||||
Retrieve table and fragment statistics.
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def create_scalar_index(
|
def create_scalar_index(
|
||||||
self,
|
self,
|
||||||
@@ -753,7 +695,6 @@ class Table(ABC):
|
|||||||
*,
|
*,
|
||||||
replace: bool = True,
|
replace: bool = True,
|
||||||
index_type: ScalarIndexType = "BTREE",
|
index_type: ScalarIndexType = "BTREE",
|
||||||
wait_timeout: Optional[timedelta] = None,
|
|
||||||
):
|
):
|
||||||
"""Create a scalar index on a column.
|
"""Create a scalar index on a column.
|
||||||
|
|
||||||
@@ -766,8 +707,7 @@ class Table(ABC):
|
|||||||
Replace the existing index if it exists.
|
Replace the existing index if it exists.
|
||||||
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST"], default "BTREE"
|
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST"], default "BTREE"
|
||||||
The type of index to create.
|
The type of index to create.
|
||||||
wait_timeout: timedelta, optional
|
|
||||||
The timeout to wait if indexing is asynchronous.
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
|
||||||
@@ -826,7 +766,6 @@ class Table(ABC):
|
|||||||
stem: bool = False,
|
stem: bool = False,
|
||||||
remove_stop_words: bool = False,
|
remove_stop_words: bool = False,
|
||||||
ascii_folding: bool = False,
|
ascii_folding: bool = False,
|
||||||
wait_timeout: Optional[timedelta] = None,
|
|
||||||
):
|
):
|
||||||
"""Create a full-text search index on the table.
|
"""Create a full-text search index on the table.
|
||||||
|
|
||||||
@@ -882,8 +821,6 @@ class Table(ABC):
|
|||||||
ascii_folding : bool, default False
|
ascii_folding : bool, default False
|
||||||
Whether to fold ASCII characters. This converts accented characters to
|
Whether to fold ASCII characters. This converts accented characters to
|
||||||
their ASCII equivalent. For example, "café" would be converted to "cafe".
|
their ASCII equivalent. For example, "café" would be converted to "cafe".
|
||||||
wait_timeout: timedelta, optional
|
|
||||||
The timeout to wait if indexing is asynchronous.
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -962,12 +899,10 @@ class Table(ABC):
|
|||||||
>>> table = db.create_table("my_table", data)
|
>>> table = db.create_table("my_table", data)
|
||||||
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||||
>>> # Perform a "upsert" operation
|
>>> # Perform a "upsert" operation
|
||||||
>>> stats = table.merge_insert("a") \\
|
>>> table.merge_insert("a") \\
|
||||||
... .when_matched_update_all() \\
|
... .when_matched_update_all() \\
|
||||||
... .when_not_matched_insert_all() \\
|
... .when_not_matched_insert_all() \\
|
||||||
... .execute(new_data)
|
... .execute(new_data)
|
||||||
>>> stats
|
|
||||||
{'num_inserted_rows': 1, 'num_updated_rows': 2, 'num_deleted_rows': 0}
|
|
||||||
>>> # The order of new rows is non-deterministic since we use
|
>>> # The order of new rows is non-deterministic since we use
|
||||||
>>> # a hash-join as part of this operation and so we sort here
|
>>> # a hash-join as part of this operation and so we sort here
|
||||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||||
@@ -984,9 +919,7 @@ class Table(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
query: Optional[
|
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
|
||||||
Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
|
|
||||||
] = None,
|
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
query_type: QueryType = "auto",
|
query_type: QueryType = "auto",
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
@@ -1071,19 +1004,9 @@ class Table(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _execute_query(
|
def _execute_query(
|
||||||
self,
|
self, query: Query, batch_size: Optional[int] = None
|
||||||
query: Query,
|
|
||||||
*,
|
|
||||||
batch_size: Optional[int] = None,
|
|
||||||
timeout: Optional[timedelta] = None,
|
|
||||||
) -> pa.RecordBatchReader: ...
|
) -> pa.RecordBatchReader: ...
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str: ...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def _analyze_plan(self, query: Query) -> str: ...
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _do_merge(
|
def _do_merge(
|
||||||
self,
|
self,
|
||||||
@@ -1339,21 +1262,16 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def add_columns(
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
self, transforms: Dict[str, str] | pa.Field | List[pa.Field] | pa.Schema
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Add new columns with defined values.
|
Add new columns with defined values.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
transforms: Dict[str, str], pa.Field, List[pa.Field], pa.Schema
|
transforms: Dict[str, str]
|
||||||
A map of column name to a SQL expression to use to calculate the
|
A map of column name to a SQL expression to use to calculate the
|
||||||
value of the new column. These expressions will be evaluated for
|
value of the new column. These expressions will be evaluated for
|
||||||
each row in the table, and can reference existing columns.
|
each row in the table, and can reference existing columns.
|
||||||
Alternatively, a pyarrow Field or Schema can be provided to add
|
|
||||||
new columns with the specified data types. The new columns will
|
|
||||||
be initialized with null values.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@@ -1393,7 +1311,7 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def checkout(self, version: Union[int, str]):
|
def checkout(self, version: int):
|
||||||
"""
|
"""
|
||||||
Checks out a specific version of the Table
|
Checks out a specific version of the Table
|
||||||
|
|
||||||
@@ -1408,12 +1326,6 @@ class Table(ABC):
|
|||||||
Any operation that modifies the table will fail while the table is in a checked
|
Any operation that modifies the table will fail while the table is in a checked
|
||||||
out state.
|
out state.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
version: int | str,
|
|
||||||
The version to check out. A version number (`int`) or a tag
|
|
||||||
(`str`) can be provided.
|
|
||||||
|
|
||||||
To return the table to a normal state use `[Self::checkout_latest]`
|
To return the table to a normal state use `[Self::checkout_latest]`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -1427,21 +1339,6 @@ class Table(ABC):
|
|||||||
It can also be used to undo a `[Self::checkout]` operation
|
It can also be used to undo a `[Self::checkout]` operation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def restore(self, version: Optional[int] = None):
|
|
||||||
"""Restore a version of the table. This is an in-place operation.
|
|
||||||
|
|
||||||
This creates a new version where the data is equivalent to the
|
|
||||||
specified previous version. Data is not copied (as of python-v0.2.1).
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
version : int, default None
|
|
||||||
The version to restore. If unspecified then restores the currently
|
|
||||||
checked out version. If the currently checked out version is the
|
|
||||||
latest version then this is a no-op.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def list_versions(self) -> List[Dict[str, Any]]:
|
def list_versions(self) -> List[Dict[str, Any]]:
|
||||||
"""List all versions of the table"""
|
"""List all versions of the table"""
|
||||||
@@ -1583,45 +1480,7 @@ class LanceTable(Table):
|
|||||||
"""Get the current version of the table"""
|
"""Get the current version of the table"""
|
||||||
return LOOP.run(self._table.version())
|
return LOOP.run(self._table.version())
|
||||||
|
|
||||||
@property
|
def checkout(self, version: int):
|
||||||
def tags(self) -> Tags:
|
|
||||||
"""Tag management for the table.
|
|
||||||
|
|
||||||
Similar to Git, tags are a way to add metadata to a specific version of the
|
|
||||||
table.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Tagged versions are exempted from the :py:meth:`cleanup_old_versions()`
|
|
||||||
process.
|
|
||||||
|
|
||||||
To remove a version that has been tagged, you must first
|
|
||||||
:py:meth:`~Tags.delete` the associated tag.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Tags
|
|
||||||
The tag manager for managing tags for the table.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
>>> import lancedb
|
|
||||||
>>> db = lancedb.connect("./.lancedb")
|
|
||||||
>>> table = db.create_table("my_table",
|
|
||||||
... [{"vector": [1.1, 0.9], "type": "vector"}])
|
|
||||||
>>> table.tags.create("v1", table.version)
|
|
||||||
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
|
||||||
>>> tags = table.tags.list()
|
|
||||||
>>> print(tags["v1"]["version"])
|
|
||||||
1
|
|
||||||
>>> table.checkout("v1")
|
|
||||||
>>> table.to_pandas()
|
|
||||||
vector type
|
|
||||||
0 [1.1, 0.9] vector
|
|
||||||
"""
|
|
||||||
return Tags(self._table)
|
|
||||||
|
|
||||||
def checkout(self, version: Union[int, str]):
|
|
||||||
"""Checkout a version of the table. This is an in-place operation.
|
"""Checkout a version of the table. This is an in-place operation.
|
||||||
|
|
||||||
This allows viewing previous versions of the table. If you wish to
|
This allows viewing previous versions of the table. If you wish to
|
||||||
@@ -1633,9 +1492,8 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
version: int | str,
|
version : int
|
||||||
The version to check out. A version number (`int`) or a tag
|
The version to checkout.
|
||||||
(`str`) can be provided.
|
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -1854,40 +1712,8 @@ class LanceTable(Table):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def drop_index(self, name: str) -> None:
|
def drop_index(self, name: str) -> None:
|
||||||
"""
|
|
||||||
Drops an index from the table
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
name: str
|
|
||||||
The name of the index to drop
|
|
||||||
"""
|
|
||||||
return LOOP.run(self._table.drop_index(name))
|
return LOOP.run(self._table.drop_index(name))
|
||||||
|
|
||||||
def prewarm_index(self, name: str) -> None:
|
|
||||||
"""
|
|
||||||
Prewarms an index in the table
|
|
||||||
|
|
||||||
This loads the entire index into memory
|
|
||||||
|
|
||||||
If the index does not fit into the available cache this call
|
|
||||||
may be wasteful
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
name: str
|
|
||||||
The name of the index to prewarm
|
|
||||||
"""
|
|
||||||
return LOOP.run(self._table.prewarm_index(name))
|
|
||||||
|
|
||||||
def wait_for_index(
|
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
|
||||||
) -> None:
|
|
||||||
return LOOP.run(self._table.wait_for_index(index_names, timeout))
|
|
||||||
|
|
||||||
def stats(self) -> TableStatistics:
|
|
||||||
return LOOP.run(self._table.stats())
|
|
||||||
|
|
||||||
def create_scalar_index(
|
def create_scalar_index(
|
||||||
self,
|
self,
|
||||||
column: str,
|
column: str,
|
||||||
@@ -2187,9 +2013,7 @@ class LanceTable(Table):
|
|||||||
@overload
|
@overload
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
query: Optional[
|
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
|
||||||
Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
|
|
||||||
] = None,
|
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
query_type: Literal["hybrid"] = "hybrid",
|
query_type: Literal["hybrid"] = "hybrid",
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
@@ -2208,9 +2032,7 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
query: Optional[
|
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
|
||||||
Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
|
|
||||||
] = None,
|
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
query_type: QueryType = "auto",
|
query_type: QueryType = "auto",
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
@@ -2282,8 +2104,6 @@ class LanceTable(Table):
|
|||||||
and also the "_distance" column which is the distance between the query
|
and also the "_distance" column which is the distance between the query
|
||||||
vector and the returned vector.
|
vector and the returned vector.
|
||||||
"""
|
"""
|
||||||
if isinstance(query, FullTextQuery):
|
|
||||||
query_type = "fts"
|
|
||||||
vector_column_name = infer_vector_column_name(
|
vector_column_name = infer_vector_column_name(
|
||||||
schema=self.schema,
|
schema=self.schema,
|
||||||
query_type=query_type,
|
query_type=query_type,
|
||||||
@@ -2459,15 +2279,9 @@ class LanceTable(Table):
|
|||||||
LOOP.run(self._table.update(values, where=where, updates_sql=values_sql))
|
LOOP.run(self._table.update(values, where=where, updates_sql=values_sql))
|
||||||
|
|
||||||
def _execute_query(
|
def _execute_query(
|
||||||
self,
|
self, query: Query, batch_size: Optional[int] = None
|
||||||
query: Query,
|
|
||||||
*,
|
|
||||||
batch_size: Optional[int] = None,
|
|
||||||
timeout: Optional[timedelta] = None,
|
|
||||||
) -> pa.RecordBatchReader:
|
) -> pa.RecordBatchReader:
|
||||||
async_iter = LOOP.run(
|
async_iter = LOOP.run(self._table._execute_query(query, batch_size))
|
||||||
self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
|
|
||||||
)
|
|
||||||
|
|
||||||
def iter_sync():
|
def iter_sync():
|
||||||
try:
|
try:
|
||||||
@@ -2478,11 +2292,8 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())
|
return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())
|
||||||
|
|
||||||
def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str:
|
def _explain_plan(self, query: Query) -> str:
|
||||||
return LOOP.run(self._table._explain_plan(query, verbose))
|
return LOOP.run(self._table._explain_plan(query))
|
||||||
|
|
||||||
def _analyze_plan(self, query: Query) -> str:
|
|
||||||
return LOOP.run(self._table._analyze_plan(query))
|
|
||||||
|
|
||||||
def _do_merge(
|
def _do_merge(
|
||||||
self,
|
self,
|
||||||
@@ -2491,9 +2302,7 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors: OnBadVectorsType,
|
on_bad_vectors: OnBadVectorsType,
|
||||||
fill_value: float,
|
fill_value: float,
|
||||||
):
|
):
|
||||||
return LOOP.run(
|
LOOP.run(self._table._do_merge(merge, new_data, on_bad_vectors, fill_value))
|
||||||
self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
|
|
||||||
)
|
|
||||||
|
|
||||||
@deprecation.deprecated(
|
@deprecation.deprecated(
|
||||||
deprecated_in="0.21.0",
|
deprecated_in="0.21.0",
|
||||||
@@ -2633,9 +2442,7 @@ class LanceTable(Table):
|
|||||||
"""
|
"""
|
||||||
return LOOP.run(self._table.index_stats(index_name))
|
return LOOP.run(self._table.index_stats(index_name))
|
||||||
|
|
||||||
def add_columns(
|
def add_columns(self, transforms: Dict[str, str]):
|
||||||
self, transforms: Dict[str, str] | pa.field | List[pa.field] | pa.Schema
|
|
||||||
):
|
|
||||||
LOOP.run(self._table.add_columns(transforms))
|
LOOP.run(self._table.add_columns(transforms))
|
||||||
|
|
||||||
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
|
||||||
@@ -3083,7 +2890,6 @@ class AsyncTable:
|
|||||||
config: Optional[
|
config: Optional[
|
||||||
Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
|
Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
|
||||||
] = None,
|
] = None,
|
||||||
wait_timeout: Optional[timedelta] = None,
|
|
||||||
):
|
):
|
||||||
"""Create an index to speed up queries
|
"""Create an index to speed up queries
|
||||||
|
|
||||||
@@ -3108,8 +2914,6 @@ class AsyncTable:
|
|||||||
For advanced configuration you can specify the type of index you would
|
For advanced configuration you can specify the type of index you would
|
||||||
like to create. You can also specify index-specific parameters when
|
like to create. You can also specify index-specific parameters when
|
||||||
creating an index object.
|
creating an index object.
|
||||||
wait_timeout: timedelta, optional
|
|
||||||
The timeout to wait if indexing is asynchronous.
|
|
||||||
"""
|
"""
|
||||||
if config is not None:
|
if config is not None:
|
||||||
if not isinstance(
|
if not isinstance(
|
||||||
@@ -3120,9 +2924,7 @@ class AsyncTable:
|
|||||||
" Bitmap, LabelList, or FTS"
|
" Bitmap, LabelList, or FTS"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await self._inner.create_index(
|
await self._inner.create_index(column, index=config, replace=replace)
|
||||||
column, index=config, replace=replace, wait_timeout=wait_timeout
|
|
||||||
)
|
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
if "not support the requested language" in str(e):
|
if "not support the requested language" in str(e):
|
||||||
supported_langs = ", ".join(lang_mapping.values())
|
supported_langs = ", ".join(lang_mapping.values())
|
||||||
@@ -3150,46 +2952,6 @@ class AsyncTable:
|
|||||||
"""
|
"""
|
||||||
await self._inner.drop_index(name)
|
await self._inner.drop_index(name)
|
||||||
|
|
||||||
async def prewarm_index(self, name: str) -> None:
|
|
||||||
"""
|
|
||||||
Prewarm an index in the table.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
name: str
|
|
||||||
The name of the index to prewarm
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
This will load the index into memory. This may reduce the cold-start time for
|
|
||||||
future queries. If the index does not fit in the cache then this call may be
|
|
||||||
wasteful.
|
|
||||||
"""
|
|
||||||
await self._inner.prewarm_index(name)
|
|
||||||
|
|
||||||
async def wait_for_index(
|
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Wait for indexing to complete for the given index names.
|
|
||||||
This will poll the table until all the indices are fully indexed,
|
|
||||||
or raise a timeout exception if the timeout is reached.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
index_names: str
|
|
||||||
The name of the indices to poll
|
|
||||||
timeout: timedelta
|
|
||||||
Timeout to wait for asynchronous indexing. The default is 5 minutes.
|
|
||||||
"""
|
|
||||||
await self._inner.wait_for_index(index_names, timeout)
|
|
||||||
|
|
||||||
async def stats(self) -> TableStatistics:
|
|
||||||
"""
|
|
||||||
Retrieve table and fragment statistics.
|
|
||||||
"""
|
|
||||||
return await self._inner.stats()
|
|
||||||
|
|
||||||
async def add(
|
async def add(
|
||||||
self,
|
self,
|
||||||
data: DATA,
|
data: DATA,
|
||||||
@@ -3281,12 +3043,10 @@ class AsyncTable:
|
|||||||
>>> table = db.create_table("my_table", data)
|
>>> table = db.create_table("my_table", data)
|
||||||
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
>>> new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})
|
||||||
>>> # Perform a "upsert" operation
|
>>> # Perform a "upsert" operation
|
||||||
>>> stats = table.merge_insert("a") \\
|
>>> table.merge_insert("a") \\
|
||||||
... .when_matched_update_all() \\
|
... .when_matched_update_all() \\
|
||||||
... .when_not_matched_insert_all() \\
|
... .when_not_matched_insert_all() \\
|
||||||
... .execute(new_data)
|
... .execute(new_data)
|
||||||
>>> stats
|
|
||||||
{'num_inserted_rows': 1, 'num_updated_rows': 2, 'num_deleted_rows': 0}
|
|
||||||
>>> # The order of new rows is non-deterministic since we use
|
>>> # The order of new rows is non-deterministic since we use
|
||||||
>>> # a hash-join as part of this operation and so we sort here
|
>>> # a hash-join as part of this operation and so we sort here
|
||||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||||
@@ -3343,9 +3103,7 @@ class AsyncTable:
|
|||||||
@overload
|
@overload
|
||||||
async def search(
|
async def search(
|
||||||
self,
|
self,
|
||||||
query: Optional[
|
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
|
||||||
Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
|
|
||||||
] = None,
|
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
query_type: Literal["vector"] = ...,
|
query_type: Literal["vector"] = ...,
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
@@ -3354,9 +3112,7 @@ class AsyncTable:
|
|||||||
|
|
||||||
async def search(
|
async def search(
|
||||||
self,
|
self,
|
||||||
query: Optional[
|
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]] = None,
|
||||||
Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]
|
|
||||||
] = None,
|
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
query_type: QueryType = "auto",
|
query_type: QueryType = "auto",
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
@@ -3415,10 +3171,8 @@ class AsyncTable:
|
|||||||
async def get_embedding_func(
|
async def get_embedding_func(
|
||||||
vector_column_name: Optional[str],
|
vector_column_name: Optional[str],
|
||||||
query_type: QueryType,
|
query_type: QueryType,
|
||||||
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple, FullTextQuery]],
|
query: Optional[Union[VEC, str, "PIL.Image.Image", Tuple]],
|
||||||
) -> Tuple[str, EmbeddingFunctionConfig]:
|
) -> Tuple[str, EmbeddingFunctionConfig]:
|
||||||
if isinstance(query, FullTextQuery):
|
|
||||||
query_type = "fts"
|
|
||||||
schema = await self.schema()
|
schema = await self.schema()
|
||||||
vector_column_name = infer_vector_column_name(
|
vector_column_name = infer_vector_column_name(
|
||||||
schema=schema,
|
schema=schema,
|
||||||
@@ -3468,8 +3222,6 @@ class AsyncTable:
|
|||||||
if is_embedding(query):
|
if is_embedding(query):
|
||||||
vector_query = query
|
vector_query = query
|
||||||
query_type = "vector"
|
query_type = "vector"
|
||||||
elif isinstance(query, FullTextQuery):
|
|
||||||
query_type = "fts"
|
|
||||||
elif isinstance(query, str):
|
elif isinstance(query, str):
|
||||||
try:
|
try:
|
||||||
(
|
(
|
||||||
@@ -3590,15 +3342,13 @@ class AsyncTable:
|
|||||||
async_query = async_query.nearest_to_text(
|
async_query = async_query.nearest_to_text(
|
||||||
query.full_text_query.query, query.full_text_query.columns
|
query.full_text_query.query, query.full_text_query.columns
|
||||||
)
|
)
|
||||||
|
if query.full_text_query.limit is not None:
|
||||||
|
async_query = async_query.limit(query.full_text_query.limit)
|
||||||
|
|
||||||
return async_query
|
return async_query
|
||||||
|
|
||||||
async def _execute_query(
|
async def _execute_query(
|
||||||
self,
|
self, query: Query, batch_size: Optional[int] = None
|
||||||
query: Query,
|
|
||||||
*,
|
|
||||||
batch_size: Optional[int] = None,
|
|
||||||
timeout: Optional[timedelta] = None,
|
|
||||||
) -> pa.RecordBatchReader:
|
) -> pa.RecordBatchReader:
|
||||||
# The sync table calls into this method, so we need to map the
|
# The sync table calls into this method, so we need to map the
|
||||||
# query to the async version of the query and run that here. This is only
|
# query to the async version of the query and run that here. This is only
|
||||||
@@ -3606,19 +3356,12 @@ class AsyncTable:
|
|||||||
|
|
||||||
async_query = self._sync_query_to_async(query)
|
async_query = self._sync_query_to_async(query)
|
||||||
|
|
||||||
return await async_query.to_batches(
|
return await async_query.to_batches(max_batch_length=batch_size)
|
||||||
max_batch_length=batch_size, timeout=timeout
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _explain_plan(self, query: Query, verbose: Optional[bool]) -> str:
|
async def _explain_plan(self, query: Query) -> str:
|
||||||
# This method is used by the sync table
|
# This method is used by the sync table
|
||||||
async_query = self._sync_query_to_async(query)
|
async_query = self._sync_query_to_async(query)
|
||||||
return await async_query.explain_plan(verbose)
|
return await async_query.explain_plan()
|
||||||
|
|
||||||
async def _analyze_plan(self, query: Query) -> str:
|
|
||||||
# This method is used by the sync table
|
|
||||||
async_query = self._sync_query_to_async(query)
|
|
||||||
return await async_query.analyze_plan()
|
|
||||||
|
|
||||||
async def _do_merge(
|
async def _do_merge(
|
||||||
self,
|
self,
|
||||||
@@ -3642,7 +3385,7 @@ class AsyncTable:
|
|||||||
)
|
)
|
||||||
if isinstance(data, pa.Table):
|
if isinstance(data, pa.Table):
|
||||||
data = pa.RecordBatchReader.from_batches(data.schema, data.to_batches())
|
data = pa.RecordBatchReader.from_batches(data.schema, data.to_batches())
|
||||||
return await self._inner.execute_merge_insert(
|
await self._inner.execute_merge_insert(
|
||||||
data,
|
data,
|
||||||
dict(
|
dict(
|
||||||
on=merge._on,
|
on=merge._on,
|
||||||
@@ -3758,9 +3501,7 @@ class AsyncTable:
|
|||||||
|
|
||||||
return await self._inner.update(updates_sql, where)
|
return await self._inner.update(updates_sql, where)
|
||||||
|
|
||||||
async def add_columns(
|
async def add_columns(self, transforms: dict[str, str]):
|
||||||
self, transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Add new columns with defined values.
|
Add new columns with defined values.
|
||||||
|
|
||||||
@@ -3770,19 +3511,8 @@ class AsyncTable:
|
|||||||
A map of column name to a SQL expression to use to calculate the
|
A map of column name to a SQL expression to use to calculate the
|
||||||
value of the new column. These expressions will be evaluated for
|
value of the new column. These expressions will be evaluated for
|
||||||
each row in the table, and can reference existing columns.
|
each row in the table, and can reference existing columns.
|
||||||
Alternatively, you can pass a pyarrow field or schema to add
|
|
||||||
new columns with NULLs.
|
|
||||||
"""
|
"""
|
||||||
if isinstance(transforms, pa.Field):
|
await self._inner.add_columns(list(transforms.items()))
|
||||||
transforms = [transforms]
|
|
||||||
if isinstance(transforms, list) and all(
|
|
||||||
{isinstance(f, pa.Field) for f in transforms}
|
|
||||||
):
|
|
||||||
transforms = pa.schema(transforms)
|
|
||||||
if isinstance(transforms, pa.Schema):
|
|
||||||
await self._inner.add_columns_with_schema(transforms)
|
|
||||||
else:
|
|
||||||
await self._inner.add_columns(list(transforms.items()))
|
|
||||||
|
|
||||||
async def alter_columns(self, *alterations: Iterable[dict[str, Any]]):
|
async def alter_columns(self, *alterations: Iterable[dict[str, Any]]):
|
||||||
"""
|
"""
|
||||||
@@ -3843,7 +3573,7 @@ class AsyncTable:
|
|||||||
|
|
||||||
return versions
|
return versions
|
||||||
|
|
||||||
async def checkout(self, version: int | str):
|
async def checkout(self, version: int):
|
||||||
"""
|
"""
|
||||||
Checks out a specific version of the Table
|
Checks out a specific version of the Table
|
||||||
|
|
||||||
@@ -3858,12 +3588,6 @@ class AsyncTable:
|
|||||||
Any operation that modifies the table will fail while the table is in a checked
|
Any operation that modifies the table will fail while the table is in a checked
|
||||||
out state.
|
out state.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
version: int | str,
|
|
||||||
The version to check out. A version number (`int`) or a tag
|
|
||||||
(`str`) can be provided.
|
|
||||||
|
|
||||||
To return the table to a normal state use `[Self::checkout_latest]`
|
To return the table to a normal state use `[Self::checkout_latest]`
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
@@ -3886,7 +3610,7 @@ class AsyncTable:
|
|||||||
"""
|
"""
|
||||||
await self._inner.checkout_latest()
|
await self._inner.checkout_latest()
|
||||||
|
|
||||||
async def restore(self, version: Optional[int] = None):
|
async def restore(self):
|
||||||
"""
|
"""
|
||||||
Restore the table to the currently checked out version
|
Restore the table to the currently checked out version
|
||||||
|
|
||||||
@@ -3899,25 +3623,7 @@ class AsyncTable:
|
|||||||
Once the operation concludes the table will no longer be in a checked
|
Once the operation concludes the table will no longer be in a checked
|
||||||
out state and the read_consistency_interval, if any, will apply.
|
out state and the read_consistency_interval, if any, will apply.
|
||||||
"""
|
"""
|
||||||
await self._inner.restore(version)
|
await self._inner.restore()
|
||||||
|
|
||||||
@property
|
|
||||||
def tags(self) -> AsyncTags:
|
|
||||||
"""Tag management for the dataset.
|
|
||||||
|
|
||||||
Similar to Git, tags are a way to add metadata to a specific version of the
|
|
||||||
dataset.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Tagged versions are exempted from the
|
|
||||||
:py:meth:`optimize(cleanup_older_than)` process.
|
|
||||||
|
|
||||||
To remove a version that has been tagged, you must first
|
|
||||||
:py:meth:`~Tags.delete` the associated tag.
|
|
||||||
|
|
||||||
"""
|
|
||||||
return AsyncTags(self._inner)
|
|
||||||
|
|
||||||
async def optimize(
|
async def optimize(
|
||||||
self,
|
self,
|
||||||
@@ -4088,217 +3794,3 @@ class IndexStatistics:
|
|||||||
# a dictionary instead of a class.
|
# a dictionary instead of a class.
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
return getattr(self, key)
|
return getattr(self, key)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class TableStatistics:
|
|
||||||
"""
|
|
||||||
Statistics about a table and fragments.
|
|
||||||
|
|
||||||
Attributes
|
|
||||||
----------
|
|
||||||
total_bytes: int
|
|
||||||
The total number of bytes in the table.
|
|
||||||
num_rows: int
|
|
||||||
The total number of rows in the table.
|
|
||||||
num_indices: int
|
|
||||||
The total number of indices in the table.
|
|
||||||
fragment_stats: FragmentStatistics
|
|
||||||
Statistics about fragments in the table.
|
|
||||||
"""
|
|
||||||
|
|
||||||
total_bytes: int
|
|
||||||
num_rows: int
|
|
||||||
num_indices: int
|
|
||||||
fragment_stats: FragmentStatistics
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class FragmentStatistics:
|
|
||||||
"""
|
|
||||||
Statistics about fragments.
|
|
||||||
|
|
||||||
Attributes
|
|
||||||
----------
|
|
||||||
num_fragments: int
|
|
||||||
The total number of fragments in the table.
|
|
||||||
num_small_fragments: int
|
|
||||||
The total number of small fragments in the table.
|
|
||||||
Small fragments have low row counts and may need to be compacted.
|
|
||||||
lengths: FragmentSummaryStats
|
|
||||||
Statistics about the number of rows in the table fragments.
|
|
||||||
"""
|
|
||||||
|
|
||||||
num_fragments: int
|
|
||||||
num_small_fragments: int
|
|
||||||
lengths: FragmentSummaryStats
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class FragmentSummaryStats:
|
|
||||||
"""
|
|
||||||
Statistics about fragments sizes
|
|
||||||
|
|
||||||
Attributes
|
|
||||||
----------
|
|
||||||
min: int
|
|
||||||
The number of rows in the fragment with the fewest rows.
|
|
||||||
max: int
|
|
||||||
The number of rows in the fragment with the most rows.
|
|
||||||
mean: int
|
|
||||||
The mean number of rows in the fragments.
|
|
||||||
p25: int
|
|
||||||
The 25th percentile of number of rows in the fragments.
|
|
||||||
p50: int
|
|
||||||
The 50th percentile of number of rows in the fragments.
|
|
||||||
p75: int
|
|
||||||
The 75th percentile of number of rows in the fragments.
|
|
||||||
p99: int
|
|
||||||
The 99th percentile of number of rows in the fragments.
|
|
||||||
"""
|
|
||||||
|
|
||||||
min: int
|
|
||||||
max: int
|
|
||||||
mean: int
|
|
||||||
p25: int
|
|
||||||
p50: int
|
|
||||||
p75: int
|
|
||||||
p99: int
|
|
||||||
|
|
||||||
|
|
||||||
class Tags:
|
|
||||||
"""
|
|
||||||
Table tag manager.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, table):
|
|
||||||
self._table = table
|
|
||||||
|
|
||||||
def list(self) -> Dict[str, Tag]:
|
|
||||||
"""
|
|
||||||
List all table tags.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
dict[str, Tag]
|
|
||||||
A dictionary mapping tag names to version numbers.
|
|
||||||
"""
|
|
||||||
return LOOP.run(self._table.tags.list())
|
|
||||||
|
|
||||||
def get_version(self, tag: str) -> int:
|
|
||||||
"""
|
|
||||||
Get the version of a tag.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to get the version for.
|
|
||||||
"""
|
|
||||||
return LOOP.run(self._table.tags.get_version(tag))
|
|
||||||
|
|
||||||
def create(self, tag: str, version: int) -> None:
|
|
||||||
"""
|
|
||||||
Create a tag for a given table version.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to create. This name must be unique among all tag
|
|
||||||
names for the table.
|
|
||||||
version: int,
|
|
||||||
The table version to tag.
|
|
||||||
"""
|
|
||||||
LOOP.run(self._table.tags.create(tag, version))
|
|
||||||
|
|
||||||
def delete(self, tag: str) -> None:
|
|
||||||
"""
|
|
||||||
Delete tag from the table.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to delete.
|
|
||||||
"""
|
|
||||||
LOOP.run(self._table.tags.delete(tag))
|
|
||||||
|
|
||||||
def update(self, tag: str, version: int) -> None:
|
|
||||||
"""
|
|
||||||
Update tag to a new version.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to update.
|
|
||||||
version: int,
|
|
||||||
The new table version to tag.
|
|
||||||
"""
|
|
||||||
LOOP.run(self._table.tags.update(tag, version))
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncTags:
|
|
||||||
"""
|
|
||||||
Async table tag manager.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, table):
|
|
||||||
self._table = table
|
|
||||||
|
|
||||||
async def list(self) -> Dict[str, Tag]:
|
|
||||||
"""
|
|
||||||
List all table tags.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
dict[str, Tag]
|
|
||||||
A dictionary mapping tag names to version numbers.
|
|
||||||
"""
|
|
||||||
return await self._table.tags.list()
|
|
||||||
|
|
||||||
async def get_version(self, tag: str) -> int:
|
|
||||||
"""
|
|
||||||
Get the version of a tag.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to get the version for.
|
|
||||||
"""
|
|
||||||
return await self._table.tags.get_version(tag)
|
|
||||||
|
|
||||||
async def create(self, tag: str, version: int) -> None:
|
|
||||||
"""
|
|
||||||
Create a tag for a given table version.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to create. This name must be unique among all tag
|
|
||||||
names for the table.
|
|
||||||
version: int,
|
|
||||||
The table version to tag.
|
|
||||||
"""
|
|
||||||
await self._table.tags.create(tag, version)
|
|
||||||
|
|
||||||
async def delete(self, tag: str) -> None:
|
|
||||||
"""
|
|
||||||
Delete tag from the table.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to delete.
|
|
||||||
"""
|
|
||||||
await self._table.tags.delete(tag)
|
|
||||||
|
|
||||||
async def update(self, tag: str, version: int) -> None:
|
|
||||||
"""
|
|
||||||
Update tag to a new version.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
tag: str,
|
|
||||||
The name of the tag to update.
|
|
||||||
version: int,
|
|
||||||
The new table version to tag.
|
|
||||||
"""
|
|
||||||
await self._table.tags.update(tag, version)
|
|
||||||
|
|||||||
@@ -253,14 +253,9 @@ def infer_vector_column_name(
|
|||||||
query: Optional[Any], # inferred later in query builder
|
query: Optional[Any], # inferred later in query builder
|
||||||
vector_column_name: Optional[str],
|
vector_column_name: Optional[str],
|
||||||
):
|
):
|
||||||
if vector_column_name is not None:
|
if (vector_column_name is None and query is not None and query_type != "fts") or (
|
||||||
return vector_column_name
|
vector_column_name is None and query_type == "hybrid"
|
||||||
|
):
|
||||||
if query_type == "fts":
|
|
||||||
# FTS queries do not require a vector column
|
|
||||||
return None
|
|
||||||
|
|
||||||
if query is not None or query_type == "hybrid":
|
|
||||||
try:
|
try:
|
||||||
vector_column_name = inf_vector_column_query(schema)
|
vector_column_name = inf_vector_column_query(schema)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -562,7 +562,7 @@ async def test_table_async():
|
|||||||
async_db = await lancedb.connect_async(uri, read_consistency_interval=timedelta(0))
|
async_db = await lancedb.connect_async(uri, read_consistency_interval=timedelta(0))
|
||||||
async_tbl = await async_db.open_table("test_table_async")
|
async_tbl = await async_db.open_table("test_table_async")
|
||||||
# --8<-- [end:table_async_strong_consistency]
|
# --8<-- [end:table_async_strong_consistency]
|
||||||
# --8<-- [start:table_async_eventual_consistency]
|
# --8<-- [start:table_async_ventual_consistency]
|
||||||
uri = "data/sample-lancedb"
|
uri = "data/sample-lancedb"
|
||||||
async_db = await lancedb.connect_async(
|
async_db = await lancedb.connect_async(
|
||||||
uri, read_consistency_interval=timedelta(seconds=5)
|
uri, read_consistency_interval=timedelta(seconds=5)
|
||||||
|
|||||||
@@ -18,19 +18,15 @@ def test_upsert(mem_db):
|
|||||||
{"id": 1, "name": "Bobby"},
|
{"id": 1, "name": "Bobby"},
|
||||||
{"id": 2, "name": "Charlie"},
|
{"id": 2, "name": "Charlie"},
|
||||||
]
|
]
|
||||||
stats = (
|
(
|
||||||
table.merge_insert("id")
|
table.merge_insert("id")
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
.execute(new_users)
|
.execute(new_users)
|
||||||
)
|
)
|
||||||
table.count_rows() # 3
|
table.count_rows() # 3
|
||||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
|
||||||
# --8<-- [end:upsert_basic]
|
# --8<-- [end:upsert_basic]
|
||||||
assert table.count_rows() == 3
|
assert table.count_rows() == 3
|
||||||
assert stats["num_inserted_rows"] == 1
|
|
||||||
assert stats["num_updated_rows"] == 1
|
|
||||||
assert stats["num_deleted_rows"] == 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -48,19 +44,15 @@ async def test_upsert_async(mem_db_async):
|
|||||||
{"id": 1, "name": "Bobby"},
|
{"id": 1, "name": "Bobby"},
|
||||||
{"id": 2, "name": "Charlie"},
|
{"id": 2, "name": "Charlie"},
|
||||||
]
|
]
|
||||||
stats = await (
|
await (
|
||||||
table.merge_insert("id")
|
table.merge_insert("id")
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
.execute(new_users)
|
.execute(new_users)
|
||||||
)
|
)
|
||||||
await table.count_rows() # 3
|
await table.count_rows() # 3
|
||||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
|
|
||||||
# --8<-- [end:upsert_basic_async]
|
# --8<-- [end:upsert_basic_async]
|
||||||
assert await table.count_rows() == 3
|
assert await table.count_rows() == 3
|
||||||
assert stats["num_inserted_rows"] == 1
|
|
||||||
assert stats["num_updated_rows"] == 1
|
|
||||||
assert stats["num_deleted_rows"] == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_insert_if_not_exists(mem_db):
|
def test_insert_if_not_exists(mem_db):
|
||||||
@@ -77,16 +69,10 @@ def test_insert_if_not_exists(mem_db):
|
|||||||
{"domain": "google.com", "name": "Google"},
|
{"domain": "google.com", "name": "Google"},
|
||||||
{"domain": "facebook.com", "name": "Facebook"},
|
{"domain": "facebook.com", "name": "Facebook"},
|
||||||
]
|
]
|
||||||
stats = (
|
(table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains))
|
||||||
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
|
||||||
)
|
|
||||||
table.count_rows() # 3
|
table.count_rows() # 3
|
||||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 0, 'num_deleted_rows': 0}
|
|
||||||
# --8<-- [end:insert_if_not_exists]
|
# --8<-- [end:insert_if_not_exists]
|
||||||
assert table.count_rows() == 3
|
assert table.count_rows() == 3
|
||||||
assert stats["num_inserted_rows"] == 1
|
|
||||||
assert stats["num_updated_rows"] == 0
|
|
||||||
assert stats["num_deleted_rows"] == 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -104,16 +90,12 @@ async def test_insert_if_not_exists_async(mem_db_async):
|
|||||||
{"domain": "google.com", "name": "Google"},
|
{"domain": "google.com", "name": "Google"},
|
||||||
{"domain": "facebook.com", "name": "Facebook"},
|
{"domain": "facebook.com", "name": "Facebook"},
|
||||||
]
|
]
|
||||||
stats = await (
|
await (
|
||||||
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
|
||||||
)
|
)
|
||||||
await table.count_rows() # 3
|
await table.count_rows() # 3
|
||||||
stats # {'num_inserted_rows': 1, 'num_updated_rows': 0, 'num_deleted_rows': 0}
|
|
||||||
# --8<-- [end:insert_if_not_exists_async]
|
# --8<-- [end:insert_if_not_exists_async]
|
||||||
assert await table.count_rows() == 3
|
assert await table.count_rows() == 3
|
||||||
assert stats["num_inserted_rows"] == 1
|
|
||||||
assert stats["num_updated_rows"] == 0
|
|
||||||
assert stats["num_deleted_rows"] == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_replace_range(mem_db):
|
def test_replace_range(mem_db):
|
||||||
@@ -131,7 +113,7 @@ def test_replace_range(mem_db):
|
|||||||
new_chunks = [
|
new_chunks = [
|
||||||
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
||||||
]
|
]
|
||||||
stats = (
|
(
|
||||||
table.merge_insert(["doc_id", "chunk_id"])
|
table.merge_insert(["doc_id", "chunk_id"])
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
@@ -139,12 +121,8 @@ def test_replace_range(mem_db):
|
|||||||
.execute(new_chunks)
|
.execute(new_chunks)
|
||||||
)
|
)
|
||||||
table.count_rows("doc_id = 1") # 1
|
table.count_rows("doc_id = 1") # 1
|
||||||
stats # {'num_inserted_rows': 0, 'num_updated_rows': 1, 'num_deleted_rows': 1}
|
|
||||||
# --8<-- [end:replace_range]
|
# --8<-- [end:replace_range]
|
||||||
assert table.count_rows("doc_id = 1") == 1
|
assert table.count_rows("doc_id = 1") == 1
|
||||||
assert stats["num_inserted_rows"] == 0
|
|
||||||
assert stats["num_updated_rows"] == 1
|
|
||||||
assert stats["num_deleted_rows"] == 1
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -163,7 +141,7 @@ async def test_replace_range_async(mem_db_async):
|
|||||||
new_chunks = [
|
new_chunks = [
|
||||||
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
{"doc_id": 1, "chunk_id": 0, "text": "Baz"},
|
||||||
]
|
]
|
||||||
stats = await (
|
await (
|
||||||
table.merge_insert(["doc_id", "chunk_id"])
|
table.merge_insert(["doc_id", "chunk_id"])
|
||||||
.when_matched_update_all()
|
.when_matched_update_all()
|
||||||
.when_not_matched_insert_all()
|
.when_not_matched_insert_all()
|
||||||
@@ -171,9 +149,5 @@ async def test_replace_range_async(mem_db_async):
|
|||||||
.execute(new_chunks)
|
.execute(new_chunks)
|
||||||
)
|
)
|
||||||
await table.count_rows("doc_id = 1") # 1
|
await table.count_rows("doc_id = 1") # 1
|
||||||
stats # {'num_inserted_rows': 0, 'num_updated_rows': 1, 'num_deleted_rows': 1}
|
|
||||||
# --8<-- [end:replace_range_async]
|
# --8<-- [end:replace_range_async]
|
||||||
assert await table.count_rows("doc_id = 1") == 1
|
assert await table.count_rows("doc_id = 1") == 1
|
||||||
assert stats["num_inserted_rows"] == 0
|
|
||||||
assert stats["num_updated_rows"] == 1
|
|
||||||
assert stats["num_deleted_rows"] == 1
|
|
||||||
|
|||||||
@@ -6,9 +6,7 @@ import lancedb
|
|||||||
|
|
||||||
# --8<-- [end:import-lancedb]
|
# --8<-- [end:import-lancedb]
|
||||||
# --8<-- [start:import-numpy]
|
# --8<-- [start:import-numpy]
|
||||||
from lancedb.query import BoostQuery, MatchQuery
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
|
||||||
|
|
||||||
# --8<-- [end:import-numpy]
|
# --8<-- [end:import-numpy]
|
||||||
# --8<-- [start:import-datetime]
|
# --8<-- [start:import-datetime]
|
||||||
@@ -156,84 +154,6 @@ async def test_vector_search_async():
|
|||||||
# --8<-- [end:search_result_async_as_list]
|
# --8<-- [end:search_result_async_as_list]
|
||||||
|
|
||||||
|
|
||||||
def test_fts_fuzzy_query():
|
|
||||||
uri = "data/fuzzy-example"
|
|
||||||
db = lancedb.connect(uri)
|
|
||||||
|
|
||||||
table = db.create_table(
|
|
||||||
"my_table_fts_fuzzy",
|
|
||||||
data=pa.table(
|
|
||||||
{
|
|
||||||
"text": [
|
|
||||||
"fa",
|
|
||||||
"fo", # spellchecker:disable-line
|
|
||||||
"fob",
|
|
||||||
"focus",
|
|
||||||
"foo",
|
|
||||||
"food",
|
|
||||||
"foul",
|
|
||||||
]
|
|
||||||
}
|
|
||||||
),
|
|
||||||
mode="overwrite",
|
|
||||||
)
|
|
||||||
table.create_fts_index("text", use_tantivy=False, replace=True)
|
|
||||||
|
|
||||||
results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
|
|
||||||
assert len(results) == 4
|
|
||||||
assert set(results["text"].to_list()) == {
|
|
||||||
"foo",
|
|
||||||
"fo", # 1 deletion # spellchecker:disable-line
|
|
||||||
"fob", # 1 substitution
|
|
||||||
"food", # 1 insertion
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def test_fts_boost_query():
|
|
||||||
uri = "data/boost-example"
|
|
||||||
db = lancedb.connect(uri)
|
|
||||||
|
|
||||||
table = db.create_table(
|
|
||||||
"my_table_fts_boost",
|
|
||||||
data=pa.table(
|
|
||||||
{
|
|
||||||
"title": [
|
|
||||||
"The Hidden Gems of Travel",
|
|
||||||
"Exploring Nature's Wonders",
|
|
||||||
"Cultural Treasures Unveiled",
|
|
||||||
"The Nightlife Chronicles",
|
|
||||||
"Scenic Escapes and Challenges",
|
|
||||||
],
|
|
||||||
"desc": [
|
|
||||||
"A vibrant city with occasional traffic jams.",
|
|
||||||
"Beautiful landscapes but overpriced tourist spots.",
|
|
||||||
"Rich cultural heritage but humid summers.",
|
|
||||||
"Bustling nightlife but noisy streets.",
|
|
||||||
"Scenic views but limited public transport options.",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
),
|
|
||||||
mode="overwrite",
|
|
||||||
)
|
|
||||||
table.create_fts_index("desc", use_tantivy=False, replace=True)
|
|
||||||
|
|
||||||
results = table.search(
|
|
||||||
BoostQuery(
|
|
||||||
MatchQuery("beautiful, cultural, nightlife", "desc"),
|
|
||||||
MatchQuery("bad traffic jams, overpriced", "desc"),
|
|
||||||
),
|
|
||||||
).to_pandas()
|
|
||||||
|
|
||||||
# we will hit 3 results because the positive query has 3 hits
|
|
||||||
assert len(results) == 3
|
|
||||||
# the one containing "overpriced" will be negatively boosted,
|
|
||||||
# so it will be the last one
|
|
||||||
assert (
|
|
||||||
results["desc"].to_list()[2]
|
|
||||||
== "Beautiful landscapes but overpriced tourist spots."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_fts_native():
|
def test_fts_native():
|
||||||
# --8<-- [start:basic_fts]
|
# --8<-- [start:basic_fts]
|
||||||
uri = "data/sample-lancedb"
|
uri = "data/sample-lancedb"
|
||||||
|
|||||||
@@ -11,8 +11,7 @@ import pandas as pd
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from lancedb.embeddings import get_registry
|
from lancedb.embeddings import get_registry
|
||||||
from lancedb.pydantic import LanceModel, Vector, MultiVector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
import requests
|
|
||||||
|
|
||||||
# These are integration tests for embedding functions.
|
# These are integration tests for embedding functions.
|
||||||
# They are slow because they require downloading models
|
# They are slow because they require downloading models
|
||||||
@@ -517,125 +516,3 @@ def test_voyageai_embedding_function():
|
|||||||
|
|
||||||
tbl.add(df)
|
tbl.add(df)
|
||||||
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
|
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
|
||||||
)
|
|
||||||
def test_voyageai_multimodal_embedding_function():
|
|
||||||
voyageai = (
|
|
||||||
get_registry().get("voyageai").create(name="voyage-multimodal-3", max_retries=0)
|
|
||||||
)
|
|
||||||
|
|
||||||
class Images(LanceModel):
|
|
||||||
label: str
|
|
||||||
image_uri: str = voyageai.SourceField() # image uri as the source
|
|
||||||
image_bytes: bytes = voyageai.SourceField() # image bytes as the source
|
|
||||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField() # vector column
|
|
||||||
vec_from_bytes: Vector(voyageai.ndims()) = (
|
|
||||||
voyageai.VectorField()
|
|
||||||
) # Another vector column
|
|
||||||
|
|
||||||
db = lancedb.connect("~/lancedb")
|
|
||||||
table = db.create_table("test", schema=Images, mode="overwrite")
|
|
||||||
labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
|
|
||||||
uris = [
|
|
||||||
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
|
|
||||||
"http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
|
|
||||||
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
|
|
||||||
"http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
|
|
||||||
"http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
|
|
||||||
"http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
|
|
||||||
]
|
|
||||||
# get each uri as bytes
|
|
||||||
image_bytes = [requests.get(uri).content for uri in uris]
|
|
||||||
table.add(
|
|
||||||
pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": image_bytes})
|
|
||||||
)
|
|
||||||
assert len(table.to_pandas()["vector"][0]) == voyageai.ndims()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
|
||||||
)
|
|
||||||
def test_voyageai_multimodal_embedding_text_function():
|
|
||||||
voyageai = (
|
|
||||||
get_registry().get("voyageai").create(name="voyage-multimodal-3", max_retries=0)
|
|
||||||
)
|
|
||||||
|
|
||||||
class TextModel(LanceModel):
|
|
||||||
text: str = voyageai.SourceField()
|
|
||||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
|
||||||
|
|
||||||
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
|
|
||||||
db = lancedb.connect("~/lancedb")
|
|
||||||
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
|
||||||
|
|
||||||
tbl.add(df)
|
|
||||||
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
importlib.util.find_spec("colpali_engine") is None,
|
|
||||||
reason="colpali_engine not installed",
|
|
||||||
)
|
|
||||||
def test_colpali(tmp_path):
|
|
||||||
import requests
|
|
||||||
from lancedb.pydantic import LanceModel
|
|
||||||
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
registry = get_registry()
|
|
||||||
func = registry.get("colpali").create()
|
|
||||||
|
|
||||||
class MediaItems(LanceModel):
|
|
||||||
text: str
|
|
||||||
image_uri: str = func.SourceField()
|
|
||||||
image_bytes: bytes = func.SourceField()
|
|
||||||
image_vectors: MultiVector(func.ndims()) = (
|
|
||||||
func.VectorField()
|
|
||||||
) # Multivector image embeddings
|
|
||||||
|
|
||||||
table = db.create_table("media", schema=MediaItems)
|
|
||||||
|
|
||||||
texts = [
|
|
||||||
"a cute cat playing with yarn",
|
|
||||||
"a puppy in a flower field",
|
|
||||||
"a red sports car on the highway",
|
|
||||||
"a vintage bicycle leaning against a wall",
|
|
||||||
"a plate of delicious pasta",
|
|
||||||
"fresh fruit salad in a bowl",
|
|
||||||
]
|
|
||||||
|
|
||||||
uris = [
|
|
||||||
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
|
|
||||||
"http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
|
|
||||||
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
|
|
||||||
"http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
|
|
||||||
"http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
|
|
||||||
"http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Get images as bytes
|
|
||||||
image_bytes = [requests.get(uri).content for uri in uris]
|
|
||||||
|
|
||||||
table.add(
|
|
||||||
pd.DataFrame({"text": texts, "image_uri": uris, "image_bytes": image_bytes})
|
|
||||||
)
|
|
||||||
|
|
||||||
# Test text-to-image search
|
|
||||||
image_results = (
|
|
||||||
table.search("fluffy companion", vector_column_name="image_vectors")
|
|
||||||
.limit(1)
|
|
||||||
.to_pydantic(MediaItems)[0]
|
|
||||||
)
|
|
||||||
assert "cat" in image_results.text.lower() or "puppy" in image_results.text.lower()
|
|
||||||
|
|
||||||
# Verify multivector dimensions
|
|
||||||
first_row = table.to_arrow().to_pylist()[0]
|
|
||||||
assert len(first_row["image_vectors"]) > 1, "Should have multiple image vectors"
|
|
||||||
assert len(first_row["image_vectors"][0]) == func.ndims(), (
|
|
||||||
"Vector dimension mismatch"
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -20,9 +20,7 @@ from unittest import mock
|
|||||||
import lancedb as ldb
|
import lancedb as ldb
|
||||||
from lancedb.db import DBConnection
|
from lancedb.db import DBConnection
|
||||||
from lancedb.index import FTS
|
from lancedb.index import FTS
|
||||||
from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
from utils import exception_output
|
from utils import exception_output
|
||||||
@@ -180,47 +178,11 @@ def test_search_fts(table, use_tantivy):
|
|||||||
results = table.search("puppy").select(["id", "text"]).to_list()
|
results = table.search("puppy").select(["id", "text"]).to_list()
|
||||||
assert len(results) == 10
|
assert len(results) == 10
|
||||||
|
|
||||||
if not use_tantivy:
|
|
||||||
# Test with a query
|
|
||||||
results = (
|
|
||||||
table.search(MatchQuery("puppy", "text"))
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
|
|
||||||
# Test boost query
|
|
||||||
results = (
|
|
||||||
table.search(
|
|
||||||
BoostQuery(
|
|
||||||
MatchQuery("puppy", "text"),
|
|
||||||
MatchQuery("runs", "text"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
|
|
||||||
# Test multi match query
|
|
||||||
table.create_fts_index("text2", use_tantivy=use_tantivy)
|
|
||||||
results = (
|
|
||||||
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
assert len(results[0]) == 3 # id, text, _score
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_fts_select_async(async_table):
|
async def test_fts_select_async(async_table):
|
||||||
tbl = await async_table
|
tbl = await async_table
|
||||||
await tbl.create_index("text", config=FTS())
|
await tbl.create_index("text", config=FTS())
|
||||||
await tbl.create_index("text2", config=FTS())
|
|
||||||
results = (
|
results = (
|
||||||
await tbl.query()
|
await tbl.query()
|
||||||
.nearest_to_text("puppy")
|
.nearest_to_text("puppy")
|
||||||
@@ -231,54 +193,6 @@ async def test_fts_select_async(async_table):
|
|||||||
assert len(results) == 5
|
assert len(results) == 5
|
||||||
assert len(results[0]) == 3 # id, text, _score
|
assert len(results[0]) == 3 # id, text, _score
|
||||||
|
|
||||||
# Test with FullTextQuery
|
|
||||||
results = (
|
|
||||||
await tbl.query()
|
|
||||||
.nearest_to_text(MatchQuery("puppy", "text"))
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
assert len(results[0]) == 3 # id, text, _score
|
|
||||||
|
|
||||||
# Test with BoostQuery
|
|
||||||
results = (
|
|
||||||
await tbl.query()
|
|
||||||
.nearest_to_text(
|
|
||||||
BoostQuery(
|
|
||||||
MatchQuery("puppy", "text"),
|
|
||||||
MatchQuery("runs", "text"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
assert len(results[0]) == 3 # id, text, _score
|
|
||||||
|
|
||||||
# Test with MultiMatchQuery
|
|
||||||
results = (
|
|
||||||
await tbl.query()
|
|
||||||
.nearest_to_text(MultiMatchQuery("puppy", ["text", "text2"]))
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
assert len(results[0]) == 3 # id, text, _score
|
|
||||||
|
|
||||||
# Test with search() API
|
|
||||||
results = (
|
|
||||||
await (await tbl.search(MatchQuery("puppy", "text")))
|
|
||||||
.select(["id", "text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) == 5
|
|
||||||
assert len(results[0]) == 3 # id, text, _score
|
|
||||||
|
|
||||||
|
|
||||||
def test_search_fts_phrase_query(table):
|
def test_search_fts_phrase_query(table):
|
||||||
table.create_fts_index("text", use_tantivy=False, with_position=False)
|
table.create_fts_index("text", use_tantivy=False, with_position=False)
|
||||||
@@ -293,13 +207,6 @@ def test_search_fts_phrase_query(table):
|
|||||||
assert len(results) > len(phrase_results)
|
assert len(results) > len(phrase_results)
|
||||||
assert len(phrase_results) > 0
|
assert len(phrase_results) > 0
|
||||||
|
|
||||||
# Test with a query
|
|
||||||
phrase_results = (
|
|
||||||
table.search(PhraseQuery("puppy runs", "text")).limit(100).to_list()
|
|
||||||
)
|
|
||||||
assert len(results) > len(phrase_results)
|
|
||||||
assert len(phrase_results) > 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_search_fts_phrase_query_async(async_table):
|
async def test_search_fts_phrase_query_async(async_table):
|
||||||
@@ -320,16 +227,6 @@ async def test_search_fts_phrase_query_async(async_table):
|
|||||||
assert len(results) > len(phrase_results)
|
assert len(results) > len(phrase_results)
|
||||||
assert len(phrase_results) > 0
|
assert len(phrase_results) > 0
|
||||||
|
|
||||||
# Test with a query
|
|
||||||
phrase_results = (
|
|
||||||
await async_table.query()
|
|
||||||
.nearest_to_text(PhraseQuery("puppy runs", "text"))
|
|
||||||
.limit(100)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) > len(phrase_results)
|
|
||||||
assert len(phrase_results) > 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_search_fts_specify_column(table):
|
def test_search_fts_specify_column(table):
|
||||||
table.create_fts_index("text", use_tantivy=False)
|
table.create_fts_index("text", use_tantivy=False)
|
||||||
@@ -627,32 +524,3 @@ def test_language(mem_db: DBConnection):
|
|||||||
# Stop words -> no results
|
# Stop words -> no results
|
||||||
results = table.search("la", query_type="fts").limit(5).to_list()
|
results = table.search("la", query_type="fts").limit(5).to_list()
|
||||||
assert len(results) == 0
|
assert len(results) == 0
|
||||||
|
|
||||||
|
|
||||||
def test_fts_on_list(mem_db: DBConnection):
|
|
||||||
data = pa.table(
|
|
||||||
{
|
|
||||||
"text": [
|
|
||||||
["lance database", "the", "search"],
|
|
||||||
["lance database"],
|
|
||||||
["lance", "search"],
|
|
||||||
["database", "search"],
|
|
||||||
["unrelated", "doc"],
|
|
||||||
],
|
|
||||||
"vector": [
|
|
||||||
[1.0, 2.0, 3.0],
|
|
||||||
[4.0, 5.0, 6.0],
|
|
||||||
[7.0, 8.0, 9.0],
|
|
||||||
[10.0, 11.0, 12.0],
|
|
||||||
[13.0, 14.0, 15.0],
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
table = mem_db.create_table("test", data=data)
|
|
||||||
table.create_fts_index("text", use_tantivy=False)
|
|
||||||
|
|
||||||
res = table.search("lance").limit(5).to_list()
|
|
||||||
assert len(res) == 3
|
|
||||||
|
|
||||||
res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
|
|
||||||
assert len(res) == 2
|
|
||||||
|
|||||||
@@ -4,32 +4,13 @@
|
|||||||
import lancedb
|
import lancedb
|
||||||
|
|
||||||
from lancedb.query import LanceHybridQueryBuilder
|
from lancedb.query import LanceHybridQueryBuilder
|
||||||
from lancedb.rerankers.rrf import RRFReranker
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pyarrow.compute as pc
|
import pyarrow.compute as pc
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
||||||
from lancedb.index import FTS
|
from lancedb.index import FTS
|
||||||
from lancedb.table import AsyncTable, Table
|
from lancedb.table import AsyncTable
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def sync_table(tmpdir_factory) -> Table:
|
|
||||||
tmp_path = str(tmpdir_factory.mktemp("data"))
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
data = pa.table(
|
|
||||||
{
|
|
||||||
"text": pa.array(["a", "b", "cat", "dog"]),
|
|
||||||
"vector": pa.array(
|
|
||||||
[[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
|
|
||||||
type=pa.list_(pa.float32(), list_size=2),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
table = db.create_table("test", data)
|
|
||||||
table.create_fts_index("text", with_position=False, use_tantivy=False)
|
|
||||||
return table
|
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
@@ -121,42 +102,6 @@ async def test_async_hybrid_query_default_limit(table: AsyncTable):
|
|||||||
assert texts.count("a") == 1
|
assert texts.count("a") == 1
|
||||||
|
|
||||||
|
|
||||||
def test_hybrid_query_distance_range(sync_table: Table):
|
|
||||||
reranker = RRFReranker(return_score="all")
|
|
||||||
result = (
|
|
||||||
sync_table.search(query_type="hybrid")
|
|
||||||
.vector([0.0, 0.4])
|
|
||||||
.text("cat and dog")
|
|
||||||
.distance_range(lower_bound=0.2, upper_bound=0.5)
|
|
||||||
.rerank(reranker)
|
|
||||||
.limit(2)
|
|
||||||
.to_arrow()
|
|
||||||
)
|
|
||||||
assert len(result) == 2
|
|
||||||
print(result)
|
|
||||||
for dist in result["_distance"]:
|
|
||||||
if dist.is_valid:
|
|
||||||
assert 0.2 <= dist.as_py() <= 0.5
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_hybrid_query_distance_range_async(table: AsyncTable):
|
|
||||||
reranker = RRFReranker(return_score="all")
|
|
||||||
result = await (
|
|
||||||
table.query()
|
|
||||||
.nearest_to([0.0, 0.4])
|
|
||||||
.nearest_to_text("cat and dog")
|
|
||||||
.distance_range(lower_bound=0.2, upper_bound=0.5)
|
|
||||||
.rerank(reranker)
|
|
||||||
.limit(2)
|
|
||||||
.to_arrow()
|
|
||||||
)
|
|
||||||
assert len(result) == 2
|
|
||||||
for dist in result["_distance"]:
|
|
||||||
if dist.is_valid:
|
|
||||||
assert 0.2 <= dist.as_py() <= 0.5
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_explain_plan(table: AsyncTable):
|
async def test_explain_plan(table: AsyncTable):
|
||||||
plan = await (
|
plan = await (
|
||||||
@@ -169,16 +114,6 @@ async def test_explain_plan(table: AsyncTable):
|
|||||||
assert "LanceScan" in plan
|
assert "LanceScan" in plan
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_analyze_plan(table: AsyncTable):
|
|
||||||
res = await (
|
|
||||||
table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).analyze_plan()
|
|
||||||
)
|
|
||||||
|
|
||||||
assert "AnalyzeExec" in res
|
|
||||||
assert "metrics=" in res
|
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_scores():
|
def test_normalize_scores():
|
||||||
cases = [
|
cases = [
|
||||||
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
|
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import pyarrow as pa
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from lancedb import AsyncConnection, AsyncTable, connect_async
|
from lancedb import AsyncConnection, AsyncTable, connect_async
|
||||||
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
@@ -31,7 +31,6 @@ async def some_table(db_async):
|
|||||||
{
|
{
|
||||||
"id": list(range(NROWS)),
|
"id": list(range(NROWS)),
|
||||||
"vector": sample_fixed_size_list_array(NROWS, DIM),
|
"vector": sample_fixed_size_list_array(NROWS, DIM),
|
||||||
"fsb": pa.array([bytes([i]) for i in range(NROWS)], pa.binary(1)),
|
|
||||||
"tags": [
|
"tags": [
|
||||||
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
|
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
|
||||||
],
|
],
|
||||||
@@ -86,16 +85,6 @@ async def test_create_scalar_index(some_table: AsyncTable):
|
|||||||
assert len(indices) == 0
|
assert len(indices) == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_create_fixed_size_binary_index(some_table: AsyncTable):
|
|
||||||
await some_table.create_index("fsb", config=BTree())
|
|
||||||
indices = await some_table.list_indices()
|
|
||||||
assert str(indices) == '[Index(BTree, columns=["fsb"], name="fsb_idx")]'
|
|
||||||
assert len(indices) == 1
|
|
||||||
assert indices[0].index_type == "BTree"
|
|
||||||
assert indices[0].columns == ["fsb"]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_create_bitmap_index(some_table: AsyncTable):
|
async def test_create_bitmap_index(some_table: AsyncTable):
|
||||||
await some_table.create_index("id", config=Bitmap())
|
await some_table.create_index("id", config=Bitmap())
|
||||||
@@ -119,18 +108,6 @@ async def test_create_label_list_index(some_table: AsyncTable):
|
|||||||
assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]'
|
assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]'
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_full_text_search_index(some_table: AsyncTable):
|
|
||||||
await some_table.create_index("tags", config=FTS(with_position=False))
|
|
||||||
indices = await some_table.list_indices()
|
|
||||||
assert str(indices) == '[Index(FTS, columns=["tags"], name="tags_idx")]'
|
|
||||||
|
|
||||||
await some_table.prewarm_index("tags_idx")
|
|
||||||
|
|
||||||
res = await (await some_table.search("tag0")).to_arrow()
|
|
||||||
assert res.num_rows > 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_create_vector_index(some_table: AsyncTable):
|
async def test_create_vector_index(some_table: AsyncTable):
|
||||||
# Can create
|
# Can create
|
||||||
|
|||||||
@@ -9,13 +9,7 @@ from typing import List, Optional, Tuple
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pydantic
|
import pydantic
|
||||||
import pytest
|
import pytest
|
||||||
from lancedb.pydantic import (
|
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
|
||||||
PYDANTIC_VERSION,
|
|
||||||
LanceModel,
|
|
||||||
Vector,
|
|
||||||
pydantic_to_schema,
|
|
||||||
MultiVector,
|
|
||||||
)
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
|
||||||
@@ -360,55 +354,3 @@ def test_optional_nested_model():
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_multi_vector():
|
|
||||||
class TestModel(pydantic.BaseModel):
|
|
||||||
vec: MultiVector(8)
|
|
||||||
|
|
||||||
schema = pydantic_to_schema(TestModel)
|
|
||||||
assert schema == pa.schema(
|
|
||||||
[pa.field("vec", pa.list_(pa.list_(pa.float32(), 8)), True)]
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(pydantic.ValidationError):
|
|
||||||
TestModel(vec=[[1.0] * 7])
|
|
||||||
|
|
||||||
with pytest.raises(pydantic.ValidationError):
|
|
||||||
TestModel(vec=[[1.0] * 9])
|
|
||||||
|
|
||||||
TestModel(vec=[[1.0] * 8])
|
|
||||||
TestModel(vec=[[1.0] * 8, [2.0] * 8])
|
|
||||||
|
|
||||||
TestModel(vec=[])
|
|
||||||
|
|
||||||
|
|
||||||
def test_multi_vector_nullable():
|
|
||||||
class NullableModel(pydantic.BaseModel):
|
|
||||||
vec: MultiVector(16, nullable=False)
|
|
||||||
|
|
||||||
schema = pydantic_to_schema(NullableModel)
|
|
||||||
assert schema == pa.schema(
|
|
||||||
[pa.field("vec", pa.list_(pa.list_(pa.float32(), 16)), False)]
|
|
||||||
)
|
|
||||||
|
|
||||||
class DefaultModel(pydantic.BaseModel):
|
|
||||||
vec: MultiVector(16)
|
|
||||||
|
|
||||||
schema = pydantic_to_schema(DefaultModel)
|
|
||||||
assert schema == pa.schema(
|
|
||||||
[pa.field("vec", pa.list_(pa.list_(pa.float32(), 16)), True)]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_multi_vector_in_lance_model():
|
|
||||||
class TestModel(LanceModel):
|
|
||||||
id: int
|
|
||||||
vectors: MultiVector(16) = Field(default=[[0.0] * 16])
|
|
||||||
|
|
||||||
schema = pydantic_to_schema(TestModel)
|
|
||||||
assert schema == TestModel.to_arrow_schema()
|
|
||||||
assert TestModel.field_names() == ["id", "vectors"]
|
|
||||||
|
|
||||||
t = TestModel(id=1)
|
|
||||||
assert t.vectors == [[0.0] * 16]
|
|
||||||
|
|||||||
@@ -257,9 +257,7 @@ async def test_distance_range_with_new_rows_async():
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
table = await conn.create_table("test", data)
|
table = await conn.create_table("test", data)
|
||||||
await table.create_index(
|
table.create_index("vector", config=IvfPq(num_partitions=1, num_sub_vectors=2))
|
||||||
"vector", config=IvfPq(num_partitions=1, num_sub_vectors=2)
|
|
||||||
)
|
|
||||||
|
|
||||||
q = [0, 0]
|
q = [0, 0]
|
||||||
rs = await table.query().nearest_to(q).to_arrow()
|
rs = await table.query().nearest_to(q).to_arrow()
|
||||||
@@ -513,8 +511,7 @@ def test_query_builder_with_different_vector_column():
|
|||||||
columns=["b"],
|
columns=["b"],
|
||||||
vector_column="foo_vector",
|
vector_column="foo_vector",
|
||||||
),
|
),
|
||||||
batch_size=None,
|
None,
|
||||||
timeout=None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -705,20 +702,6 @@ async def test_fast_search_async(tmp_path):
|
|||||||
assert "LanceScan" not in plan
|
assert "LanceScan" not in plan
|
||||||
|
|
||||||
|
|
||||||
def test_analyze_plan(table):
|
|
||||||
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
|
|
||||||
res = q.analyze_plan()
|
|
||||||
assert "AnalyzeExec" in res
|
|
||||||
assert "metrics=" in res
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_analyze_plan_async(table_async: AsyncTable):
|
|
||||||
res = await table_async.query().nearest_to(pa.array([1, 2])).analyze_plan()
|
|
||||||
assert "AnalyzeExec" in res
|
|
||||||
assert "metrics=" in res
|
|
||||||
|
|
||||||
|
|
||||||
def test_explain_plan(table):
|
def test_explain_plan(table):
|
||||||
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
|
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||||
plan = q.explain_plan(verbose=True)
|
plan = q.explain_plan(verbose=True)
|
||||||
@@ -1079,67 +1062,3 @@ async def test_query_serialization_async(table_async: AsyncTable):
|
|||||||
full_text_query=FullTextSearchQuery(columns=[], query="foo"),
|
full_text_query=FullTextSearchQuery(columns=[], query="foo"),
|
||||||
with_row_id=False,
|
with_row_id=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_query_timeout(tmp_path):
|
|
||||||
# Use local directory instead of memory:// to add a bit of latency to
|
|
||||||
# operations so a timeout of zero will trigger exceptions.
|
|
||||||
db = lancedb.connect(tmp_path)
|
|
||||||
data = pa.table(
|
|
||||||
{
|
|
||||||
"text": ["a", "b"],
|
|
||||||
"vector": pa.FixedSizeListArray.from_arrays(
|
|
||||||
pc.random(4).cast(pa.float32()), 2
|
|
||||||
),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
table = db.create_table("test", data)
|
|
||||||
table.create_fts_index("text", use_tantivy=False)
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
table.search().where("text = 'a'").to_list(timeout=timedelta(0))
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
table.search([0.0, 0.0]).to_arrow(timeout=timedelta(0))
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
table.search("a", query_type="fts").to_pandas(timeout=timedelta(0))
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
table.search(query_type="hybrid").vector([0.0, 0.0]).text("a").to_arrow(
|
|
||||||
timeout=timedelta(0)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_query_timeout_async(tmp_path):
|
|
||||||
db = await lancedb.connect_async(tmp_path)
|
|
||||||
data = pa.table(
|
|
||||||
{
|
|
||||||
"text": ["a", "b"],
|
|
||||||
"vector": pa.FixedSizeListArray.from_arrays(
|
|
||||||
pc.random(4).cast(pa.float32()), 2
|
|
||||||
),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
table = await db.create_table("test", data)
|
|
||||||
await table.create_index("text", config=FTS())
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
await table.query().where("text != 'a'").to_list(timeout=timedelta(0))
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
await table.vector_search([0.0, 0.0]).to_arrow(timeout=timedelta(0))
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
await (await table.search("a", query_type="fts")).to_pandas(
|
|
||||||
timeout=timedelta(0)
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(Exception, match="Query timeout"):
|
|
||||||
await (
|
|
||||||
table.query()
|
|
||||||
.nearest_to_text("a")
|
|
||||||
.nearest_to([0.0, 0.0])
|
|
||||||
.to_list(timeout=timedelta(0))
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
import re
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import contextlib
|
import contextlib
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
@@ -235,10 +235,6 @@ def test_table_add_in_threadpool():
|
|||||||
|
|
||||||
def test_table_create_indices():
|
def test_table_create_indices():
|
||||||
def handler(request):
|
def handler(request):
|
||||||
index_stats = dict(
|
|
||||||
index_type="IVF_PQ", num_indexed_rows=1000, num_unindexed_rows=0
|
|
||||||
)
|
|
||||||
|
|
||||||
if request.path == "/v1/table/test/create_index/":
|
if request.path == "/v1/table/test/create_index/":
|
||||||
request.send_response(200)
|
request.send_response(200)
|
||||||
request.end_headers()
|
request.end_headers()
|
||||||
@@ -262,47 +258,6 @@ def test_table_create_indices():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
request.wfile.write(payload.encode())
|
request.wfile.write(payload.encode())
|
||||||
elif request.path == "/v1/table/test/index/list/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(
|
|
||||||
dict(
|
|
||||||
indexes=[
|
|
||||||
{
|
|
||||||
"index_name": "id_idx",
|
|
||||||
"columns": ["id"],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"index_name": "text_idx",
|
|
||||||
"columns": ["text"],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"index_name": "vector_idx",
|
|
||||||
"columns": ["vector"],
|
|
||||||
},
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
elif request.path == "/v1/table/test/index/id_idx/stats/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(index_stats)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
elif request.path == "/v1/table/test/index/text_idx/stats/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(index_stats)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
elif request.path == "/v1/table/test/index/vector_idx/stats/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(index_stats)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
elif "/drop/" in request.path:
|
elif "/drop/" in request.path:
|
||||||
request.send_response(200)
|
request.send_response(200)
|
||||||
request.end_headers()
|
request.end_headers()
|
||||||
@@ -314,125 +269,14 @@ def test_table_create_indices():
|
|||||||
# Parameters are well-tested through local and async tests.
|
# Parameters are well-tested through local and async tests.
|
||||||
# This is a smoke-test.
|
# This is a smoke-test.
|
||||||
table = db.create_table("test", [{"id": 1}])
|
table = db.create_table("test", [{"id": 1}])
|
||||||
table.create_scalar_index("id", wait_timeout=timedelta(seconds=2))
|
table.create_scalar_index("id")
|
||||||
table.create_fts_index("text", wait_timeout=timedelta(seconds=2))
|
table.create_fts_index("text")
|
||||||
table.create_index(
|
table.create_scalar_index("vector")
|
||||||
vector_column_name="vector", wait_timeout=timedelta(seconds=10)
|
|
||||||
)
|
|
||||||
table.wait_for_index(["id_idx"], timedelta(seconds=2))
|
|
||||||
table.wait_for_index(["text_idx", "vector_idx"], timedelta(seconds=2))
|
|
||||||
table.drop_index("vector_idx")
|
table.drop_index("vector_idx")
|
||||||
table.drop_index("id_idx")
|
table.drop_index("id_idx")
|
||||||
table.drop_index("text_idx")
|
table.drop_index("text_idx")
|
||||||
|
|
||||||
|
|
||||||
def test_table_wait_for_index_timeout():
|
|
||||||
def handler(request):
|
|
||||||
index_stats = dict(
|
|
||||||
index_type="BTREE", num_indexed_rows=1000, num_unindexed_rows=1
|
|
||||||
)
|
|
||||||
|
|
||||||
if request.path == "/v1/table/test/create/?mode=create":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
request.wfile.write(b"{}")
|
|
||||||
elif request.path == "/v1/table/test/describe/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(
|
|
||||||
dict(
|
|
||||||
version=1,
|
|
||||||
schema=dict(
|
|
||||||
fields=[
|
|
||||||
dict(name="id", type={"type": "int64"}, nullable=False),
|
|
||||||
]
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
elif request.path == "/v1/table/test/index/list/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(
|
|
||||||
dict(
|
|
||||||
indexes=[
|
|
||||||
{
|
|
||||||
"index_name": "id_idx",
|
|
||||||
"columns": ["id"],
|
|
||||||
},
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
elif request.path == "/v1/table/test/index/id_idx/stats/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(index_stats)
|
|
||||||
print(f"{index_stats=}")
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
else:
|
|
||||||
request.send_response(404)
|
|
||||||
request.end_headers()
|
|
||||||
|
|
||||||
with mock_lancedb_connection(handler) as db:
|
|
||||||
table = db.create_table("test", [{"id": 1}])
|
|
||||||
with pytest.raises(
|
|
||||||
RuntimeError,
|
|
||||||
match=re.escape(
|
|
||||||
'Timeout error: timed out waiting for indices: ["id_idx"] after 1s'
|
|
||||||
),
|
|
||||||
):
|
|
||||||
table.wait_for_index(["id_idx"], timedelta(seconds=1))
|
|
||||||
|
|
||||||
|
|
||||||
def test_stats():
|
|
||||||
stats = {
|
|
||||||
"total_bytes": 38,
|
|
||||||
"num_rows": 2,
|
|
||||||
"num_indices": 0,
|
|
||||||
"fragment_stats": {
|
|
||||||
"num_fragments": 1,
|
|
||||||
"num_small_fragments": 1,
|
|
||||||
"lengths": {
|
|
||||||
"min": 2,
|
|
||||||
"max": 2,
|
|
||||||
"mean": 2,
|
|
||||||
"p25": 2,
|
|
||||||
"p50": 2,
|
|
||||||
"p75": 2,
|
|
||||||
"p99": 2,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def handler(request):
|
|
||||||
if request.path == "/v1/table/test/create/?mode=create":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
request.wfile.write(b"{}")
|
|
||||||
elif request.path == "/v1/table/test/stats/":
|
|
||||||
request.send_response(200)
|
|
||||||
request.send_header("Content-Type", "application/json")
|
|
||||||
request.end_headers()
|
|
||||||
payload = json.dumps(stats)
|
|
||||||
request.wfile.write(payload.encode())
|
|
||||||
else:
|
|
||||||
print(request.path)
|
|
||||||
request.send_response(404)
|
|
||||||
request.end_headers()
|
|
||||||
|
|
||||||
with mock_lancedb_connection(handler) as db:
|
|
||||||
table = db.create_table("test", [{"id": 1}])
|
|
||||||
res = table.stats()
|
|
||||||
print(f"{res=}")
|
|
||||||
assert res == stats
|
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def query_test_table(query_handler, *, server_version=Version("0.1.0")):
|
def query_test_table(query_handler, *, server_version=Version("0.1.0")):
|
||||||
def handler(request):
|
def handler(request):
|
||||||
@@ -600,16 +444,6 @@ def test_query_sync_fts():
|
|||||||
"prefilter": True,
|
"prefilter": True,
|
||||||
"with_row_id": True,
|
"with_row_id": True,
|
||||||
"version": None,
|
"version": None,
|
||||||
} or body == {
|
|
||||||
"full_text_query": {
|
|
||||||
"query": "puppy",
|
|
||||||
"columns": ["description", "name"],
|
|
||||||
},
|
|
||||||
"k": 42,
|
|
||||||
"vector": [],
|
|
||||||
"prefilter": True,
|
|
||||||
"with_row_id": True,
|
|
||||||
"version": None,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return pa.table({"id": [1, 2, 3]})
|
return pa.table({"id": [1, 2, 3]})
|
||||||
|
|||||||
@@ -457,45 +457,3 @@ def test_voyageai_reranker(tmp_path, use_tantivy):
|
|||||||
reranker = VoyageAIReranker(model_name="rerank-2")
|
reranker = VoyageAIReranker(model_name="rerank-2")
|
||||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||||
|
|
||||||
|
|
||||||
def test_empty_result_reranker():
|
|
||||||
pytest.importorskip("sentence_transformers")
|
|
||||||
db = lancedb.connect("memory://")
|
|
||||||
|
|
||||||
# Define schema
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
("id", pa.int64()),
|
|
||||||
("text", pa.string()),
|
|
||||||
("vector", pa.list_(pa.float32(), 128)), # 128-dimensional vector
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create empty table with schema
|
|
||||||
empty_table = db.create_table("empty_table", schema=schema, mode="overwrite")
|
|
||||||
empty_table.create_fts_index("text", use_tantivy=False, replace=True)
|
|
||||||
for reranker in [
|
|
||||||
CrossEncoderReranker(),
|
|
||||||
# ColbertReranker(),
|
|
||||||
# AnswerdotaiRerankers(),
|
|
||||||
# OpenaiReranker(),
|
|
||||||
# JinaReranker(),
|
|
||||||
# VoyageAIReranker(model_name="rerank-2"),
|
|
||||||
]:
|
|
||||||
results = (
|
|
||||||
empty_table.search(list(range(128)))
|
|
||||||
.limit(3)
|
|
||||||
.rerank(reranker, "query")
|
|
||||||
.to_arrow()
|
|
||||||
)
|
|
||||||
# check if empty set contains _relevance_score column
|
|
||||||
assert "_relevance_score" in results.column_names
|
|
||||||
assert len(results) == 0
|
|
||||||
|
|
||||||
results = (
|
|
||||||
empty_table.search("query", query_type="fts")
|
|
||||||
.limit(3)
|
|
||||||
.rerank(reranker)
|
|
||||||
.to_arrow()
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -9,9 +9,9 @@ from typing import List
|
|||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
from lancedb.dependencies import _PANDAS_AVAILABLE
|
|
||||||
from lancedb.index import HnswPq, HnswSq, IvfPq
|
from lancedb.index import HnswPq, HnswSq, IvfPq
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
import polars as pl
|
import polars as pl
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pyarrow.dataset
|
import pyarrow.dataset
|
||||||
@@ -138,16 +138,13 @@ def test_create_table(mem_db: DBConnection):
|
|||||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||||
]
|
]
|
||||||
pa_table = pa.Table.from_pylist(rows, schema=schema)
|
df = pd.DataFrame(rows)
|
||||||
|
pa_table = pa.Table.from_pandas(df, schema=schema)
|
||||||
data = [
|
data = [
|
||||||
("Rows", rows),
|
("Rows", rows),
|
||||||
|
("pd_DataFrame", df),
|
||||||
("pa_Table", pa_table),
|
("pa_Table", pa_table),
|
||||||
]
|
]
|
||||||
if _PANDAS_AVAILABLE:
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
df = pd.DataFrame(rows)
|
|
||||||
data.append(("pd_DataFrame", df))
|
|
||||||
|
|
||||||
for name, d in data:
|
for name, d in data:
|
||||||
tbl = mem_db.create_table(name, data=d, schema=schema).to_arrow()
|
tbl = mem_db.create_table(name, data=d, schema=schema).to_arrow()
|
||||||
@@ -299,7 +296,7 @@ def test_add_subschema(mem_db: DBConnection):
|
|||||||
|
|
||||||
data = {"price": 10.0, "item": "foo"}
|
data = {"price": 10.0, "item": "foo"}
|
||||||
table.add([data])
|
table.add([data])
|
||||||
data = pa.Table.from_pydict({"price": [2.0], "vector": [[3.1, 4.1]]})
|
data = pd.DataFrame({"price": [2.0], "vector": [[3.1, 4.1]]})
|
||||||
table.add(data)
|
table.add(data)
|
||||||
data = {"price": 3.0, "vector": [5.9, 26.5], "item": "bar"}
|
data = {"price": 3.0, "vector": [5.9, 26.5], "item": "bar"}
|
||||||
table.add([data])
|
table.add([data])
|
||||||
@@ -408,7 +405,6 @@ def test_add_nullability(mem_db: DBConnection):
|
|||||||
|
|
||||||
|
|
||||||
def test_add_pydantic_model(mem_db: DBConnection):
|
def test_add_pydantic_model(mem_db: DBConnection):
|
||||||
pytest.importorskip("pandas")
|
|
||||||
# https://github.com/lancedb/lancedb/issues/562
|
# https://github.com/lancedb/lancedb/issues/562
|
||||||
|
|
||||||
class Metadata(BaseModel):
|
class Metadata(BaseModel):
|
||||||
@@ -477,10 +473,10 @@ def test_polars(mem_db: DBConnection):
|
|||||||
table = mem_db.create_table("test", data=pl.DataFrame(data))
|
table = mem_db.create_table("test", data=pl.DataFrame(data))
|
||||||
assert len(table) == 2
|
assert len(table) == 2
|
||||||
|
|
||||||
result = table.to_arrow()
|
result = table.to_pandas()
|
||||||
assert np.allclose(result["vector"].to_pylist(), data["vector"])
|
assert np.allclose(result["vector"].tolist(), data["vector"])
|
||||||
assert result["item"].to_pylist() == data["item"]
|
assert result["item"].tolist() == data["item"]
|
||||||
assert np.allclose(result["price"].to_pylist(), data["price"])
|
assert np.allclose(result["price"].tolist(), data["price"])
|
||||||
|
|
||||||
schema = pa.schema(
|
schema = pa.schema(
|
||||||
[
|
[
|
||||||
@@ -529,113 +525,6 @@ def test_versioning(mem_db: DBConnection):
|
|||||||
assert len(table) == 2
|
assert len(table) == 2
|
||||||
|
|
||||||
|
|
||||||
def test_tags(mem_db: DBConnection):
|
|
||||||
table = mem_db.create_table(
|
|
||||||
"test",
|
|
||||||
data=[
|
|
||||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
|
||||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
table.tags.create("tag1", 1)
|
|
||||||
tags = table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert tags["tag1"]["version"] == 1
|
|
||||||
|
|
||||||
table.add(
|
|
||||||
data=[
|
|
||||||
{"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
table.tags.create("tag2", 2)
|
|
||||||
tags = table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert "tag2" in tags
|
|
||||||
assert tags["tag1"]["version"] == 1
|
|
||||||
assert tags["tag2"]["version"] == 2
|
|
||||||
|
|
||||||
table.tags.delete("tag2")
|
|
||||||
table.tags.update("tag1", 2)
|
|
||||||
tags = table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert tags["tag1"]["version"] == 2
|
|
||||||
|
|
||||||
table.tags.update("tag1", 1)
|
|
||||||
tags = table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert tags["tag1"]["version"] == 1
|
|
||||||
|
|
||||||
table.checkout("tag1")
|
|
||||||
assert table.version == 1
|
|
||||||
assert table.count_rows() == 2
|
|
||||||
table.tags.create("tag2", 2)
|
|
||||||
table.checkout("tag2")
|
|
||||||
assert table.version == 2
|
|
||||||
assert table.count_rows() == 3
|
|
||||||
table.checkout_latest()
|
|
||||||
table.add(
|
|
||||||
data=[
|
|
||||||
{"vector": [12.0, 13.0], "item": "baz", "price": 40.0},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_async_tags(mem_db_async: AsyncConnection):
|
|
||||||
table = await mem_db_async.create_table(
|
|
||||||
"test",
|
|
||||||
data=[
|
|
||||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
|
||||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
await table.tags.create("tag1", 1)
|
|
||||||
tags = await table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert tags["tag1"]["version"] == 1
|
|
||||||
|
|
||||||
await table.add(
|
|
||||||
data=[
|
|
||||||
{"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
await table.tags.create("tag2", 2)
|
|
||||||
tags = await table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert "tag2" in tags
|
|
||||||
assert tags["tag1"]["version"] == 1
|
|
||||||
assert tags["tag2"]["version"] == 2
|
|
||||||
|
|
||||||
await table.tags.delete("tag2")
|
|
||||||
await table.tags.update("tag1", 2)
|
|
||||||
tags = await table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert tags["tag1"]["version"] == 2
|
|
||||||
|
|
||||||
await table.tags.update("tag1", 1)
|
|
||||||
tags = await table.tags.list()
|
|
||||||
assert "tag1" in tags
|
|
||||||
assert tags["tag1"]["version"] == 1
|
|
||||||
|
|
||||||
await table.checkout("tag1")
|
|
||||||
assert await table.version() == 1
|
|
||||||
assert await table.count_rows() == 2
|
|
||||||
await table.tags.create("tag2", 2)
|
|
||||||
await table.checkout("tag2")
|
|
||||||
assert await table.version() == 2
|
|
||||||
assert await table.count_rows() == 3
|
|
||||||
await table.checkout_latest()
|
|
||||||
await table.add(
|
|
||||||
data=[
|
|
||||||
{"vector": [12.0, 13.0], "item": "baz", "price": 40.0},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@patch("lancedb.table.AsyncTable.create_index")
|
@patch("lancedb.table.AsyncTable.create_index")
|
||||||
def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
||||||
table = mem_db.create_table(
|
table = mem_db.create_table(
|
||||||
@@ -799,7 +688,7 @@ def test_delete(mem_db: DBConnection):
|
|||||||
assert len(table.list_versions()) == 2
|
assert len(table.list_versions()) == 2
|
||||||
assert table.version == 2
|
assert table.version == 2
|
||||||
assert len(table) == 1
|
assert len(table) == 1
|
||||||
assert table.to_arrow()["id"].to_pylist() == [1]
|
assert table.to_pandas()["id"].tolist() == [1]
|
||||||
|
|
||||||
|
|
||||||
def test_update(mem_db: DBConnection):
|
def test_update(mem_db: DBConnection):
|
||||||
@@ -963,7 +852,6 @@ def test_merge_insert(mem_db: DBConnection):
|
|||||||
ids=["pa.Table", "pd.DataFrame", "rows"],
|
ids=["pa.Table", "pd.DataFrame", "rows"],
|
||||||
)
|
)
|
||||||
def test_merge_insert_subschema(mem_db: DBConnection, data_format):
|
def test_merge_insert_subschema(mem_db: DBConnection, data_format):
|
||||||
pytest.importorskip("pandas")
|
|
||||||
initial_data = pa.table(
|
initial_data = pa.table(
|
||||||
{"id": range(3), "a": [1.0, 2.0, 3.0], "c": ["x", "x", "x"]}
|
{"id": range(3), "a": [1.0, 2.0, 3.0], "c": ["x", "x", "x"]}
|
||||||
)
|
)
|
||||||
@@ -1060,7 +948,7 @@ def test_create_with_embedding_function(mem_db: DBConnection):
|
|||||||
|
|
||||||
func = MockTextEmbeddingFunction.create()
|
func = MockTextEmbeddingFunction.create()
|
||||||
texts = ["hello world", "goodbye world", "foo bar baz fizz buzz"]
|
texts = ["hello world", "goodbye world", "foo bar baz fizz buzz"]
|
||||||
df = pa.table({"text": texts, "vector": func.compute_source_embeddings(texts)})
|
df = pd.DataFrame({"text": texts, "vector": func.compute_source_embeddings(texts)})
|
||||||
|
|
||||||
conf = EmbeddingFunctionConfig(
|
conf = EmbeddingFunctionConfig(
|
||||||
source_column="text", vector_column="vector", function=func
|
source_column="text", vector_column="vector", function=func
|
||||||
@@ -1085,7 +973,7 @@ def test_create_f16_table(mem_db: DBConnection):
|
|||||||
text: str
|
text: str
|
||||||
vector: Vector(32, value_type=pa.float16())
|
vector: Vector(32, value_type=pa.float16())
|
||||||
|
|
||||||
df = pa.table(
|
df = pd.DataFrame(
|
||||||
{
|
{
|
||||||
"text": [f"s-{i}" for i in range(512)],
|
"text": [f"s-{i}" for i in range(512)],
|
||||||
"vector": [np.random.randn(32).astype(np.float16) for _ in range(512)],
|
"vector": [np.random.randn(32).astype(np.float16) for _ in range(512)],
|
||||||
@@ -1098,7 +986,7 @@ def test_create_f16_table(mem_db: DBConnection):
|
|||||||
table.add(df)
|
table.add(df)
|
||||||
table.create_index(num_partitions=2, num_sub_vectors=2)
|
table.create_index(num_partitions=2, num_sub_vectors=2)
|
||||||
|
|
||||||
query = df["vector"][2].as_py()
|
query = df.vector.iloc[2]
|
||||||
expected = table.search(query).limit(2).to_arrow()
|
expected = table.search(query).limit(2).to_arrow()
|
||||||
|
|
||||||
assert "s-2" in expected["text"].to_pylist()
|
assert "s-2" in expected["text"].to_pylist()
|
||||||
@@ -1114,7 +1002,7 @@ def test_add_with_embedding_function(mem_db: DBConnection):
|
|||||||
table = mem_db.create_table("my_table", schema=MyTable)
|
table = mem_db.create_table("my_table", schema=MyTable)
|
||||||
|
|
||||||
texts = ["hello world", "goodbye world", "foo bar baz fizz buzz"]
|
texts = ["hello world", "goodbye world", "foo bar baz fizz buzz"]
|
||||||
df = pa.table({"text": texts})
|
df = pd.DataFrame({"text": texts})
|
||||||
table.add(df)
|
table.add(df)
|
||||||
|
|
||||||
texts = ["the quick brown fox", "jumped over the lazy dog"]
|
texts = ["the quick brown fox", "jumped over the lazy dog"]
|
||||||
@@ -1145,14 +1033,14 @@ def test_multiple_vector_columns(mem_db: DBConnection):
|
|||||||
{"vector1": v1, "vector2": v2, "text": "foo"},
|
{"vector1": v1, "vector2": v2, "text": "foo"},
|
||||||
{"vector1": v2, "vector2": v1, "text": "bar"},
|
{"vector1": v2, "vector2": v1, "text": "bar"},
|
||||||
]
|
]
|
||||||
df = pa.Table.from_pylist(data)
|
df = pd.DataFrame(data)
|
||||||
table.add(df)
|
table.add(df)
|
||||||
|
|
||||||
q = np.random.randn(10)
|
q = np.random.randn(10)
|
||||||
result1 = table.search(q, vector_column_name="vector1").limit(1).to_arrow()
|
result1 = table.search(q, vector_column_name="vector1").limit(1).to_pandas()
|
||||||
result2 = table.search(q, vector_column_name="vector2").limit(1).to_arrow()
|
result2 = table.search(q, vector_column_name="vector2").limit(1).to_pandas()
|
||||||
|
|
||||||
assert result1["text"][0] != result2["text"][0]
|
assert result1["text"].iloc[0] != result2["text"].iloc[0]
|
||||||
|
|
||||||
|
|
||||||
def test_create_scalar_index(mem_db: DBConnection):
|
def test_create_scalar_index(mem_db: DBConnection):
|
||||||
@@ -1190,22 +1078,22 @@ def test_empty_query(mem_db: DBConnection):
|
|||||||
"my_table",
|
"my_table",
|
||||||
data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
|
data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
|
||||||
)
|
)
|
||||||
df = table.search().select(["id"]).where("text='bar'").limit(1).to_arrow()
|
df = table.search().select(["id"]).where("text='bar'").limit(1).to_pandas()
|
||||||
val = df["id"][0].as_py()
|
val = df.id.iloc[0]
|
||||||
assert val == 1
|
assert val == 1
|
||||||
|
|
||||||
table = mem_db.create_table("my_table2", data=[{"id": i} for i in range(100)])
|
table = mem_db.create_table("my_table2", data=[{"id": i} for i in range(100)])
|
||||||
df = table.search().select(["id"]).to_arrow()
|
df = table.search().select(["id"]).to_pandas()
|
||||||
assert df.num_rows == 100
|
assert len(df) == 100
|
||||||
# None is the same as default
|
# None is the same as default
|
||||||
df = table.search().select(["id"]).limit(None).to_arrow()
|
df = table.search().select(["id"]).limit(None).to_pandas()
|
||||||
assert df.num_rows == 100
|
assert len(df) == 100
|
||||||
# invalid limist is the same as None, wihch is the same as default
|
# invalid limist is the same as None, wihch is the same as default
|
||||||
df = table.search().select(["id"]).limit(-1).to_arrow()
|
df = table.search().select(["id"]).limit(-1).to_pandas()
|
||||||
assert df.num_rows == 100
|
assert len(df) == 100
|
||||||
# valid limit should work
|
# valid limit should work
|
||||||
df = table.search().select(["id"]).limit(42).to_arrow()
|
df = table.search().select(["id"]).limit(42).to_pandas()
|
||||||
assert df.num_rows == 42
|
assert len(df) == 42
|
||||||
|
|
||||||
|
|
||||||
def test_search_with_schema_inf_single_vector(mem_db: DBConnection):
|
def test_search_with_schema_inf_single_vector(mem_db: DBConnection):
|
||||||
@@ -1224,14 +1112,14 @@ def test_search_with_schema_inf_single_vector(mem_db: DBConnection):
|
|||||||
{"vector_col": v1, "text": "foo"},
|
{"vector_col": v1, "text": "foo"},
|
||||||
{"vector_col": v2, "text": "bar"},
|
{"vector_col": v2, "text": "bar"},
|
||||||
]
|
]
|
||||||
df = pa.Table.from_pylist(data)
|
df = pd.DataFrame(data)
|
||||||
table.add(df)
|
table.add(df)
|
||||||
|
|
||||||
q = np.random.randn(10)
|
q = np.random.randn(10)
|
||||||
result1 = table.search(q, vector_column_name="vector_col").limit(1).to_arrow()
|
result1 = table.search(q, vector_column_name="vector_col").limit(1).to_pandas()
|
||||||
result2 = table.search(q).limit(1).to_arrow()
|
result2 = table.search(q).limit(1).to_pandas()
|
||||||
|
|
||||||
assert result1["text"][0].as_py() == result2["text"][0].as_py()
|
assert result1["text"].iloc[0] == result2["text"].iloc[0]
|
||||||
|
|
||||||
|
|
||||||
def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
|
def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
|
||||||
@@ -1251,12 +1139,12 @@ def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
|
|||||||
{"vector1": v1, "vector2": v2, "text": "foo"},
|
{"vector1": v1, "vector2": v2, "text": "foo"},
|
||||||
{"vector1": v2, "vector2": v1, "text": "bar"},
|
{"vector1": v2, "vector2": v1, "text": "bar"},
|
||||||
]
|
]
|
||||||
df = pa.Table.from_pylist(data)
|
df = pd.DataFrame(data)
|
||||||
table.add(df)
|
table.add(df)
|
||||||
|
|
||||||
q = np.random.randn(10)
|
q = np.random.randn(10)
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
table.search(q).limit(1).to_arrow()
|
table.search(q).limit(1).to_pandas()
|
||||||
|
|
||||||
|
|
||||||
def test_compact_cleanup(tmp_db: DBConnection):
|
def test_compact_cleanup(tmp_db: DBConnection):
|
||||||
@@ -1496,37 +1384,6 @@ async def test_add_columns_async(mem_db_async: AsyncConnection):
|
|||||||
assert data["new_col"].to_pylist() == [2, 3]
|
assert data["new_col"].to_pylist() == [2, 3]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
|
|
||||||
data = pa.table({"id": [0, 1]})
|
|
||||||
table = await mem_db_async.create_table("my_table", data=data)
|
|
||||||
await table.add_columns(
|
|
||||||
[pa.field("x", pa.int64()), pa.field("vector", pa.list_(pa.float32(), 8))]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert await table.schema() == pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("x", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 8)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
table = await mem_db_async.create_table("table2", data=data)
|
|
||||||
await table.add_columns(
|
|
||||||
pa.schema(
|
|
||||||
[pa.field("y", pa.int64()), pa.field("emb", pa.list_(pa.float32(), 8))]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
assert await table.schema() == pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("y", pa.int64()),
|
|
||||||
pa.field("emb", pa.list_(pa.float32(), 8)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_alter_columns(mem_db: DBConnection):
|
def test_alter_columns(mem_db: DBConnection):
|
||||||
data = pa.table({"id": [0, 1]})
|
data = pa.table({"id": [0, 1]})
|
||||||
table = mem_db.create_table("my_table", data=data)
|
table = mem_db.create_table("my_table", data=data)
|
||||||
@@ -1695,31 +1552,3 @@ def test_replace_field_metadata(tmp_path):
|
|||||||
schema = table.schema
|
schema = table.schema
|
||||||
field = schema[0].metadata
|
field = schema[0].metadata
|
||||||
assert field == {b"foo": b"bar"}
|
assert field == {b"foo": b"bar"}
|
||||||
|
|
||||||
|
|
||||||
def test_stats(mem_db: DBConnection):
|
|
||||||
table = mem_db.create_table(
|
|
||||||
"my_table",
|
|
||||||
data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
|
|
||||||
)
|
|
||||||
assert len(table) == 2
|
|
||||||
stats = table.stats()
|
|
||||||
print(f"{stats=}")
|
|
||||||
assert stats == {
|
|
||||||
"total_bytes": 38,
|
|
||||||
"num_rows": 2,
|
|
||||||
"num_indices": 0,
|
|
||||||
"fragment_stats": {
|
|
||||||
"num_fragments": 1,
|
|
||||||
"num_small_fragments": 1,
|
|
||||||
"lengths": {
|
|
||||||
"min": 2,
|
|
||||||
"max": 2,
|
|
||||||
"mean": 2,
|
|
||||||
"p25": 2,
|
|
||||||
"p50": 2,
|
|
||||||
"p75": 2,
|
|
||||||
"p99": 2,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2,26 +2,25 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use arrow::array::make_array;
|
use arrow::array::make_array;
|
||||||
use arrow::array::Array;
|
use arrow::array::Array;
|
||||||
use arrow::array::ArrayData;
|
use arrow::array::ArrayData;
|
||||||
use arrow::pyarrow::FromPyArrow;
|
use arrow::pyarrow::FromPyArrow;
|
||||||
use arrow::pyarrow::IntoPyArrow;
|
use arrow::pyarrow::IntoPyArrow;
|
||||||
use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery};
|
use lancedb::index::scalar::FullTextSearchQuery;
|
||||||
use lancedb::query::QueryExecutionOptions;
|
use lancedb::query::QueryExecutionOptions;
|
||||||
use lancedb::query::QueryFilter;
|
use lancedb::query::QueryFilter;
|
||||||
use lancedb::query::{
|
use lancedb::query::{
|
||||||
ExecutableQuery, Query as LanceDbQuery, QueryBase, Select, VectorQuery as LanceDbVectorQuery,
|
ExecutableQuery, Query as LanceDbQuery, QueryBase, Select, VectorQuery as LanceDbVectorQuery,
|
||||||
};
|
};
|
||||||
use lancedb::table::AnyQuery;
|
use lancedb::table::AnyQuery;
|
||||||
|
use pyo3::exceptions::PyNotImplementedError;
|
||||||
use pyo3::exceptions::PyRuntimeError;
|
use pyo3::exceptions::PyRuntimeError;
|
||||||
use pyo3::exceptions::{PyNotImplementedError, PyValueError};
|
|
||||||
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
|
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
|
||||||
use pyo3::pymethods;
|
use pyo3::pymethods;
|
||||||
|
use pyo3::types::PyDict;
|
||||||
use pyo3::types::PyList;
|
use pyo3::types::PyList;
|
||||||
use pyo3::types::{PyDict, PyString};
|
|
||||||
use pyo3::Bound;
|
use pyo3::Bound;
|
||||||
use pyo3::IntoPyObject;
|
use pyo3::IntoPyObject;
|
||||||
use pyo3::PyAny;
|
use pyo3::PyAny;
|
||||||
@@ -32,7 +31,7 @@ use pyo3_async_runtimes::tokio::future_into_py;
|
|||||||
|
|
||||||
use crate::arrow::RecordBatchStream;
|
use crate::arrow::RecordBatchStream;
|
||||||
use crate::error::PythonErrorExt;
|
use crate::error::PythonErrorExt;
|
||||||
use crate::util::{parse_distance_type, parse_fts_query};
|
use crate::util::parse_distance_type;
|
||||||
|
|
||||||
// Python representation of full text search parameters
|
// Python representation of full text search parameters
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -46,9 +45,9 @@ pub struct PyFullTextSearchQuery {
|
|||||||
|
|
||||||
impl From<FullTextSearchQuery> for PyFullTextSearchQuery {
|
impl From<FullTextSearchQuery> for PyFullTextSearchQuery {
|
||||||
fn from(query: FullTextSearchQuery) -> Self {
|
fn from(query: FullTextSearchQuery) -> Self {
|
||||||
Self {
|
PyFullTextSearchQuery {
|
||||||
columns: query.columns().into_iter().collect(),
|
columns: query.columns,
|
||||||
query: query.query.query().to_owned(),
|
query: query.query,
|
||||||
limit: query.limit,
|
limit: query.limit,
|
||||||
wand_factor: query.wand_factor,
|
wand_factor: query.wand_factor,
|
||||||
}
|
}
|
||||||
@@ -100,7 +99,7 @@ pub struct PyQueryRequest {
|
|||||||
impl From<AnyQuery> for PyQueryRequest {
|
impl From<AnyQuery> for PyQueryRequest {
|
||||||
fn from(query: AnyQuery) -> Self {
|
fn from(query: AnyQuery) -> Self {
|
||||||
match query {
|
match query {
|
||||||
AnyQuery::Query(query_request) => Self {
|
AnyQuery::Query(query_request) => PyQueryRequest {
|
||||||
limit: query_request.limit,
|
limit: query_request.limit,
|
||||||
offset: query_request.offset,
|
offset: query_request.offset,
|
||||||
filter: query_request.filter.map(PyQueryFilter),
|
filter: query_request.filter.map(PyQueryFilter),
|
||||||
@@ -122,7 +121,7 @@ impl From<AnyQuery> for PyQueryRequest {
|
|||||||
postfilter: None,
|
postfilter: None,
|
||||||
norm: None,
|
norm: None,
|
||||||
},
|
},
|
||||||
AnyQuery::VectorQuery(vector_query) => Self {
|
AnyQuery::VectorQuery(vector_query) => PyQueryRequest {
|
||||||
limit: vector_query.base.limit,
|
limit: vector_query.base.limit,
|
||||||
offset: vector_query.base.offset,
|
offset: vector_query.base.offset,
|
||||||
filter: vector_query.base.filter.map(PyQueryFilter),
|
filter: vector_query.base.filter.map(PyQueryFilter),
|
||||||
@@ -237,69 +236,29 @@ impl Query {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<FTSQuery> {
|
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<FTSQuery> {
|
||||||
let fts_query = query
|
let query_text = query
|
||||||
.get_item("query")?
|
.get_item("query")?
|
||||||
.ok_or(PyErr::new::<PyRuntimeError, _>(
|
.ok_or(PyErr::new::<PyRuntimeError, _>(
|
||||||
"Query text is required for nearest_to_text",
|
"Query text is required for nearest_to_text",
|
||||||
))?;
|
))?
|
||||||
|
.extract::<String>()?;
|
||||||
|
let columns = query
|
||||||
|
.get_item("columns")?
|
||||||
|
.map(|columns| columns.extract::<Vec<String>>())
|
||||||
|
.transpose()?;
|
||||||
|
|
||||||
let query = if let Ok(query_text) = fts_query.downcast::<PyString>() {
|
let fts_query = FullTextSearchQuery::new(query_text).columns(columns);
|
||||||
let mut query_text = query_text.to_string();
|
|
||||||
let columns = query
|
|
||||||
.get_item("columns")?
|
|
||||||
.map(|columns| columns.extract::<Vec<String>>())
|
|
||||||
.transpose()?;
|
|
||||||
|
|
||||||
let is_phrase =
|
|
||||||
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
|
|
||||||
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
|
|
||||||
|
|
||||||
if is_phrase {
|
|
||||||
// Remove the surrounding quotes for phrase queries
|
|
||||||
query_text = query_text[1..query_text.len() - 1].to_string();
|
|
||||||
}
|
|
||||||
|
|
||||||
let query: FtsQuery = match (is_phrase, is_multi_match) {
|
|
||||||
(false, _) => MatchQuery::new(query_text).into(),
|
|
||||||
(true, false) => PhraseQuery::new(query_text).into(),
|
|
||||||
(true, true) => {
|
|
||||||
return Err(PyValueError::new_err(
|
|
||||||
"Phrase queries cannot be used with multiple columns.",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let mut query = FullTextSearchQuery::new_query(query);
|
|
||||||
if let Some(cols) = columns {
|
|
||||||
if !cols.is_empty() {
|
|
||||||
query = query.with_columns(&cols).map_err(|e| {
|
|
||||||
PyValueError::new_err(format!(
|
|
||||||
"Failed to set full text search columns: {}",
|
|
||||||
e
|
|
||||||
))
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
query
|
|
||||||
} else if let Ok(query) = fts_query.downcast::<PyDict>() {
|
|
||||||
let query = parse_fts_query(query)?;
|
|
||||||
FullTextSearchQuery::new_query(query)
|
|
||||||
} else {
|
|
||||||
return Err(PyValueError::new_err(
|
|
||||||
"query must be a string or a Query object",
|
|
||||||
));
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(FTSQuery {
|
Ok(FTSQuery {
|
||||||
|
fts_query,
|
||||||
inner: self.inner.clone(),
|
inner: self.inner.clone(),
|
||||||
fts_query: query,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (max_batch_length=None, timeout=None))]
|
#[pyo3(signature = (max_batch_length=None))]
|
||||||
pub fn execute(
|
pub fn execute(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
timeout: Option<Duration>,
|
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
@@ -307,15 +266,12 @@ impl Query {
|
|||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
opts.max_batch_length = max_batch_length;
|
opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
if let Some(timeout) = timeout {
|
|
||||||
opts.timeout = Some(timeout);
|
|
||||||
}
|
|
||||||
let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
|
let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
|
||||||
Ok(RecordBatchStream::new(inner_stream))
|
Ok(RecordBatchStream::new(inner_stream))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
|
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner
|
inner
|
||||||
@@ -325,16 +281,6 @@ impl Query {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
inner
|
|
||||||
.analyze_plan()
|
|
||||||
.await
|
|
||||||
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_query_request(&self) -> PyQueryRequest {
|
pub fn to_query_request(&self) -> PyQueryRequest {
|
||||||
PyQueryRequest::from(AnyQuery::Query(self.inner.clone().into_request()))
|
PyQueryRequest::from(AnyQuery::Query(self.inner.clone().into_request()))
|
||||||
}
|
}
|
||||||
@@ -381,11 +327,10 @@ impl FTSQuery {
|
|||||||
self.inner = self.inner.clone().postfilter();
|
self.inner = self.inner.clone().postfilter();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (max_batch_length=None, timeout=None))]
|
#[pyo3(signature = (max_batch_length=None))]
|
||||||
pub fn execute(
|
pub fn execute(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
timeout: Option<Duration>,
|
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_
|
let inner = self_
|
||||||
.inner
|
.inner
|
||||||
@@ -397,9 +342,6 @@ impl FTSQuery {
|
|||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
opts.max_batch_length = max_batch_length;
|
opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
if let Some(timeout) = timeout {
|
|
||||||
opts.timeout = Some(timeout);
|
|
||||||
}
|
|
||||||
let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
|
let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
|
||||||
Ok(RecordBatchStream::new(inner_stream))
|
Ok(RecordBatchStream::new(inner_stream))
|
||||||
})
|
})
|
||||||
@@ -423,18 +365,8 @@ impl FTSQuery {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
inner
|
|
||||||
.analyze_plan()
|
|
||||||
.await
|
|
||||||
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_query(&self) -> String {
|
pub fn get_query(&self) -> String {
|
||||||
self.fts_query.query.query().to_owned()
|
self.fts_query.query.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_query_request(&self) -> PyQueryRequest {
|
pub fn to_query_request(&self) -> PyQueryRequest {
|
||||||
@@ -522,11 +454,10 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().bypass_vector_index()
|
self.inner = self.inner.clone().bypass_vector_index()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (max_batch_length=None, timeout=None))]
|
#[pyo3(signature = (max_batch_length=None))]
|
||||||
pub fn execute(
|
pub fn execute(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
max_batch_length: Option<u32>,
|
max_batch_length: Option<u32>,
|
||||||
timeout: Option<Duration>,
|
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
@@ -534,15 +465,12 @@ impl VectorQuery {
|
|||||||
if let Some(max_batch_length) = max_batch_length {
|
if let Some(max_batch_length) = max_batch_length {
|
||||||
opts.max_batch_length = max_batch_length;
|
opts.max_batch_length = max_batch_length;
|
||||||
}
|
}
|
||||||
if let Some(timeout) = timeout {
|
|
||||||
opts.timeout = Some(timeout);
|
|
||||||
}
|
|
||||||
let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
|
let inner_stream = inner.execute_with_options(opts).await.infer_error()?;
|
||||||
Ok(RecordBatchStream::new(inner_stream))
|
Ok(RecordBatchStream::new(inner_stream))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
|
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner
|
inner
|
||||||
@@ -552,16 +480,6 @@ impl VectorQuery {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
inner
|
|
||||||
.analyze_plan()
|
|
||||||
.await
|
|
||||||
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<HybridQuery> {
|
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<HybridQuery> {
|
||||||
let base_query = self.inner.clone().into_plain();
|
let base_query = self.inner.clone().into_plain();
|
||||||
let fts_query = Query::new(base_query).nearest_to_text(query)?;
|
let fts_query = Query::new(base_query).nearest_to_text(query)?;
|
||||||
@@ -652,11 +570,6 @@ impl HybridQuery {
|
|||||||
self.inner_vec.bypass_vector_index();
|
self.inner_vec.bypass_vector_index();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (lower_bound=None, upper_bound=None))]
|
|
||||||
pub fn distance_range(&mut self, lower_bound: Option<f32>, upper_bound: Option<f32>) {
|
|
||||||
self.inner_vec.distance_range(lower_bound, upper_bound);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_vector_query(&mut self) -> PyResult<VectorQuery> {
|
pub fn to_vector_query(&mut self) -> PyResult<VectorQuery> {
|
||||||
Ok(VectorQuery {
|
Ok(VectorQuery {
|
||||||
inner: self.inner_vec.inner.clone(),
|
inner: self.inner_vec.inner.clone(),
|
||||||
|
|||||||
@@ -1,28 +1,28 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
use std::{collections::HashMap, sync::Arc};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
error::PythonErrorExt,
|
|
||||||
index::{extract_index_params, IndexConfig},
|
|
||||||
query::Query,
|
|
||||||
};
|
|
||||||
use arrow::{
|
use arrow::{
|
||||||
datatypes::{DataType, Schema},
|
datatypes::DataType,
|
||||||
ffi_stream::ArrowArrayStreamReader,
|
ffi_stream::ArrowArrayStreamReader,
|
||||||
pyarrow::{FromPyArrow, PyArrowType, ToPyArrow},
|
pyarrow::{FromPyArrow, ToPyArrow},
|
||||||
};
|
};
|
||||||
use lancedb::table::{
|
use lancedb::table::{
|
||||||
AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
|
AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
|
||||||
Table as LanceDbTable,
|
Table as LanceDbTable,
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::{PyIOError, PyKeyError, PyRuntimeError, PyValueError},
|
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods, PyInt, PyString},
|
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, PyAny, PyObject, PyRef, PyResult, Python,
|
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
error::PythonErrorExt,
|
||||||
|
index::{extract_index_params, IndexConfig},
|
||||||
|
query::Query,
|
||||||
|
};
|
||||||
|
|
||||||
/// Statistics about a compaction operation.
|
/// Statistics about a compaction operation.
|
||||||
#[pyclass(get_all)]
|
#[pyclass(get_all)]
|
||||||
@@ -176,19 +176,15 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (column, index=None, replace=None, wait_timeout=None))]
|
#[pyo3(signature = (column, index=None, replace=None))]
|
||||||
pub fn create_index<'a>(
|
pub fn create_index<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
column: String,
|
column: String,
|
||||||
index: Option<Bound<'_, PyAny>>,
|
index: Option<Bound<'_, PyAny>>,
|
||||||
replace: Option<bool>,
|
replace: Option<bool>,
|
||||||
wait_timeout: Option<Bound<'_, PyAny>>,
|
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let index = extract_index_params(&index)?;
|
let index = extract_index_params(&index)?;
|
||||||
let timeout = wait_timeout.map(|t| t.extract::<std::time::Duration>().unwrap());
|
let mut op = self_.inner_ref()?.create_index(&[column], index);
|
||||||
let mut op = self_
|
|
||||||
.inner_ref()?
|
|
||||||
.create_index_with_timeout(&[column], index, timeout);
|
|
||||||
if let Some(replace) = replace {
|
if let Some(replace) = replace {
|
||||||
op = op.replace(replace);
|
op = op.replace(replace);
|
||||||
}
|
}
|
||||||
@@ -207,34 +203,6 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn wait_for_index<'a>(
|
|
||||||
self_: PyRef<'a, Self>,
|
|
||||||
index_names: Vec<String>,
|
|
||||||
timeout: Bound<'_, PyAny>,
|
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
|
||||||
let inner = self_.inner_ref()?.clone();
|
|
||||||
let timeout = timeout.extract::<std::time::Duration>()?;
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let index_refs = index_names
|
|
||||||
.iter()
|
|
||||||
.map(String::as_str)
|
|
||||||
.collect::<Vec<&str>>();
|
|
||||||
inner
|
|
||||||
.wait_for_index(&index_refs, timeout)
|
|
||||||
.await
|
|
||||||
.infer_error()?;
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn prewarm_index(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner_ref()?.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
inner.prewarm_index(&index_name).await.infer_error()?;
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
@@ -279,40 +247,6 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stats(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner_ref()?.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let stats = inner.stats().await.infer_error()?;
|
|
||||||
Python::with_gil(|py| {
|
|
||||||
let dict = PyDict::new(py);
|
|
||||||
dict.set_item("total_bytes", stats.total_bytes)?;
|
|
||||||
dict.set_item("num_rows", stats.num_rows)?;
|
|
||||||
dict.set_item("num_indices", stats.num_indices)?;
|
|
||||||
|
|
||||||
let fragment_stats = PyDict::new(py);
|
|
||||||
fragment_stats.set_item("num_fragments", stats.fragment_stats.num_fragments)?;
|
|
||||||
fragment_stats.set_item(
|
|
||||||
"num_small_fragments",
|
|
||||||
stats.fragment_stats.num_small_fragments,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let fragment_lengths = PyDict::new(py);
|
|
||||||
fragment_lengths.set_item("min", stats.fragment_stats.lengths.min)?;
|
|
||||||
fragment_lengths.set_item("max", stats.fragment_stats.lengths.max)?;
|
|
||||||
fragment_lengths.set_item("mean", stats.fragment_stats.lengths.mean)?;
|
|
||||||
fragment_lengths.set_item("p25", stats.fragment_stats.lengths.p25)?;
|
|
||||||
fragment_lengths.set_item("p50", stats.fragment_stats.lengths.p50)?;
|
|
||||||
fragment_lengths.set_item("p75", stats.fragment_stats.lengths.p75)?;
|
|
||||||
fragment_lengths.set_item("p99", stats.fragment_stats.lengths.p99)?;
|
|
||||||
|
|
||||||
fragment_stats.set_item("lengths", fragment_lengths)?;
|
|
||||||
dict.set_item("fragment_stats", fragment_stats)?;
|
|
||||||
|
|
||||||
Ok(Some(dict.unbind()))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn __repr__(&self) -> String {
|
pub fn __repr__(&self) -> String {
|
||||||
match &self.inner {
|
match &self.inner {
|
||||||
None => format!("ClosedTable({})", self.name),
|
None => format!("ClosedTable({})", self.name),
|
||||||
@@ -355,26 +289,10 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn checkout(self_: PyRef<'_, Self>, version: PyObject) -> PyResult<Bound<'_, PyAny>> {
|
pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
let py = self_.py();
|
future_into_py(self_.py(), async move {
|
||||||
let (is_int, int_value, string_value) = if let Ok(i) = version.downcast_bound::<PyInt>(py) {
|
inner.checkout(version).await.infer_error()
|
||||||
let num: u64 = i.extract()?;
|
|
||||||
(true, num, String::new())
|
|
||||||
} else if let Ok(s) = version.downcast_bound::<PyString>(py) {
|
|
||||||
let str_value = s.to_string();
|
|
||||||
(false, 0, str_value)
|
|
||||||
} else {
|
|
||||||
return Err(PyIOError::new_err(
|
|
||||||
"version must be an integer or a string.",
|
|
||||||
));
|
|
||||||
};
|
|
||||||
future_into_py(py, async move {
|
|
||||||
if is_int {
|
|
||||||
inner.checkout(int_value).await.infer_error()
|
|
||||||
} else {
|
|
||||||
inner.checkout_tag(&string_value).await.infer_error()
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -385,27 +303,18 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (version=None))]
|
pub fn restore(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
pub fn restore(self_: PyRef<'_, Self>, version: Option<u64>) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
|
future_into_py(
|
||||||
future_into_py(self_.py(), async move {
|
self_.py(),
|
||||||
if let Some(version) = version {
|
async move { inner.restore().await.infer_error() },
|
||||||
inner.checkout(version).await.infer_error()?;
|
)
|
||||||
}
|
|
||||||
inner.restore().await.infer_error()
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query(&self) -> Query {
|
pub fn query(&self) -> Query {
|
||||||
Query::new(self.inner_ref().unwrap().query())
|
Query::new(self.inner_ref().unwrap().query())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[getter]
|
|
||||||
pub fn tags(&self) -> PyResult<Tags> {
|
|
||||||
Ok(Tags::new(self.inner_ref()?.clone()))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Optimize the on-disk data by compacting and pruning old data, for better performance.
|
/// Optimize the on-disk data by compacting and pruning old data, for better performance.
|
||||||
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
|
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
|
||||||
pub fn optimize(
|
pub fn optimize(
|
||||||
@@ -489,14 +398,8 @@ impl Table {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let stats = builder.execute(Box::new(batches)).await.infer_error()?;
|
builder.execute(Box::new(batches)).await.infer_error()?;
|
||||||
Python::with_gil(|py| {
|
Ok(())
|
||||||
let dict = PyDict::new(py);
|
|
||||||
dict.set_item("num_inserted_rows", stats.num_inserted_rows)?;
|
|
||||||
dict.set_item("num_updated_rows", stats.num_updated_rows)?;
|
|
||||||
dict.set_item("num_deleted_rows", stats.num_deleted_rows)?;
|
|
||||||
Ok(dict.unbind())
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -537,20 +440,6 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_columns_with_schema(
|
|
||||||
self_: PyRef<'_, Self>,
|
|
||||||
schema: PyArrowType<Schema>,
|
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let arrow_schema = &schema.0;
|
|
||||||
let transform = NewColumnTransform::AllNulls(Arc::new(arrow_schema.clone()));
|
|
||||||
|
|
||||||
let inner = self_.inner_ref()?.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
inner.add_columns(transform, None).await.infer_error()?;
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn alter_columns<'a>(
|
pub fn alter_columns<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
alterations: Vec<Bound<PyDict>>,
|
alterations: Vec<Bound<PyDict>>,
|
||||||
@@ -646,72 +535,3 @@ pub struct MergeInsertParams {
|
|||||||
when_not_matched_by_source_delete: bool,
|
when_not_matched_by_source_delete: bool,
|
||||||
when_not_matched_by_source_condition: Option<String>,
|
when_not_matched_by_source_condition: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyclass]
|
|
||||||
pub struct Tags {
|
|
||||||
inner: LanceDbTable,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Tags {
|
|
||||||
pub fn new(table: LanceDbTable) -> Self {
|
|
||||||
Self { inner: table }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pymethods]
|
|
||||||
impl Tags {
|
|
||||||
pub fn list(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let tags = inner.tags().await.infer_error()?;
|
|
||||||
let res = tags.list().await.infer_error()?;
|
|
||||||
|
|
||||||
Python::with_gil(|py| {
|
|
||||||
let py_dict = PyDict::new(py);
|
|
||||||
for (key, contents) in res {
|
|
||||||
let value_dict = PyDict::new(py);
|
|
||||||
value_dict.set_item("version", contents.version)?;
|
|
||||||
value_dict.set_item("manifest_size", contents.manifest_size)?;
|
|
||||||
py_dict.set_item(key, value_dict)?;
|
|
||||||
}
|
|
||||||
Ok(py_dict.unbind())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_version(self_: PyRef<'_, Self>, tag: String) -> PyResult<Bound<'_, PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let tags = inner.tags().await.infer_error()?;
|
|
||||||
let res = tags.get_version(tag.as_str()).await.infer_error()?;
|
|
||||||
Ok(res)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn create(self_: PyRef<Self>, tag: String, version: u64) -> PyResult<Bound<PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let mut tags = inner.tags().await.infer_error()?;
|
|
||||||
tags.create(tag.as_str(), version).await.infer_error()?;
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete(self_: PyRef<Self>, tag: String) -> PyResult<Bound<PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let mut tags = inner.tags().await.infer_error()?;
|
|
||||||
tags.delete(tag.as_str()).await.infer_error()?;
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn update(self_: PyRef<Self>, tag: String, version: u64) -> PyResult<Bound<PyAny>> {
|
|
||||||
let inner = self_.inner.clone();
|
|
||||||
future_into_py(self_.py(), async move {
|
|
||||||
let mut tags = inner.tags().await.infer_error()?;
|
|
||||||
tags.update(tag.as_str(), version).await.infer_error()?;
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -3,15 +3,11 @@
|
|||||||
|
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
|
||||||
use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery};
|
|
||||||
use lancedb::DistanceType;
|
use lancedb::DistanceType;
|
||||||
use pyo3::prelude::{PyAnyMethods, PyDictMethods, PyListMethods};
|
|
||||||
use pyo3::types::PyDict;
|
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyfunction, PyResult,
|
pyfunction, PyResult,
|
||||||
};
|
};
|
||||||
use pyo3::{Bound, PyAny};
|
|
||||||
|
|
||||||
/// A wrapper around a rust builder
|
/// A wrapper around a rust builder
|
||||||
///
|
///
|
||||||
@@ -63,117 +59,3 @@ pub fn validate_table_name(table_name: &str) -> PyResult<()> {
|
|||||||
lancedb::utils::validate_table_name(table_name)
|
lancedb::utils::validate_table_name(table_name)
|
||||||
.map_err(|e| PyValueError::new_err(e.to_string()))
|
.map_err(|e| PyValueError::new_err(e.to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_fts_query(query: &Bound<'_, PyDict>) -> PyResult<FtsQuery> {
|
|
||||||
let query_type = query.keys().get_item(0)?.extract::<String>()?;
|
|
||||||
let query_value = query
|
|
||||||
.get_item(&query_type)?
|
|
||||||
.ok_or(PyValueError::new_err(format!(
|
|
||||||
"Query type {} not found",
|
|
||||||
query_type
|
|
||||||
)))?;
|
|
||||||
let query_value = query_value.downcast::<PyDict>()?;
|
|
||||||
|
|
||||||
match query_type.as_str() {
|
|
||||||
"match" => {
|
|
||||||
let column = query_value.keys().get_item(0)?.extract::<String>()?;
|
|
||||||
let params = query_value
|
|
||||||
.get_item(&column)?
|
|
||||||
.ok_or(PyValueError::new_err(format!(
|
|
||||||
"column {} not found",
|
|
||||||
column
|
|
||||||
)))?;
|
|
||||||
let params = params.downcast::<PyDict>()?;
|
|
||||||
|
|
||||||
let query = params
|
|
||||||
.get_item("query")?
|
|
||||||
.ok_or(PyValueError::new_err("query not found"))?
|
|
||||||
.extract::<String>()?;
|
|
||||||
let boost = params
|
|
||||||
.get_item("boost")?
|
|
||||||
.ok_or(PyValueError::new_err("boost not found"))?
|
|
||||||
.extract::<f32>()?;
|
|
||||||
let fuzziness = params
|
|
||||||
.get_item("fuzziness")?
|
|
||||||
.ok_or(PyValueError::new_err("fuzziness not found"))?
|
|
||||||
.extract::<Option<u32>>()?;
|
|
||||||
let max_expansions = params
|
|
||||||
.get_item("max_expansions")?
|
|
||||||
.ok_or(PyValueError::new_err("max_expansions not found"))?
|
|
||||||
.extract::<usize>()?;
|
|
||||||
|
|
||||||
let query = MatchQuery::new(query)
|
|
||||||
.with_column(Some(column))
|
|
||||||
.with_boost(boost)
|
|
||||||
.with_fuzziness(fuzziness)
|
|
||||||
.with_max_expansions(max_expansions);
|
|
||||||
Ok(query.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
"match_phrase" => {
|
|
||||||
let column = query_value.keys().get_item(0)?.extract::<String>()?;
|
|
||||||
let query = query_value
|
|
||||||
.get_item(&column)?
|
|
||||||
.ok_or(PyValueError::new_err(format!(
|
|
||||||
"column {} not found",
|
|
||||||
column
|
|
||||||
)))?
|
|
||||||
.extract::<String>()?;
|
|
||||||
|
|
||||||
let query = PhraseQuery::new(query).with_column(Some(column));
|
|
||||||
Ok(query.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
"boost" => {
|
|
||||||
let positive: Bound<'_, PyAny> = query_value
|
|
||||||
.get_item("positive")?
|
|
||||||
.ok_or(PyValueError::new_err("positive not found"))?;
|
|
||||||
let positive = positive.downcast::<PyDict>()?;
|
|
||||||
|
|
||||||
let negative = query_value
|
|
||||||
.get_item("negative")?
|
|
||||||
.ok_or(PyValueError::new_err("negative not found"))?;
|
|
||||||
let negative = negative.downcast::<PyDict>()?;
|
|
||||||
|
|
||||||
let negative_boost = query_value
|
|
||||||
.get_item("negative_boost")?
|
|
||||||
.ok_or(PyValueError::new_err("negative_boost not found"))?
|
|
||||||
.extract::<f32>()?;
|
|
||||||
|
|
||||||
let positive_query = parse_fts_query(positive)?;
|
|
||||||
let negative_query = parse_fts_query(negative)?;
|
|
||||||
let query = BoostQuery::new(positive_query, negative_query, Some(negative_boost));
|
|
||||||
|
|
||||||
Ok(query.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
"multi_match" => {
|
|
||||||
let query = query_value
|
|
||||||
.get_item("query")?
|
|
||||||
.ok_or(PyValueError::new_err("query not found"))?
|
|
||||||
.extract::<String>()?;
|
|
||||||
|
|
||||||
let columns = query_value
|
|
||||||
.get_item("columns")?
|
|
||||||
.ok_or(PyValueError::new_err("columns not found"))?
|
|
||||||
.extract::<Vec<String>>()?;
|
|
||||||
|
|
||||||
let boost = query_value
|
|
||||||
.get_item("boost")?
|
|
||||||
.ok_or(PyValueError::new_err("boost not found"))?
|
|
||||||
.extract::<Vec<f32>>()?;
|
|
||||||
|
|
||||||
let query = MultiMatchQuery::try_new(query, columns)
|
|
||||||
.and_then(|q| q.try_with_boosts(boost))
|
|
||||||
.map_err(|e| {
|
|
||||||
PyValueError::new_err(format!("Error creating MultiMatchQuery: {}", e))
|
|
||||||
})?;
|
|
||||||
Ok(query.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => Err(PyValueError::new_err(format!(
|
|
||||||
"Unsupported query type: {}",
|
|
||||||
query_type
|
|
||||||
))),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.19.1-beta.1"
|
version = "0.18.2-beta.1"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user