Compare commits

..

12 Commits

Author SHA1 Message Date
Colin P. McCabe
d6ea17073c test 2025-09-30 11:58:20 -07:00
BubbleCal
c123bbf391 Merge branch 'main' of https://github.com/lancedb/lancedb into add-ivfrq 2025-09-30 16:30:58 +08:00
Ayush Chaurasia
f941054baf docs: fix doc deployment and remove recipes workflow trigger (#2688) 2025-09-30 13:10:39 +05:30
BubbleCal
fb856005a9 update docs
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-09-29 18:24:58 +08:00
BubbleCal
5c1c2e2dd6 fmt
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-09-29 17:47:59 +08:00
BubbleCal
1beef5f6e3 fix
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-09-29 17:08:12 +08:00
BubbleCal
0913632584 feat: support IVF_RQ index type
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-09-29 16:53:43 +08:00
Ayush Chaurasia
1a81c46505 docs: transition to new docs (#2681) 2025-09-29 11:37:08 +05:30
Colin Patrick McCabe
82b25a71e9 feat: add support for test_remote_connections (#2666)
Add a new test feature which allows for running the lancedb tests
against a remote server. Convert over a few tests in src/connection.rs
as a proof of concept.

To make local development easier, the remote tests can be run locally
from a Makefile. This file can also be used to run the feature tests,
with a single invocation of 'make'. (The feature tests require bringing
up a docker compose environment.)
2025-09-26 11:24:43 -07:00
Jack Ye
13c613d45f chore: upgrade lance to v0.37.1-beta.1 (#2682) 2025-09-25 23:12:09 -07:00
Weston Pace
e07389a36c feat: allow bitmap indexes on large-string, binary, large-binary, and bitmap (#2678)
The underlying `pylance` already supported this, it was just blocked out
by an over-eager validation function

Closes #1981
2025-09-25 09:46:42 -07:00
Lance Release
e7e9e80b1d Bump version: 0.22.1 → 0.22.2-beta.0 2025-09-24 22:54:54 +00:00
48 changed files with 898 additions and 127 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.1"
current_version = "0.22.2-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -56,6 +56,7 @@ jobs:
with:
node-version: 20
cache: 'npm'
cache-dependency-path: docs/package-lock.json
- name: Install node dependencies
working-directory: node
run: |

View File

@@ -96,6 +96,7 @@ jobs:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang++-18
GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
steps:
- uses: actions/checkout@v4
with:
@@ -117,15 +118,14 @@ jobs:
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
- name: Start S3 integration test environment
working-directory: .
run: docker compose up --detach --wait
- name: Build
run: cargo build --all-features --tests --locked --examples
- name: Run tests
run: cargo test --all-features --locked
- name: Run feature tests
run: make -C ./lancedb feature-tests
- name: Run examples
run: cargo run --example simple --locked
- name: Run remote tests
run: make -C ./lancedb remote-tests
macos:
timeout-minutes: 30

View File

@@ -1,26 +0,0 @@
name: Trigger vectordb-recipers workflow
on:
push:
branches: [ main ]
pull_request:
paths:
- .github/workflows/trigger-vectordb-recipes.yml
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Trigger vectordb-recipes workflow
uses: actions/github-script@v6
with:
github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
script: |
const result = await github.rest.actions.createWorkflowDispatch({
owner: 'lancedb',
repo: 'vectordb-recipes',
workflow_id: 'examples-test.yml',
ref: 'main'
});
console.log(result);

95
Cargo.lock generated
View File

@@ -3597,8 +3597,7 @@ dependencies = [
[[package]]
name = "fsst"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0a0b1d16ce6b863be8ab766004d89ebf0779fd6ce31b0ef3bbc7fedaaad373"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4839,8 +4838,7 @@ dependencies = [
[[package]]
name = "lance"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42171f2af5d377e6bbcc8a8572144ee15b73a8f78ceb6160f1adeabf0d0f3e3c"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow",
"arrow-arith",
@@ -4923,8 +4921,7 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25ef9499a1e581112f45fbf743fdc8e24830cda0bd13396b11c71aa6e6cba083"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4943,8 +4940,7 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1101fffd5b161bbdc6e932d6c0a7f94cb1752b0f8cd6d18ef9064052ab901a84"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrayref",
"paste",
@@ -4991,8 +4987,7 @@ dependencies = [
[[package]]
name = "lance-core"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "527ee5e6472d058d8c66c702fbe318a3f60f971e652e60dcfc6349bdbc9b0733"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5058,8 +5053,7 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65a80f7f15f2d941ec7b8253625cbb8e12081ea27584dd1fbc657fb9fb377f7a"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow",
"arrow-array",
@@ -5107,8 +5101,7 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0495c8afa18f246ac4b337c47d7827560283783963dd2177862d91161478fd79"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow",
"arrow-array",
@@ -5167,8 +5160,7 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e80e9ae49d68b95d58e77d9177f68983dce4f0803ef42840e1631b38dd66adc"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -5242,8 +5234,7 @@ dependencies = [
[[package]]
name = "lance-file"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1707f9f5097b36c82d3a8524bb41c762c80d5dfa5e32aa7bfc6a1c0847a1cce"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -5334,10 +5325,10 @@ dependencies = [
[[package]]
name = "lance-index"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28ab52586a5a7f5371a5abf4862968231f8c0232ce0780bc456f1ec16e9370f9"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow",
"arrow-arith",
"arrow-array",
"arrow-ord",
"arrow-schema",
@@ -5373,12 +5364,14 @@ dependencies = [
"lance-table 0.37.0",
"libm",
"log",
"ndarray",
"num-traits",
"object_store",
"prost",
"prost-build",
"prost-types",
"rand 0.9.2",
"rand_distr 0.5.1",
"rayon",
"roaring",
"serde",
@@ -5435,8 +5428,7 @@ dependencies = [
[[package]]
name = "lance-io"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d606f9f6a7f8ec2cacf28dfce7b2fc39e7db9f0ec77f907b8e47c756e3dd163b"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow",
"arrow-arith",
@@ -5501,8 +5493,7 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9f1a94a5d966ff1eae817a835e3a57b34f73300f83a43bb28e7e2806695b8ba"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5596,8 +5587,7 @@ dependencies = [
[[package]]
name = "lance-table"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fac5c0ca6e5c285645465b95fb99fc464a1fd22a6d4b32ae0e0760f06b4b8a7f"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow",
"arrow-array",
@@ -5636,8 +5626,7 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384acc1dd13379a2ae24f3e3635d9c1f4fb4dc1534f7ffd2740c268f2eb73455"
source = "git+https://github.com/lancedb/lance.git?tag=v0.37.1-beta.1#aa3e902a448fc345b44a32945c7700e6b9228154"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -5648,8 +5637,9 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.22.1"
version = "0.22.2-beta.0"
dependencies = [
"anyhow",
"arrow",
"arrow-array",
"arrow-cast",
@@ -5736,7 +5726,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.22.1"
version = "0.22.2-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5756,7 +5746,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.25.1"
version = "0.25.2-beta.0"
dependencies = [
"arrow",
"async-trait",
@@ -6019,6 +6009,19 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matrixmultiply"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
dependencies = [
"autocfg",
"num_cpus",
"once_cell",
"rawpointer",
"thread-tree",
]
[[package]]
name = "md-5"
version = "0.10.6"
@@ -6258,6 +6261,21 @@ dependencies = [
"libloading",
]
[[package]]
name = "ndarray"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
dependencies = [
"matrixmultiply",
"num-complex",
"num-integer",
"num-traits",
"portable-atomic",
"portable-atomic-util",
"rawpointer",
]
[[package]]
name = "nom"
version = "7.1.3"
@@ -7739,6 +7757,12 @@ dependencies = [
"bitflags 2.9.4",
]
[[package]]
name = "rawpointer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.11.0"
@@ -9179,6 +9203,15 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "thread-tree"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbd370cb847953a25954d9f63e14824a36113f8c72eecf6eccef5dc4b45d630"
dependencies = [
"crossbeam-channel",
]
[[package]]
name = "thread_local"
version = "1.1.9"

View File

@@ -15,14 +15,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.37.0", default-features = false }
lance-index = "=0.37.0"
lance-linalg = "=0.37.0"
lance-table = "=0.37.0"
lance-testing = "=0.37.0"
lance-datafusion = "=0.37.0"
lance-encoding = "=0.37.0"
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", default-features = false, "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = "0.0.15"
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
export RUST_LOG=info
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"

18
ci/run_with_docker_compose.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# A script for running the given command together with a docker compose environment.
#
# Bring down the docker setup once the command is done running.
tear_down() {
docker compose -p fixture down
}
trap tear_down EXIT
set +xe
# Clean up any existing docker setup and bring up a new one.
docker compose -p fixture up --detach --wait || exit 1
"${@}"

51
ci/run_with_test_connection.sh Executable file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env bash
#
# A script for running the given command together with the lancedb cli.
#
die() {
echo $?
exit 1
}
check_command_exists() {
command="${1}"
which ${command} &> /dev/null || \
die "Unable to locate command: ${command}. Did you install it?"
}
if [[ ! -e ./lancedb ]]; then
ARCH="x64"
if [[ $OSTYPE == 'darwin'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'arm64' ]]; then
ARCH='arm64'
fi
OSTYPE="macos"
elif [[ $OSTYPE == 'linux'* ]]; then
if [[ $UNAME == 'aarch64' ]]; then
ARCH='arm64'
fi
OSTYPE="linux"
else
die "unknown OSTYPE: $OSTYPE"
fi
check_command_exists gh
TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
gh release \
--repo lancedb/sophon \
download lancedb-cli-v0.0.3 \
--pattern "${TARGET}" \
|| die "failed to fetch cli."
check_command_exists tar
tar xvf "${TARGET}" || die "tar failed."
[[ -e ./lancedb ]] || die "failed to extract lancedb."
fi
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"
"${@}"

View File

@@ -70,6 +70,22 @@ plugins:
- mkdocs-jupyter
- render_swagger:
allow_arbitrary_locations: true
- redirects:
redirect_maps:
# Redirect the home page and other top-level markdown files. This enables maximum SEO benefit
# other sub-pages are handled by the ingected js in overrides/partials/header.html
'index.md': 'https://lancedb.com/docs/'
'guides/tables.md': 'https://lancedb.com/docs/tables/'
'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
'basic.md': 'https://lancedb.com/docs/quickstart/'
'faq.md': 'https://lancedb.com/docs/faq/'
'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
'integrations.md': 'https://lancedb.com/docs/integrations/'
'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
markdown_extensions:
- admonition
@@ -386,4 +402,4 @@ extra:
- icon: fontawesome/brands/x-twitter
link: https://twitter.com/lancedb
- icon: fontawesome/brands/linkedin
link: https://www.linkedin.com/company/lancedb
link: https://www.linkedin.com/company/lancedb

View File

@@ -19,7 +19,13 @@
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
-->
<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
<p style="margin: 0; font-size: 1.1em;">
<strong>This documentation site is deprecated.</strong>
Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
lancedb.com/docs</a> for the latest information.
</p>
</div>
{% set class = "md-header" %}
{% if "navigation.tabs.sticky" in features %}
{% set class = class ~ " md-header--shadow md-header--lifted" %}
@@ -150,9 +156,9 @@
<div style="margin-left: 10px; margin-right: 5px;">
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
</a>
</div>
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
</a>
</div>
<div style="margin-left: 5px; margin-right: 5px;">
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
@@ -173,4 +179,77 @@
{% include "partials/tabs.html" %}
{% endif %}
{% endif %}
</header>
</header>
<script>
(function() {
function checkPathAndRedirect() {
var banner = document.getElementById('deprecation-banner');
if (document.querySelector('meta[http-equiv="refresh"]')) {
return; // The redirects plugin is already handling this page.
}
var currentPath = window.location.pathname;
var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
? currentPath.slice(0, -1)
: currentPath;
// These are the ONLY paths that should remain on the old site
var apiPaths = [
'/lancedb/python',
'/lancedb/javascript',
'/lancedb/js',
'/lancedb/api_reference'
];
var isApiPage = apiPaths.some(function(apiPath) {
return cleanPath.startsWith(apiPath);
});
if (isApiPage) {
if (banner) {
banner.style.display = 'none';
}
} else {
if (banner) {
banner.style.display = 'block';
}
// Add noindex meta tag to prevent indexing of old docs for seo
var noindexMeta = document.createElement('meta');
noindexMeta.setAttribute('name', 'robots');
noindexMeta.setAttribute('content', 'noindex, follow');
document.head.appendChild(noindexMeta);
// Add canonical link to point to the new docs to reward new site for seo
var canonicalLink = document.createElement('link');
canonicalLink.setAttribute('rel', 'canonical');
canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
document.head.appendChild(canonicalLink);
window.location.replace('https://lancedb.com/docs');
}
}
// Run the check only if doc is ready. This makes sure we catch the initial load
// and redirect.
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
} else {
checkPathAndRedirect();
}
// Use an interval to handle subsequent navigation clicks.
var lastPath = window.location.pathname;
setInterval(function() {
if (window.location.pathname !== lastPath) {
lastPath = window.location.pathname;
checkPathAndRedirect();
}
}, 2000); // keeping it 2 second to make it easy for user to understand
// what's happening
})();
</script>

View File

@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
griffe
mkdocs-render-swagger-plugin
pydantic
mkdocs-redirects

View File

@@ -194,6 +194,37 @@ currently is also a memory intensive operation.
***
### ivfRq()
```ts
static ivfRq(options?): Index
```
Create an IvfRq index
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
and organizes them into IVF partitions.
The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
between index size (and thus search speed) and index accuracy.
The partitioning process is called IVF and the `num_partitions` parameter controls how
many groups to create.
Note that training an IVF RQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
#### Returns
[`Index`](Index.md)
***
### labelList()
```ts

View File

@@ -52,6 +52,30 @@ the merge result
***
### useIndex()
```ts
useIndex(useIndex): MergeInsertBuilder
```
Controls whether to use indexes for the merge operation.
When set to `true` (the default), the operation will use an index if available
on the join key for improved performance. When set to `false`, it forces a full
table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.
#### Parameters
* **useIndex**: `boolean`
Whether to use indices for the merge operation. Defaults to `true`.
#### Returns
[`MergeInsertBuilder`](MergeInsertBuilder.md)
***
### whenMatchedUpdateAll()
```ts

View File

@@ -68,6 +68,7 @@
- [IndexStatistics](interfaces/IndexStatistics.md)
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [IvfRqOptions](interfaces/IvfRqOptions.md)
- [MergeResult](interfaces/MergeResult.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.1-final.0</version>
<version>0.22.2-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.1-final.0</version>
<version>0.22.2-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.1-final.0</version>
<version>0.22.2-beta.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.1"
version = "0.22.2-beta.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -804,6 +804,15 @@ describe("When creating an index", () => {
});
});
it("should be able to create IVF_RQ", async () => {
await tbl.createIndex("vec", {
config: Index.ivfRq({
numPartitions: 10,
numBits: 1,
}),
});
});
it("should allow me to replace (or not) an existing index", async () => {
await tbl.createIndex("id");
// Default is replace=true

View File

@@ -85,6 +85,7 @@ export {
Index,
IndexOptions,
IvfPqOptions,
IvfRqOptions,
IvfFlatOptions,
HnswPqOptions,
HnswSqOptions,

View File

@@ -112,6 +112,77 @@ export interface IvfPqOptions {
sampleRate?: number;
}
export interface IvfRqOptions {
/**
* The number of IVF partitions to create.
*
* This value should generally scale with the number of rows in the dataset.
* By default the number of partitions is the square root of the number of
* rows.
*
* If this value is too large then the first part of the search (picking the
* right partition) will be slow. If this value is too small then the second
* part of the search (searching within a partition) will be slow.
*/
numPartitions?: number;
/**
* Number of bits per dimension for residual quantization.
*
* This value controls how much each residual component is compressed. The more
* bits, the more accurate the index will be but the slower search. Typical values
* are small integers; the default is 1 bit per dimension.
*/
numBits?: number;
/**
* Distance type to use to build the index.
*
* Default value is "l2".
*
* This is used when training the index to calculate the IVF partitions
* (vectors are grouped in partitions with similar vectors according to this
* distance type) and during quantization.
*
* The distance type used to train an index MUST match the distance type used
* to search the index. Failure to do so will yield inaccurate results.
*
* The following distance types are available:
*
* "l2" - Euclidean distance.
* "cosine" - Cosine distance.
* "dot" - Dot product.
*/
distanceType?: "l2" | "cosine" | "dot";
/**
* Max iterations to train IVF kmeans.
*
* When training an IVF index we use kmeans to calculate the partitions. This parameter
* controls how many iterations of kmeans to run.
*
* The default value is 50.
*/
maxIterations?: number;
/**
* The number of vectors, per partition, to sample when training IVF kmeans.
*
* When an IVF index is trained, we need to calculate partitions. These are groups
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
*
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
* random sample of the data. This parameter controls the size of the sample. The total
* number of vectors used to train the index is `sample_rate * num_partitions`.
*
* Increasing this value might improve the quality of the index but in most cases the
* default should be sufficient.
*
* The default value is 256.
*/
sampleRate?: number;
}
/**
* Options to create an `HNSW_PQ` index
*/
@@ -523,6 +594,35 @@ export class Index {
options?.distanceType,
options?.numPartitions,
options?.numSubVectors,
options?.numBits,
options?.maxIterations,
options?.sampleRate,
),
);
}
/**
* Create an IvfRq index
*
* IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
* and organizes them into IVF partitions.
*
* The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
* The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
* between index size (and thus search speed) and index accuracy.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF RQ index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfRq(options?: Partial<IvfRqOptions>) {
return new Index(
LanceDbIndex.ivfRq(
options?.distanceType,
options?.numPartitions,
options?.numBits,
options?.maxIterations,
options?.sampleRate,
),

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.1",
"version": "0.22.2-beta.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.1",
"version": "0.22.2-beta.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -6,6 +6,7 @@ use std::sync::Mutex;
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
use lancedb::index::vector::{
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
IvfRqIndexBuilder,
};
use lancedb::index::Index as LanceDbIndex;
use napi_derive::napi;
@@ -65,6 +66,36 @@ impl Index {
})
}
#[napi(factory)]
pub fn ivf_rq(
distance_type: Option<String>,
num_partitions: Option<u32>,
num_bits: Option<u32>,
max_iterations: Option<u32>,
sample_rate: Option<u32>,
) -> napi::Result<Self> {
let mut ivf_rq_builder = IvfRqIndexBuilder::default();
if let Some(distance_type) = distance_type {
let distance_type = parse_distance_type(distance_type)?;
ivf_rq_builder = ivf_rq_builder.distance_type(distance_type);
}
if let Some(num_partitions) = num_partitions {
ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
}
if let Some(num_bits) = num_bits {
ivf_rq_builder = ivf_rq_builder.num_bits(num_bits);
}
if let Some(max_iterations) = max_iterations {
ivf_rq_builder = ivf_rq_builder.max_iterations(max_iterations);
}
if let Some(sample_rate) = sample_rate {
ivf_rq_builder = ivf_rq_builder.sample_rate(sample_rate);
}
Ok(Self {
inner: Mutex::new(Some(LanceDbIndex::IvfRq(ivf_rq_builder))),
})
}
#[napi(factory)]
pub fn ivf_flat(
distance_type: Option<String>,

View File

@@ -605,9 +605,53 @@ class IvfPq:
target_partition_size: Optional[int] = None
@dataclass
class IvfRq:
"""Describes an IVF RQ Index
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
residual quantization and organizes them into IVF partitions. Parameters
largely mirror IVF-PQ for consistency.
Attributes
----------
distance_type: str, default "l2"
Distance metric used to train the index and for quantization.
The following distance types are available:
"l2" - Euclidean distance.
"cosine" - Cosine distance.
"dot" - Dot product.
num_partitions: int, default sqrt(num_rows)
Number of IVF partitions to create.
num_bits: int, default 1
Number of bits to encode each dimension.
max_iterations: int, default 50
Max iterations to train kmeans when computing IVF partitions.
sample_rate: int, default 256
Controls the number of training vectors: sample_rate * num_partitions.
target_partition_size, default is 8192
Target size of each partition.
"""
distance_type: Literal["l2", "cosine", "dot"] = "l2"
num_partitions: Optional[int] = None
num_bits: int = 1
max_iterations: int = 50
sample_rate: int = 256
target_partition_size: Optional[int] = None
__all__ = [
"BTree",
"IvfPq",
"IvfRq",
"IvfFlat",
"HnswPq",
"HnswSq",

View File

@@ -44,7 +44,7 @@ import numpy as np
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
from .merge import LanceMergeInsertBuilder
from .pydantic import LanceModel, model_to_dict
from .query import (
@@ -1991,7 +1991,7 @@ class LanceTable(Table):
index_cache_size: Optional[int] = None,
num_bits: int = 8,
index_type: Literal[
"IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
"IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
] = "IVF_PQ",
max_iterations: int = 50,
sample_rate: int = 256,
@@ -2039,6 +2039,15 @@ class LanceTable(Table):
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_RQ":
config = IvfRq(
distance_type=metric,
num_partitions=num_partitions,
num_bits=num_bits,
max_iterations=max_iterations,
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_HNSW_PQ":
config = HnswPq(
distance_type=metric,
@@ -3330,7 +3339,7 @@ class AsyncTable:
*,
replace: Optional[bool] = None,
config: Optional[
Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
] = None,
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
@@ -3369,11 +3378,12 @@ class AsyncTable:
"""
if config is not None:
if not isinstance(
config, (IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS)
config,
(IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
):
raise TypeError(
"config must be an instance of IvfPq, HnswPq, HnswSq, BTree,"
" Bitmap, LabelList, or FTS"
"config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
" Bitmap, LabelList, or FTS, but got " + str(type(config))
)
try:
await self._inner.create_index(

View File

@@ -18,10 +18,17 @@ AddMode = Literal["append", "overwrite"]
CreateMode = Literal["create", "overwrite"]
# Index type literals
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"]
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
IndexType = Literal[
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
"IVF_PQ",
"IVF_HNSW_PQ",
"IVF_HNSW_SQ",
"FTS",
"BTREE",
"BITMAP",
"LABEL_LIST",
"IVF_RQ",
]
# Tokenizer literals

View File

@@ -8,7 +8,17 @@ import pyarrow as pa
import pytest
import pytest_asyncio
from lancedb import AsyncConnection, AsyncTable, connect_async
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from lancedb.index import (
BTree,
IvfFlat,
IvfPq,
IvfRq,
Bitmap,
LabelList,
HnswPq,
HnswSq,
FTS,
)
@pytest_asyncio.fixture
@@ -35,6 +45,8 @@ async def some_table(db_async):
"tags": [
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
],
"is_active": [random.choice([True, False]) for _ in range(NROWS)],
"data": [random.randbytes(random.randint(0, 128)) for _ in range(NROWS)],
}
)
return await db_async.create_table(
@@ -99,10 +111,17 @@ async def test_create_fixed_size_binary_index(some_table: AsyncTable):
@pytest.mark.asyncio
async def test_create_bitmap_index(some_table: AsyncTable):
await some_table.create_index("id", config=Bitmap())
await some_table.create_index("is_active", config=Bitmap())
await some_table.create_index("data", config=Bitmap())
indices = await some_table.list_indices()
assert str(indices) == '[Index(Bitmap, columns=["id"], name="id_idx")]'
indices = await some_table.list_indices()
assert len(indices) == 1
assert len(indices) == 3
assert indices[0].index_type == "Bitmap"
assert indices[0].columns == ["id"]
assert indices[1].index_type == "Bitmap"
assert indices[1].columns == ["is_active"]
assert indices[2].index_type == "Bitmap"
assert indices[2].columns == ["data"]
index_name = indices[0].name
stats = await some_table.index_stats(index_name)
assert stats.index_type == "BITMAP"
@@ -111,6 +130,11 @@ async def test_create_bitmap_index(some_table: AsyncTable):
assert stats.num_unindexed_rows == 0
assert stats.num_indices == 1
assert (
"ScalarIndexQuery"
in await some_table.query().where("is_active = TRUE").explain_plan()
)
@pytest.mark.asyncio
async def test_create_label_list_index(some_table: AsyncTable):
@@ -181,6 +205,16 @@ async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
assert stats.loss >= 0.0
@pytest.mark.asyncio
async def test_create_ivfrq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=IvfRq(num_bits=1))
indices = await some_table.list_indices()
assert len(indices) == 1
assert indices[0].index_type == "IvfRq"
assert indices[0].columns == ["vector"]
assert indices[0].name == "vector_idx"
@pytest.mark.asyncio
async def test_create_hnswpq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=HnswPq(num_partitions=10))

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lancedb::index::vector::IvfFlatIndexBuilder;
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder};
use lancedb::index::{
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
@@ -87,6 +87,22 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
}
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
},
"IvfRq" => {
let params = source.extract::<IvfRqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
let mut ivf_rq_builder = IvfRqIndexBuilder::default()
.distance_type(distance_type)
.max_iterations(params.max_iterations)
.sample_rate(params.sample_rate)
.num_bits(params.num_bits);
if let Some(num_partitions) = params.num_partitions {
ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
}
if let Some(target_partition_size) = params.target_partition_size {
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
}
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
},
"HnswPq" => {
let params = source.extract::<IvfHnswPqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
@@ -170,6 +186,16 @@ struct IvfPqParams {
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)]
struct IvfRqParams {
distance_type: String,
num_partitions: Option<u32>,
num_bits: u32,
max_iterations: u32,
sample_rate: u32,
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)]
struct IvfHnswPqParams {
distance_type: String,

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.1"
version = "0.22.2-beta.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -82,6 +82,7 @@ crunchy.workspace = true
bytemuck_derive.workspace = true
[dev-dependencies]
anyhow = "1"
tempfile = "3.5.0"
rand = { version = "0.9", features = ["small_rng"] }
random_word = { version = "0.4.3", features = ["en"] }

19
rust/lancedb/Makefile Normal file
View File

@@ -0,0 +1,19 @@
#
# Makefile for running tests.
#
# Run all tests.
all-tests: feature-tests remote-tests
# Run tests for every feature. This requires using docker compose to set up
# the environment.
feature-tests:
../../ci/run_with_docker_compose.sh \
cargo test --all-features --tests --locked --examples
.PHONY: feature-tests
# Run tests against remote endpoints.
remote-tests:
../../ci/run_with_test_connection.sh \
cargo test --features remote --locked
.PHONY: remote-tests

View File

@@ -1170,6 +1170,7 @@ mod tests {
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions};
use crate::test_connection::test_utils::new_test_connection;
use arrow::compute::concat_batches;
use arrow_array::RecordBatchReader;
use arrow_schema::{DataType, Field, Schema};
@@ -1185,11 +1186,8 @@ mod tests {
#[tokio::test]
async fn test_connect() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();
let db = connect(uri).execute().await.unwrap();
assert_eq!(db.uri, uri);
let tc = new_test_connection().await.unwrap();
assert_eq!(tc.connection.uri, tc.uri);
}
#[cfg(not(windows))]
@@ -1255,16 +1253,10 @@ mod tests {
assert_eq!(tables, names[..7]);
}
#[tokio::test]
async fn test_connect_s3() {
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
}
#[tokio::test]
async fn test_open_table() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();
let db = connect(uri).execute().await.unwrap();
let tc = new_test_connection().await.unwrap();
let db = tc.connection;
assert_eq!(db.table_names().execute().await.unwrap().len(), 0);
// open non-exist table

View File

@@ -728,7 +728,7 @@ impl Database for ListingDatabase {
let target_uri = self.table_uri(&request.target_table_name)?;
source_dataset
.shallow_clone(&target_uri, version_ref, storage_params)
.shallow_clone(&target_uri, version_ref, Some(storage_params))
.await
.map_err(|e| Error::Lance { source: e })?;

View File

@@ -8,6 +8,7 @@ use std::sync::Arc;
use std::time::Duration;
use vector::IvfFlatIndexBuilder;
use crate::index::vector::IvfRqIndexBuilder;
use crate::{table::BaseTable, DistanceType, Error, Result};
use self::{
@@ -53,6 +54,9 @@ pub enum Index {
/// IVF index with Product Quantization
IvfPq(IvfPqIndexBuilder),
/// IVF index with RabitQ Quantization
IvfRq(IvfRqIndexBuilder),
/// IVF-HNSW index with Product Quantization
/// It is a variant of the HNSW algorithm that uses product quantization to compress the vectors.
IvfHnswPq(IvfHnswPqIndexBuilder),
@@ -275,6 +279,8 @@ pub enum IndexType {
IvfFlat,
#[serde(alias = "IVF_PQ")]
IvfPq,
#[serde(alias = "IVF_RQ")]
IvfRq,
#[serde(alias = "IVF_HNSW_PQ")]
IvfHnswPq,
#[serde(alias = "IVF_HNSW_SQ")]
@@ -296,6 +302,7 @@ impl std::fmt::Display for IndexType {
match self {
Self::IvfFlat => write!(f, "IVF_FLAT"),
Self::IvfPq => write!(f, "IVF_PQ"),
Self::IvfRq => write!(f, "IVF_RQ"),
Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
Self::BTree => write!(f, "BTREE"),
@@ -317,6 +324,7 @@ impl std::str::FromStr for IndexType {
"FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_FLAT" => Ok(Self::IvfFlat),
"IVF_PQ" => Ok(Self::IvfPq),
"IVF_RQ" => Ok(Self::IvfRq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
_ => Err(Error::InvalidInput {

View File

@@ -291,6 +291,52 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
}
}
/// Builder for an IVF RQ index.
///
/// This index stores a compressed (quantized) copy of every vector. Each dimension
/// is quantized into a small number of bits.
/// The parameters `num_bits` control this process, providing a tradeoff
/// between index size (and thus search speed) and index accuracy.
///
/// The partitioning process is called IVF and the `num_partitions` parameter controls how
/// many groups to create.
///
/// Note that training an IVF RQ index on a large dataset is a slow operation and
/// currently is also a memory intensive operation.
#[derive(Debug, Clone)]
pub struct IvfRqIndexBuilder {
// IVF
pub(crate) distance_type: DistanceType,
pub(crate) num_partitions: Option<u32>,
pub(crate) num_bits: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
pub(crate) target_partition_size: Option<u32>,
}
impl Default for IvfRqIndexBuilder {
fn default() -> Self {
Self {
distance_type: DistanceType::L2,
num_partitions: None,
num_bits: None,
sample_rate: 256,
max_iterations: 50,
target_partition_size: None,
}
}
}
impl IvfRqIndexBuilder {
impl_distance_type_setter!();
impl_ivf_params_setter!();
pub fn num_bits(mut self, num_bits: u32) -> Self {
self.num_bits = Some(num_bits);
self
}
}
/// Builder for an IVF HNSW PQ index.
///
/// This index is a combination of IVF and HNSW.

View File

@@ -206,6 +206,7 @@ pub mod query;
pub mod remote;
pub mod rerankers;
pub mod table;
pub mod test_connection;
pub mod utils;
use std::fmt::Display;

View File

@@ -1838,6 +1838,18 @@ impl NativeTable {
);
Ok(Box::new(lance_idx_params))
}
Index::IvfRq(index) => {
Self::validate_index_type(field, "IVF RQ", supported_vector_data_type)?;
let num_partitions = self
.get_num_partitions(index.num_partitions, false, None)
.await?;
let lance_idx_params = VectorIndexParams::ivf_rq(
num_partitions as usize,
index.num_bits.unwrap_or(1) as u8,
index.distance_type.into(),
);
Ok(Box::new(lance_idx_params))
}
Index::IvfHnswPq(index) => {
Self::validate_index_type(field, "IVF HNSW PQ", supported_vector_data_type)?;
let dim = Self::get_vector_dimension(field)?;
@@ -1907,9 +1919,11 @@ impl NativeTable {
Index::Bitmap(_) => IndexType::Bitmap,
Index::LabelList(_) => IndexType::LabelList,
Index::FTS(_) => IndexType::Inverted,
Index::IvfFlat(_) | Index::IvfPq(_) | Index::IvfHnswPq(_) | Index::IvfHnswSq(_) => {
IndexType::Vector
}
Index::IvfFlat(_)
| Index::IvfPq(_)
| Index::IvfRq(_)
| Index::IvfHnswPq(_)
| Index::IvfHnswSq(_) => IndexType::Vector,
}
}
@@ -2760,6 +2774,7 @@ mod tests {
RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
UInt32Array,
};
use arrow_array::{BinaryArray, LargeBinaryArray};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType, Field, Schema, TimeUnit};
use futures::TryStreamExt;
@@ -3725,6 +3740,10 @@ mod tests {
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("category", DataType::Utf8, true),
Field::new("large_category", DataType::LargeUtf8, true),
Field::new("is_active", DataType::Boolean, true),
Field::new("data", DataType::Binary, true),
Field::new("large_data", DataType::LargeBinary, true),
]));
let batch = RecordBatch::try_new(
@@ -3734,6 +3753,16 @@ mod tests {
Arc::new(StringArray::from_iter_values(
(0..100).map(|i| format!("category_{}", i % 5)),
)),
Arc::new(LargeStringArray::from_iter_values(
(0..100).map(|i| format!("large_category_{}", i % 5)),
)),
Arc::new(BooleanArray::from_iter((0..100).map(|i| Some(i % 2 == 0)))),
Arc::new(BinaryArray::from_iter_values(
(0_u32..100).map(|i| i.to_le_bytes()),
)),
Arc::new(LargeBinaryArray::from_iter_values(
(0_u32..100).map(|i| i.to_le_bytes()),
)),
],
)
.unwrap();
@@ -3754,12 +3783,58 @@ mod tests {
.await
.unwrap();
// Create bitmap index on the "is_active" column
table
.create_index(&["is_active"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Create bitmap index on the "data" column
table
.create_index(&["data"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Create bitmap index on the "large_data" column
table
.create_index(&["large_data"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Create bitmap index on the "large_category" column
table
.create_index(&["large_category"], Index::Bitmap(Default::default()))
.execute()
.await
.unwrap();
// Verify the index was created
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index_configs.len(), 5);
let mut configs_iter = index_configs.into_iter();
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["category".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["is_active".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["data".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["large_data".to_string()]);
let index = configs_iter.next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["large_category".to_string()]);
}
#[tokio::test]

View File

@@ -0,0 +1,126 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! Functions for testing connections.
#[cfg(test)]
pub mod test_utils {
use regex::Regex;
use std::env;
use std::io::{BufRead, BufReader};
use std::process::{Child, ChildStdout, Command, Stdio};
use crate::{connect, Connection};
use anyhow::{bail, Result};
use tempfile::{tempdir, TempDir};
pub struct TestConnection {
pub uri: String,
pub connection: Connection,
_temp_dir: Option<TempDir>,
_process: Option<TestProcess>,
}
struct TestProcess {
child: Child,
}
impl Drop for TestProcess {
#[allow(unused_must_use)]
fn drop(&mut self) {
self.child.kill();
}
}
pub async fn new_test_connection() -> Result<TestConnection> {
match env::var("CREATE_LANCEDB_TEST_CONNECTION_SCRIPT") {
Ok(script_path) => new_remote_connection(&script_path).await,
Err(_e) => new_local_connection().await,
}
}
async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
let temp_dir = tempdir()?;
let data_path = temp_dir.path().to_str().unwrap().to_string();
let child_result = Command::new(script_path)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.arg(data_path.clone())
.spawn();
if child_result.is_err() {
bail!(format!(
"Unable to run {}: {:?}",
script_path,
child_result.err()
));
}
let mut process = TestProcess {
child: child_result.unwrap(),
};
let stdout = BufReader::new(process.child.stdout.take().unwrap());
let port = read_process_port(stdout)?;
let uri = "db://test";
let host_override = format!("http://localhost:{}", port);
let connection = create_new_connection(uri, &host_override).await?;
Ok(TestConnection {
uri: uri.to_string(),
connection,
_temp_dir: Some(temp_dir),
_process: Some(process),
})
}
fn read_process_port(mut stdout: BufReader<ChildStdout>) -> Result<String> {
let mut line = String::new();
let re = Regex::new(r"Query node now listening on 0.0.0.0:(.*)").unwrap();
loop {
let result = stdout.read_line(&mut line);
if let Err(err) = result {
bail!(format!(
"read_process_port: error while reading from process output: {}",
err
));
} else if result.unwrap() == 0 {
bail!("read_process_port: hit EOF before reading port from process output.");
}
if re.is_match(&line) {
let caps = re.captures(&line).unwrap();
return Ok(caps[1].to_string());
}
}
}
#[cfg(feature = "remote")]
async fn create_new_connection(
uri: &str,
host_override: &str,
) -> crate::error::Result<Connection> {
connect(uri)
.region("us-east-1")
.api_key("sk_localtest")
.host_override(host_override)
.execute()
.await
}
#[cfg(not(feature = "remote"))]
async fn create_new_connection(
_uri: &str,
_host_override: &str,
) -> crate::error::Result<Connection> {
panic!("remote feature not supported");
}
async fn new_local_connection() -> Result<TestConnection> {
let temp_dir = tempdir()?;
let uri = temp_dir.path().to_str().unwrap();
let connection = connect(uri).execute().await?;
Ok(TestConnection {
uri: uri.to_string(),
connection,
_temp_dir: Some(temp_dir),
_process: None,
})
}
}

View File

@@ -39,7 +39,7 @@ impl PatchStoreParam for Option<ObjectStoreParams> {
let mut params = self.unwrap_or_default();
if params.object_store_wrapper.is_some() {
return Err(Error::Other {
message: "can not patch param because object store is already set".into(),
message: "can not patch param because object store is already set.".into(),
source: None,
});
}
@@ -195,7 +195,15 @@ pub fn supported_btree_data_type(dtype: &DataType) -> bool {
}
pub fn supported_bitmap_data_type(dtype: &DataType) -> bool {
dtype.is_integer() || matches!(dtype, DataType::Utf8)
dtype.is_integer()
|| matches!(
dtype,
DataType::Utf8
| DataType::LargeUtf8
| DataType::Binary
| DataType::LargeBinary
| DataType::Boolean
)
}
pub fn supported_label_list_data_type(dtype: &DataType) -> bool {