Compare commits

..

24 Commits

Author SHA1 Message Date
Lance Release
12c7bd18a5 Bump version: 0.17.0-beta.2 → 0.17.0-beta.3 2024-12-04 01:13:18 +00:00
LuQQiu
c6bf6a25d6 feat: add remote db uri path with folder prefix (#1901)
Add remote database folder prefix
support db://bucket/path/to/folder/
2024-12-03 16:51:18 -08:00
Weston Pace
c998a47e17 feat: add a pyarrow dataset adapter for LanceDB tables (#1902)
This currently only works for local tables (remote tables cannot be
queried)
This is also exclusive to the sync interface. However, since the pyarrow
dataset interface is synchronous I am not sure if there is much value in
making an async-wrapping variant.

In addition, I added a `to_batches` method to the base query in the sync
API. This already exists in the async API. In the sync API this PR only
adds support for vector queries and scalar queries and not for hybrid or
FTS queries.
2024-12-03 15:42:54 -08:00
Frank Liu
d8c758513c feat: add multimodal capabilities for Voyage embedder (#1878)
Co-authored-by: Will Jones <willjones127@gmail.com>
2024-12-03 10:25:48 -08:00
Will Jones
3795e02ee3 chore: fix ci on main (#1899) 2024-12-02 15:21:18 -08:00
Mr. Doge
c7d424b2f3 ci: aarch64-pc-windows-msvc (#1890)
`npm run pack-build -- -t $TARGET_TRIPLE`
was needed instead of
`npm run pack-build -t $TARGET_TRIPLE`
https://github.com/lancedb/lancedb/pull/1889

some documentation about `*-pc-windows-msvc` cross-compilation (from
alpine):
https://github.com/lancedb/lancedb/pull/1831#issuecomment-2497156918

only `arm64` is used in the `matrix` config,
since the `x86_64` build produced by `runs-on: windows-2022` already works
2024-12-02 11:17:37 -08:00
Bert
1efb9914ee ci: fix failing python release (#1896)
Fix failing python release for windows:
https://github.com/lancedb/lancedb/actions/runs/12019637086/job/33506642964

Also updates pkginfo to fix twine build as suggested here:
https://github.com/pypi/warehouse/issues/15611
failing release:
https://github.com/lancedb/lancedb/actions/runs/12091344173/job/33719622146
2024-12-02 11:05:29 -08:00
Lance Release
83e26a231e Updating package-lock.json 2024-11-29 22:46:45 +00:00
Lance Release
72a17b2de4 Bump version: 0.14.0-beta.0 → 0.14.0-beta.1 2024-11-29 22:46:20 +00:00
Lance Release
4231925476 Bump version: 0.17.0-beta.1 → 0.17.0-beta.2 2024-11-29 22:45:55 +00:00
Lance Release
84a6693294 Bump version: 0.17.0-beta.0 → 0.17.0-beta.1 2024-11-29 18:16:02 +00:00
Ryan Green
6c2d4c10a4 feat: support remote options for remote lancedb connection (#1895)
* Support subset of storage options as remote options
* Send Azure storage account name via HTTP header
2024-11-29 14:08:13 -03:30
Ryan Green
d914722f79 Revert "feat: support remote options for remote lancedb connection. Send Azure storage account name via HTTP header."
This reverts commit a6e4034dba.
2024-11-29 11:06:18 -03:30
Ryan Green
a6e4034dba feat: support remote options for remote lancedb connection. Send Azure storage account name via HTTP header. 2024-11-29 11:05:04 -03:30
QianZhu
2616a50502 fix: test errors after setting default limit (#1891) 2024-11-26 16:03:16 -08:00
LuQQiu
7b5e9d824a fix: dynamodb external manifest drop table (#1866)
second pr of https://github.com/lancedb/lancedb/issues/1812
2024-11-26 13:20:48 -08:00
QianZhu
3b173e7cb9 fix: default limit for remote nodejs client (#1886)
https://github.com/lancedb/lancedb/issues/1804
2024-11-26 11:01:25 -08:00
Mr. Doge
d496ab13a0 ci: linux: specify target triple for neon pack-build (vectordb) (#1889)
fixes the issue where all `neon pack-build` packs are named
`vectordb-linux-x64-musl-*.tgz`, even when cross-compiling

adds 2nd param:
`TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}`
`npm run pack-build -- -t $TARGET_TRIPLE`
2024-11-26 10:57:17 -08:00
Will Jones
69d9beebc7 docs: improve style and introduction to Python API docs (#1885)
I found the signatures difficult to read and the parameter section not
very space efficient.
2024-11-26 09:17:35 -08:00
Bert
d32360b99d feat: support overwrite and exist_ok mode for remote create_table (#1883)
Support passing modes "overwrite" and "exist_ok" when creating a remote
table.
2024-11-26 11:38:36 -05:00
Will Jones
9fa08bfa93 ci: use correct runner for vectordb (#1881)
We already do this for `gnu` builds, we should do this also for `musl`
builds.
2024-11-25 16:17:10 -08:00
LuQQiu
d6d9cb7415 feat: bump lance to 0.20.0b3 (#1882)
Bump lance version.
Upstream change log:
https://github.com/lancedb/lance/releases/tag/v0.20.0-beta.3
2024-11-25 16:15:44 -08:00
Lance Release
990d93f553 Updating package-lock.json 2024-11-25 22:06:39 +00:00
Lance Release
0832cba3c6 Bump version: 0.13.1-beta.0 → 0.14.0-beta.0 2024-11-25 22:06:14 +00:00
45 changed files with 1022 additions and 97 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.13.1-beta.0"
current_version = "0.14.0-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -133,7 +133,7 @@ jobs:
free -h
- name: Build Linux Artifacts
run: |
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }}
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
@@ -143,7 +143,7 @@ jobs:
node-linux-musl:
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
runs-on: ubuntu-latest
runs-on: ${{ matrix.config.runner }}
container: alpine:edge
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -152,7 +152,10 @@ jobs:
matrix:
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
runner: buildjet-16vcpu-ubuntu-2204-arm
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -185,7 +188,7 @@ jobs:
- name: Build Linux Artifacts
run: |
source ./saved_env
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }}
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-musl
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
@@ -246,7 +249,7 @@ jobs:
nodejs-linux-musl:
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
runs-on: ubuntu-latest
runs-on: ${{ matrix.config.runner }}
container: alpine:edge
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -255,7 +258,10 @@ jobs:
matrix:
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
runner: buildjet-16vcpu-ubuntu-2204-arm
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -334,6 +340,50 @@ jobs:
path: |
node/dist/lancedb-vectordb-win32*.tgz
node-windows-arm64:
name: vectordb ${{ matrix.config.arch }}-pc-windows-msvc
runs-on: ubuntu-latest
container: alpine:edge
strategy:
fail-fast: false
matrix:
config:
# - arch: x86_64
- arch: aarch64
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
run: |
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
echo "source $HOME/.cargo/env" >> saved_env
echo "export CC=clang" >> saved_env
echo "export AR=llvm-ar" >> saved_env
source "$HOME/.cargo/env"
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
- name: Configure x86_64 build
if: ${{ matrix.config.arch == 'x86_64' }}
run: |
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
- name: Configure aarch64 build
if: ${{ matrix.config.arch == 'aarch64' }}
run: |
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
- name: Build Windows Artifacts
run: |
source ./saved_env
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-pc-windows-msvc
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-windows-${{ matrix.config.arch }}
path: |
node/dist/lancedb-vectordb-win32*.tgz
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
# node-windows-arm64:
# name: vectordb win32-arm64-msvc
@@ -472,6 +522,52 @@ jobs:
path: |
nodejs/dist/*.node
nodejs-windows-arm64:
name: lancedb ${{ matrix.config.arch }}-pc-windows-msvc
runs-on: ubuntu-latest
container: alpine:edge
strategy:
fail-fast: false
matrix:
config:
# - arch: x86_64
- arch: aarch64
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
run: |
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
echo "source $HOME/.cargo/env" >> saved_env
echo "export CC=clang" >> saved_env
echo "export AR=llvm-ar" >> saved_env
source "$HOME/.cargo/env"
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
printf '#!/bin/sh\ncargo "$@"' > $HOME/.cargo/bin/cargo-xwin
chmod u+x $HOME/.cargo/bin/cargo-xwin
- name: Configure x86_64 build
if: ${{ matrix.config.arch == 'x86_64' }}
run: |
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
- name: Configure aarch64 build
if: ${{ matrix.config.arch == 'aarch64' }}
run: |
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
- name: Build Windows Artifacts
run: |
source ./saved_env
bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v4
with:
name: nodejs-native-windows-${{ matrix.config.arch }}
path: |
nodejs/dist/*.node
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
# nodejs-windows-arm64:
# name: lancedb win32-arm64-msvc
@@ -568,7 +664,7 @@ jobs:
release:
name: vectordb NPM Publish
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows, node-windows-arm64]
runs-on: ubuntu-latest
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -608,7 +704,7 @@ jobs:
release-nodejs:
name: lancedb NPM Publish
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows, nodejs-windows-arm64]
runs-on: ubuntu-latest
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')

View File

@@ -83,7 +83,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.12
- uses: ./.github/workflows/build_windows_wheel
with:
python-minor-version: 8

View File

@@ -17,6 +17,7 @@ runs:
run: |
python -m pip install --upgrade pip
pip install twine
python3 -m pip install --upgrade pkginfo
- name: Choose repo
shell: bash
id: choose_repo

View File

@@ -23,13 +23,14 @@ rust-version = "1.80.0" # TO
[workspace.dependencies]
lance = { "version" = "=0.20.0", "features" = [
"dynamodb",
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-io = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
# Note that this one does not include pyarrow
arrow = { version = "52.2", optional = false }
arrow-array = "52.2"

View File

@@ -1,8 +1,9 @@
#!/bin/bash
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
# We pass down the current user so that when we later mount the local files
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
@@ -18,4 +19,4 @@ docker run \
-v $(pwd):/io -w /io \
--memory-swap=-1 \
lancedb-node-manylinux \
bash ci/manylinux_node/build_vectordb.sh $ARCH
bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE

View File

@@ -2,6 +2,7 @@
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
if [ "$ARCH" = "x86_64" ]; then
export OPENSSL_LIB_DIR=/usr/local/lib64/
@@ -17,4 +18,4 @@ FILE=$HOME/.bashrc && test -f $FILE && source $FILE
cd node
npm ci
npm run build-release
npm run pack-build
npm run pack-build -- -t $TARGET_TRIPLE

View File

@@ -0,0 +1,105 @@
#!/bin/sh
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
# function dl() {
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
# }
# [[.h]]
# "id": "Win11SDK_10.0.26100"
# "version": "10.0.26100.7"
# libucrt.lib
# example: <assert.h>
# dir: ucrt/
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
# example: <windows.h>
# dir: um/
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
# example: <winapifamily.h>
# dir: /shared
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
# "version": "14.16.27045"
# example: <vcruntime.h>
# dir: MSVC/
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
# [[.lib]]
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
# fwpuclnt.lib arm64rt.lib
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7a332420d812f7c1d41da865ae5a7c52/windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/19de98ed4a79938d0045d19c047936b3/3e2f7be479e3679d700ce0782e4cc318.cab
# libcmt.lib libvcruntime.lib
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/227f40682a88dc5fa0ccb9cadc9ad30af99ad1f1a75db63407587d079f60d035/Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
msiextract windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
unzip -o Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
mkdir -p /usr/aarch64-pc-windows-msvc/usr/include
mkdir -p /usr/aarch64-pc-windows-msvc/usr/lib
# lowercase folder/file names
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
# .h
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/aarch64-pc-windows-msvc/usr/include)
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/aarch64-pc-windows-msvc/usr/include
# lowercase #include "" and #include <>
find /usr/aarch64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
# ARM intrinsics
# original dir: MSVC/
# '__n128x4' redefined in arm_neon.h
# "arm64_neon.h" included from intrin.h
(cd /usr/lib/llvm19/lib/clang/19/include && cp arm_neon.h intrin.h -t /usr/aarch64-pc-windows-msvc/usr/include)
# .lib
# _Interlocked intrinsics
# must always link with arm64rt.lib
# reason: https://developercommunity.visualstudio.com/t/libucrtlibstreamobj-error-lnk2001-unresolved-exter/1544787#T-ND1599818
# I don't understand the 'correct' fix for this, arm64rt.lib is supposed to be the workaround
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/arm64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib fwpuclnt.lib arm64rt.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
(cd 'contents/vc/tools/msvc/14.16.27023/lib/arm64' && cp libcmt.lib libvcruntime.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/arm64/libucrt.lib' /usr/aarch64-pc-windows-msvc/usr/lib

View File

@@ -0,0 +1,105 @@
#!/bin/sh
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
# function dl() {
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
# }
# [[.h]]
# "id": "Win11SDK_10.0.26100"
# "version": "10.0.26100.7"
# libucrt.lib
# example: <assert.h>
# dir: ucrt/
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
# example: <windows.h>
# dir: um/
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
# example: <winapifamily.h>
# dir: /shared
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
# "version": "14.16.27045"
# example: <vcruntime.h>
# dir: MSVC/
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
# [[.lib]]
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/bfc3904a0195453419ae4dfea7abd6fb/e10768bb6e9d0ea730280336b697da66.cab
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/637f9f3be880c71f9e3ca07b4d67345c/f9b24c8280986c0683fbceca5326d806.cab
# dbghelp.lib fwpuclnt.lib
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/9f51690d5aa804b1340ce12d1ec80f89/windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/d3a7df4ca3303a698640a29e558a5e5b/58314d0646d7e1a25e97c902166c3155.cab
# libcmt.lib libvcruntime.lib
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/8728f21ae09940f1f4b4ee47b4a596be2509e2a47d2f0c83bbec0ea37d69644b/Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
msiextract windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
unzip -o Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
mkdir -p /usr/x86_64-pc-windows-msvc/usr/include
mkdir -p /usr/x86_64-pc-windows-msvc/usr/lib
# lowercase folder/file names
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
# .h
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/x86_64-pc-windows-msvc/usr/include)
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/x86_64-pc-windows-msvc/usr/include
# lowercase #include "" and #include <>
find /usr/x86_64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
# x86 intrinsics
# original dir: MSVC/
# '_mm_movemask_epi8' defined in emmintrin.h
# '__v4sf' defined in xmmintrin.h
# '__v2si' defined in mmintrin.h
# '__m128d' redefined in immintrin.h
# '__m128i' redefined in intrin.h
# '_mm_comlt_epu8' defined in ammintrin.h
(cd /usr/lib/llvm19/lib/clang/19/include && cp emmintrin.h xmmintrin.h mmintrin.h immintrin.h intrin.h ammintrin.h -t /usr/x86_64-pc-windows-msvc/usr/include)
# .lib
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/x64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib dbghelp.lib fwpuclnt.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
(cd 'contents/vc/tools/msvc/14.16.27023/lib/x64' && cp libcmt.lib libvcruntime.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/x64/libucrt.lib' /usr/x86_64-pc-windows-msvc/usr/lib

View File

@@ -55,6 +55,9 @@ plugins:
show_signature_annotations: true
show_root_heading: true
members_order: source
docstring_section_style: list
signature_crossrefs: true
separate_signature: true
import:
# for cross references
- https://arrow.apache.org/docs/objects.inv

View File

@@ -1,6 +1,16 @@
# Python API Reference
This section contains the API reference for the OSS Python API.
This section contains the API reference for the Python API. There is a
synchronous and an asynchronous API client.
The general flow of using the API is:
1. Use [lancedb.connect][] or [lancedb.connect_async][] to connect to a database.
2. Use the returned [lancedb.DBConnection][] or [lancedb.AsyncConnection][] to
create or open tables.
3. Use the returned [lancedb.table.Table][] or [lancedb.AsyncTable][] to query
or modify tables.
## Installation

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.13.1-beta.0</version>
<version>0.14.0-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.13.1-beta.0</version>
<version>0.14.0-beta.1</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

20
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"cpu": [
"x64",
"arm64"
@@ -52,14 +52,14 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0"
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.1",
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.1",
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.1",
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.1",
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.1"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -91,13 +91,13 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0",
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0"
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.1",
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.1",
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.1",
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.1",
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.1"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.13.1-beta.0"
version = "0.14.0-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -110,7 +110,10 @@ describe("given a connection", () => {
let table = await db.createTable("test", data, { useLegacyFormat: true });
const isV2 = async (table: Table) => {
const data = await table.query().toArrow({ maxBatchLength: 100000 });
const data = await table
.query()
.limit(10000)
.toArrow({ maxBatchLength: 100000 });
console.log(data.batches.length);
return data.batches.length < 5;
};

View File

@@ -585,11 +585,11 @@ describe("When creating an index", () => {
expect(fs.readdirSync(indexDir)).toHaveLength(1);
for await (const r of tbl.query().where("id > 1").select(["id"])) {
expect(r.numRows).toBe(298);
expect(r.numRows).toBe(10);
}
// should also work with 'filter' alias
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
expect(r.numRows).toBe(298);
expect(r.numRows).toBe(10);
}
});

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -10,7 +10,7 @@
"vector database",
"ann"
],
"version": "0.13.1-beta.0",
"version": "0.14.0-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.17.0-beta.0"
current_version = "0.17.0-beta.3"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.17.0-beta.0"
version = "0.17.0-beta.3"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.20.0b2",
"pylance==0.20.0b3",
"tqdm>=4.27.0",
"pydantic>=1.10",
"packaging",

View File

@@ -12,18 +12,22 @@
# limitations under the License.
import os
from typing import ClassVar, List, Union
from typing import ClassVar, TYPE_CHECKING, List, Union
import numpy as np
import pyarrow as pa
from ..util import attempt_import_or_raise
from .base import TextEmbeddingFunction
from .base import EmbeddingFunction
from .registry import register
from .utils import api_key_not_found_help, TEXT
from .utils import api_key_not_found_help, IMAGES
if TYPE_CHECKING:
import PIL
@register("voyageai")
class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
class VoyageAIEmbeddingFunction(EmbeddingFunction):
"""
An embedding function that uses the VoyageAI API
@@ -36,6 +40,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
* voyage-3
* voyage-3-lite
* voyage-multimodal-3
* voyage-finance-2
* voyage-multilingual-2
* voyage-law-2
@@ -54,7 +59,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
.create(name="voyage-3")
class TextModel(LanceModel):
text: str = voyageai.SourceField()
data: str = voyageai.SourceField()
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
data = [ { "text": "hello world" },
@@ -77,6 +82,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
return 1536
elif self.name in [
"voyage-3",
"voyage-multimodal-3",
"voyage-finance-2",
"voyage-multilingual-2",
"voyage-law-2",
@@ -85,19 +91,19 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
else:
raise ValueError(f"Model {self.name} not supported")
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
return self.compute_source_embeddings(query, input_type="query")
def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
"""
Sanitize the input to the embedding function.
"""
if isinstance(images, (str, bytes)):
images = [images]
elif isinstance(images, pa.Array):
images = images.to_pylist()
elif isinstance(images, pa.ChunkedArray):
images = images.combine_chunks().to_pylist()
return images
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
texts = self.sanitize_input(texts)
input_type = (
kwargs.get("input_type") or "document"
) # assume source input type if not passed by `compute_query_embeddings`
return self.generate_embeddings(texts, input_type=input_type)
def generate_embeddings(
self, texts: Union[List[str], np.ndarray], *args, **kwargs
) -> List[np.array]:
def generate_text_embeddings(self, text: str, **kwargs) -> np.ndarray:
"""
Get the embeddings for the given texts
@@ -109,15 +115,55 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
truncation: Optional[bool]
"""
VoyageAIEmbeddingFunction._init_client()
rs = VoyageAIEmbeddingFunction.client.embed(
texts=texts, model=self.name, **kwargs
)
if self.name in ["voyage-multimodal-3"]:
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
inputs=[[text]], model=self.name, **kwargs
)
else:
rs = VoyageAIEmbeddingFunction._get_client().embed(
texts=[text], model=self.name, **kwargs
)
return [emb for emb in rs.embeddings]
return rs.embeddings[0]
def generate_image_embedding(
self, image: "PIL.Image.Image", **kwargs
) -> np.ndarray:
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
inputs=[[image]], model=self.name, **kwargs
)
return rs.embeddings[0]
def compute_query_embeddings(
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
) -> List[np.ndarray]:
"""
Compute the embeddings for a given user query
Parameters
----------
query : Union[str, PIL.Image.Image]
The query to embed. A query can be either text or an image.
"""
if isinstance(query, str):
return [self.generate_text_embeddings(query, input_type="query")]
else:
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(query, PIL.Image.Image):
return [self.generate_image_embedding(query, input_type="query")]
else:
raise TypeError("Only text PIL images supported as query")
def compute_source_embeddings(
self, images: IMAGES, *args, **kwargs
) -> List[np.array]:
images = self.sanitize_input(images)
return [
self.generate_image_embedding(img, input_type="document") for img in images
]
@staticmethod
def _init_client():
def _get_client():
if VoyageAIEmbeddingFunction.client is None:
voyageai = attempt_import_or_raise("voyageai")
if os.environ.get("VOYAGE_API_KEY") is None:
@@ -125,3 +171,4 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
VoyageAIEmbeddingFunction.client = voyageai.Client(
os.environ["VOYAGE_API_KEY"]
)
return VoyageAIEmbeddingFunction.client

View File

@@ -0,0 +1,248 @@
import logging
from typing import Any, List, Optional, Tuple, Union, Literal
import pyarrow as pa
from ..table import Table
Filter = Union[str, pa.compute.Expression]
Keys = Union[str, List[str]]
JoinType = Literal[
"left semi",
"right semi",
"left anti",
"right anti",
"inner",
"left outer",
"right outer",
"full outer",
]
class PyarrowScannerAdapter(pa.dataset.Scanner):
    """
    Presents a scan over a LanceDB table through the
    ``pyarrow.dataset.Scanner`` interface.

    The projection (``columns``), ``filter`` and ``batch_size`` are stored on
    the instance and applied each time the scanner is read.

    Parameters
    ----------
    table: Table
        The LanceDB table to scan.
    columns: list of str, optional
        Columns to project.  ``None`` applies no projection.
    filter: str or pyarrow.compute.Expression, optional
        Row filter, applied as a prefilter on the query.
    batch_size: int, optional
        Passed through to the query's ``to_batches`` call.
    batch_readahead, fragment_readahead: int, optional
        Accepted for interface compatibility and ignored (a debug message is
        logged when they are set).
    fragment_scan_options, use_threads, memory_pool
        Not supported; any non-default value raises ``NotImplementedError``.
    """

    def __init__(
        self,
        table: Table,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        self.table = table
        self.columns = columns
        self.filter = filter
        self.batch_size = batch_size
        # Read-ahead tuning is silently dropped rather than rejected.
        if batch_readahead is not None:
            logging.debug("ignoring batch_readahead which has no lance equivalent")
        if fragment_readahead is not None:
            logging.debug("ignoring fragment_readahead which has no lance equivalent")
        # Options that would change results or resource handling are rejected.
        if fragment_scan_options is not None:
            raise NotImplementedError("fragment_scan_options not supported")
        if use_threads is False:
            raise NotImplementedError("use_threads=False not supported")
        if memory_pool is not None:
            raise NotImplementedError("memory_pool not supported")

    def count_rows(self):
        """Return the number of rows in the table that match ``self.filter``."""
        return self.table.count_rows(self.filter)

    def from_batches(self, **kwargs):
        """Not supported by this adapter."""
        raise NotImplementedError

    def from_dataset(self, **kwargs):
        """Not supported by this adapter."""
        raise NotImplementedError

    def from_fragment(self, **kwargs):
        """Not supported by this adapter."""
        raise NotImplementedError

    def head(self, num_rows: int):
        """Read at most ``num_rows`` rows and return them fully materialized."""
        return self.to_reader(limit=num_rows).read_all()

    @property
    def projected_schema(self):
        """
        Schema of the scan output.

        Determined by running a one-row scan and inspecting the result, so
        reading this property executes a query.
        """
        return self.head(1).schema

    def scan_batches(self):
        """Return the scan as a batch reader (same as ``to_reader()``)."""
        return self.to_reader()

    def take(self, indices: List[int]):
        """Not supported by this adapter."""
        raise NotImplementedError

    def to_batches(self):
        """Return the scan as a batch reader (same as ``to_reader()``)."""
        return self.to_reader()

    def to_table(self):
        """Run the scan and return every matching row, fully materialized."""
        return self.to_reader().read_all()

    def to_reader(self, *, limit: Optional[int] = None):
        """
        Build and execute the LanceDB query backing this scanner.

        Parameters
        ----------
        limit: int, optional
            Maximum number of rows to return.  When omitted, the limit is set
            to the number of matching rows so that the query's builtin limit
            does not truncate the scan.

        Raises
        ------
        ValueError
            If ``limit`` is given and is not positive.
        """
        if limit is None:
            # Disable the builtin limit by asking for every matching row.
            limit = self.count_rows()
        elif limit <= 0:
            raise ValueError("limit must be positive")

        # Always continue from the builder returned by each call instead of
        # relying on ``limit`` mutating the builder in place.
        query = self.table.search().limit(limit)
        if self.columns is not None:
            query = query.select(self.columns)
        if self.filter is not None:
            query = query.where(self.filter, prefilter=True)
        return query.to_batches(batch_size=self.batch_size)
class PyarrowDatasetAdapter(pa.dataset.Dataset):
    """
    Presents a LanceDB table through the ``pyarrow.dataset.Dataset`` interface.

    Reads (``head``, ``to_batches``, ``to_table``) are delegated to a
    ``PyarrowScannerAdapter`` built by ``scanner``.  Fragment access, joins,
    sorting, schema replacement and ``take`` raise ``NotImplementedError``.
    """

    def __init__(self, table: Table):
        self.table = table

    def count_rows(self, filter: Optional[Filter] = None):
        """Return the number of rows in the table, optionally filtered."""
        return self.table.count_rows(filter)

    def get_fragments(self, filter: Optional[Filter] = None):
        """Not supported by this adapter."""
        raise NotImplementedError

    def head(
        self,
        num_rows: int,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return at most ``num_rows`` rows of a scan with the given options."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.head(num_rows)

    def join(
        self,
        right_dataset: Any,
        keys: Keys,
        right_keys: Optional[Keys] = None,
        join_type: Optional[JoinType] = None,
        left_suffix: Optional[str] = None,
        right_suffix: Optional[str] = None,
        coalesce_keys: bool = True,
        use_threads: bool = True,
    ):
        """Not supported by this adapter."""
        raise NotImplementedError

    def join_asof(
        self,
        right_dataset: Any,
        on: str,
        by: Keys,
        tolerance: int,
        right_on: Optional[str] = None,
        right_by: Optional[Keys] = None,
    ):
        """Not supported by this adapter."""
        raise NotImplementedError

    @property
    def partition_expression(self):
        """Not supported by this adapter."""
        raise NotImplementedError

    def replace_schema(self, schema: pa.Schema):
        """Not supported by this adapter."""
        raise NotImplementedError

    def scanner(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Create a ``PyarrowScannerAdapter`` over this table with the given options."""
        return PyarrowScannerAdapter(
            self.table,
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )

    @property
    def schema(self):
        """The schema of the underlying table."""
        return self.table.schema

    def sort_by(self, sorting: Union[str, List[Tuple[str, bool]]]):
        """Not supported by this adapter."""
        raise NotImplementedError

    def take(
        self,
        indices: List[int],
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Not supported by this adapter."""
        raise NotImplementedError

    def to_batches(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Scan the table with the given options and return the scanner's batches."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.to_batches()

    def to_table(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Scan the table with the given options and return all rows materialized."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.to_table()

View File

@@ -325,6 +325,14 @@ class LanceQueryBuilder(ABC):
"""
raise NotImplementedError
@abstractmethod
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
    """
    Execute the query and return the results as a pyarrow
    [RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html)

    Parameters
    ----------
    batch_size: int, optional
        Optional batch size hint; how it is interpreted is left to the
        concrete query builder.

    Returns
    -------
    pa.RecordBatchReader
        A reader over the result batches (not a materialized ``pa.Table``).
    """
    raise NotImplementedError
def to_list(self) -> List[dict]:
"""
Execute the query and return the results as a list of dictionaries.
@@ -869,6 +877,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
check_reranker_result(results)
return results
def to_batches(self, /, batch_size: Optional[int] = None):
    """
    Streaming results as record batches is not available for FTS queries.

    Raises
    ------
    NotImplementedError
        Always.
    """
    reason = "to_batches on an FTS query"
    raise NotImplementedError(reason)
def tantivy_to_arrow(self) -> pa.Table:
try:
import tantivy
@@ -971,6 +982,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
class LanceEmptyQueryBuilder(LanceQueryBuilder):
def to_arrow(self) -> pa.Table:
return self.to_batches().read_all()
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
query = Query(
columns=self._columns,
filter=self._where,
@@ -980,7 +994,7 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
# not actually respected in remote query
offset=self._offset or 0,
)
return self._table._execute_query(query).read_all()
return self._table._execute_query(query)
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
"""Rerank the results using the specified reranker.
@@ -1135,6 +1149,9 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
results = results.drop(["_rowid"])
return results
def to_batches(self, /, batch_size: Optional[int] = None):
    """
    Streaming results as record batches is not yet available for hybrid queries.

    The signature mirrors the abstract ``LanceQueryBuilder.to_batches`` so that
    callers passing ``batch_size`` get the intended ``NotImplementedError``
    rather than a ``TypeError`` about an unexpected argument.

    Parameters
    ----------
    batch_size: int, optional
        Accepted for interface compatibility; unused.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("to_batches not yet supported on a hybrid query")
def _rank(self, results: pa.Table, column: str, ascending: bool = True):
if len(results) == 0:
return results
@@ -1502,10 +1519,11 @@ class AsyncQueryBase(object):
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------

View File

@@ -599,7 +599,9 @@ async def test_create_in_v2_mode(tmp_path):
)
async def is_in_v2_mode(tbl):
batches = await tbl.query().to_batches(max_batch_length=1024 * 10)
batches = (
await tbl.query().limit(10 * 1024).to_batches(max_batch_length=1024 * 10)
)
num_batches = 0
async for batch in batches:
num_batches += 1

View File

@@ -0,0 +1,21 @@
import duckdb
import pyarrow as pa
import lancedb
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
def test_basic_query(tmp_path):
    """DuckDB can run aggregate SQL over a LanceDB table wrapped in the adapter."""
    source = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    db = lancedb.connect(tmp_path)
    lance_table = db.create_table("test", source)
    # The SQL below refers to this variable by name, so it must stay a local
    # called `adapter` even though Python itself never reads it (hence noqa).
    adapter = PyarrowDatasetAdapter(lance_table)  # noqa: F841

    duck_conn = duckdb.connect()

    x_sum = duck_conn.sql("SELECT SUM(x) FROM adapter").fetchall()
    assert x_sum[0][0] == 10

    y_sum = duck_conn.sql("SELECT SUM(y) FROM adapter").fetchall()
    assert y_sum[0][0] == 26

View File

@@ -0,0 +1,47 @@
import pyarrow as pa
import lancedb
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
def test_dataset_adapter(tmp_path):
    """Exercise the dataset and scanner adapters on small, large and empty tables."""
    db = lancedb.connect(tmp_path)

    # Small two-column table: counts, schema, reads and projections.
    expected = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    dataset = PyarrowDatasetAdapter(db.create_table("test", expected))

    assert dataset.count_rows() == 4
    assert dataset.count_rows("x > 2") == 2
    assert dataset.schema == expected.schema
    assert dataset.head(2) == expected.slice(0, 2)
    assert dataset.to_table() == expected
    assert dataset.to_batches().read_all() == expected
    assert dataset.scanner().to_table() == expected
    assert dataset.scanner().to_batches().read_all() == expected
    assert dataset.scanner().projected_schema == expected.schema

    x_only = dataset.scanner(columns=["x"])
    assert x_only.projected_schema == pa.schema([expected.schema.field("x")])
    assert dataset.scanner(columns=["x"]).to_table() == pa.table({"x": [1, 2, 3, 4]})

    # Make sure we bypass the limit
    hundred = pa.table({"x": range(100)})
    dataset = PyarrowDatasetAdapter(db.create_table("test2", hundred))

    assert dataset.count_rows() == 100
    assert dataset.to_table().num_rows == 100
    assert dataset.head(10).num_rows == 10

    # Empty table
    empty_schema = pa.schema({"x": pa.int64()})
    dataset = PyarrowDatasetAdapter(db.create_table("test3", None, schema=empty_schema))

    assert dataset.count_rows() == 0
    assert dataset.to_table().num_rows == 0
    assert dataset.head(10).num_rows == 0
    assert dataset.scanner().projected_schema == pa.schema({"x": pa.int64()})

View File

@@ -193,7 +193,7 @@ def test_table_add_in_threadpool():
if request.path == "/v1/table/test/insert/":
request.send_response(200)
request.end_headers()
elif request.path == "/v1/table/test/create/":
elif request.path == "/v1/table/test/create/?mode=create":
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.13.1-beta.0"
version = "0.14.0-beta.1"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.13.1-beta.0"
version = "0.14.0-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -27,6 +27,7 @@ half = { workspace = true }
lazy_static.workspace = true
lance = { workspace = true }
lance-datafusion.workspace = true
lance-io = { workspace = true }
lance-index = { workspace = true }
lance-table = { workspace = true }
lance-linalg = { workspace = true }

View File

@@ -38,6 +38,9 @@ use crate::table::{NativeTable, TableDefinition, WriteOptions};
use crate::utils::validate_table_name;
use crate::Table;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_table::io::commit::commit_handler_from_url;
pub const LANCE_FILE_EXTENSION: &str = "lance";
@@ -133,7 +136,7 @@ impl IntoArrow for NoData {
/// A builder for configuring a [`Connection::create_table`] operation
pub struct CreateTableBuilder<const HAS_DATA: bool, T: IntoArrow> {
parent: Arc<dyn ConnectionInternal>,
pub(crate) parent: Arc<dyn ConnectionInternal>,
pub(crate) name: String,
pub(crate) data: Option<T>,
pub(crate) mode: CreateTableMode,
@@ -341,7 +344,7 @@ pub struct OpenTableBuilder {
}
impl OpenTableBuilder {
fn new(parent: Arc<dyn ConnectionInternal>, name: String) -> Self {
pub(crate) fn new(parent: Arc<dyn ConnectionInternal>, name: String) -> Self {
Self {
parent,
name,
@@ -717,12 +720,14 @@ impl ConnectBuilder {
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
})?;
let storage_options = StorageOptions(self.storage_options.clone());
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
&self.uri,
&api_key,
&region,
self.host_override,
self.client_config,
storage_options.into(),
)?);
Ok(Connection {
internal,
@@ -855,7 +860,7 @@ impl Database {
let table_base_uri = if let Some(store) = engine {
static WARN_ONCE: std::sync::Once = std::sync::Once::new();
WARN_ONCE.call_once(|| {
log::warn!("Specifing engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
log::warn!("Specifying engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
});
let old_scheme = url.scheme().to_string();
let new_scheme = format!("{}+{}", old_scheme, store);
@@ -1036,6 +1041,7 @@ impl ConnectionInternal for Database {
};
let mut write_params = options.write_options.lance_write_params.unwrap_or_default();
if matches!(&options.mode, CreateTableMode::Overwrite) {
write_params.mode = WriteMode::Overwrite;
}
@@ -1122,7 +1128,7 @@ impl ConnectionInternal for Database {
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
let full_path = self.base_path.child(dir_name.clone());
self.object_store
.remove_dir_all(full_path)
.remove_dir_all(full_path.clone())
.await
.map_err(|err| match err {
// this error is not lance::Error::DatasetNotFound,
@@ -1132,6 +1138,19 @@ impl ConnectionInternal for Database {
},
_ => Error::from(err),
})?;
let object_store_params = ObjectStoreParams {
storage_options: Some(self.storage_options.clone()),
..Default::default()
};
let mut uri = self.uri.clone();
if let Some(query_string) = &self.query_string {
uri.push_str(&format!("?{}", query_string));
}
let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params))
.await
.unwrap();
commit_handler.delete(&full_path).await.unwrap();
Ok(())
}
@@ -1169,6 +1188,7 @@ mod tests {
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
use tempfile::tempdir;
use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions};
use super::*;
@@ -1296,6 +1316,7 @@ mod tests {
// In v1 the row group size will trump max_batch_length
let batches = tbl
.query()
.limit(20000)
.execute_with_options(QueryExecutionOptions {
max_batch_length: 50000,
..Default::default()

View File

@@ -596,7 +596,7 @@ impl Query {
pub(crate) fn new(parent: Arc<dyn TableInternal>) -> Self {
Self {
parent,
limit: None,
limit: Some(DEFAULT_TOP_K),
offset: None,
filter: None,
full_text_search: None,

View File

@@ -21,6 +21,7 @@ use reqwest::{
};
use crate::error::{Error, Result};
use crate::remote::db::RemoteOptions;
const REQUEST_ID_HEADER: &str = "x-request-id";
@@ -215,6 +216,7 @@ impl RestfulLanceDbClient<Sender> {
region: &str,
host_override: Option<String>,
client_config: ClientConfig,
options: &RemoteOptions,
) -> Result<Self> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
@@ -226,6 +228,14 @@ impl RestfulLanceDbClient<Sender> {
});
}
let db_name = parsed_url.host_str().unwrap();
let db_prefix = {
let prefix = parsed_url.path().trim_start_matches('/');
if prefix.is_empty() {
None
} else {
Some(prefix)
}
};
// Get the timeouts
let connect_timeout = Self::get_timeout(
@@ -255,6 +265,8 @@ impl RestfulLanceDbClient<Sender> {
region,
db_name,
host_override.is_some(),
options,
db_prefix,
)?)
.user_agent(client_config.user_agent)
.build()
@@ -262,6 +274,7 @@ impl RestfulLanceDbClient<Sender> {
message: "Failed to build HTTP client".into(),
source: Some(Box::new(err)),
})?;
let host = match host_override {
Some(host_override) => host_override,
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
@@ -287,6 +300,8 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
region: &str,
db_name: &str,
has_host_override: bool,
options: &RemoteOptions,
db_prefix: Option<&str>,
) -> Result<HeaderMap> {
let mut headers = HeaderMap::new();
headers.insert(
@@ -312,6 +327,34 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
})?,
);
}
if db_prefix.is_some() {
headers.insert(
"x-lancedb-database-prefix",
HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
message: format!(
"non-ascii database prefix '{}' provided",
db_prefix.unwrap()
),
})?,
);
}
if let Some(v) = options.0.get("account_name") {
headers.insert(
"x-azure-storage-account-name",
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
message: format!("non-ascii storage account name '{}' provided", db_name),
})?,
);
}
if let Some(v) = options.0.get("azure_storage_account_name") {
headers.insert(
"x-azure-storage-account-name",
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
message: format!("non-ascii storage account name '{}' provided", db_name),
})?,
);
}
Ok(headers)
}

View File

@@ -12,18 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use arrow_array::RecordBatchReader;
use async_trait::async_trait;
use http::StatusCode;
use lance_io::object_store::StorageOptions;
use moka::future::Cache;
use reqwest::header::CONTENT_TYPE;
use serde::Deserialize;
use tokio::task::spawn_blocking;
use crate::connection::{
ConnectionInternal, CreateTableBuilder, NoData, OpenTableBuilder, TableNamesBuilder,
ConnectionInternal, CreateTableBuilder, CreateTableMode, NoData, OpenTableBuilder,
TableNamesBuilder,
};
use crate::embeddings::EmbeddingRegistry;
use crate::error::Result;
@@ -52,9 +55,16 @@ impl RemoteDatabase {
region: &str,
host_override: Option<String>,
client_config: ClientConfig,
options: RemoteOptions,
) -> Result<Self> {
let client =
RestfulLanceDbClient::try_new(uri, api_key, region, host_override, client_config)?;
let client = RestfulLanceDbClient::try_new(
uri,
api_key,
region,
host_override,
client_config,
&options,
)?;
let table_cache = Cache::builder()
.time_to_live(std::time::Duration::from_secs(300))
@@ -95,6 +105,16 @@ impl<S: HttpSend> std::fmt::Display for RemoteDatabase<S> {
}
}
/// Converts a [`CreateTableMode`] into the string used for the `mode` query
/// parameter of the remote create-table request.
impl From<&CreateTableMode> for &'static str {
    fn from(mode: &CreateTableMode) -> Self {
        match mode {
            CreateTableMode::ExistOk(_) => "exist_ok",
            CreateTableMode::Overwrite => "overwrite",
            CreateTableMode::Create => "create",
        }
    }
}
#[async_trait]
impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
async fn table_names(&self, options: TableNamesBuilder) -> Result<Vec<String>> {
@@ -133,14 +153,40 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
let req = self
.client
.post(&format!("/v1/table/{}/create/", options.name))
.query(&[("mode", Into::<&str>::into(&options.mode))])
.body(data_buffer)
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
let (request_id, rsp) = self.client.send(req, false).await?;
if rsp.status() == StatusCode::BAD_REQUEST {
let body = rsp.text().await.err_to_http(request_id.clone())?;
if body.contains("already exists") {
return Err(crate::Error::TableAlreadyExists { name: options.name });
return match options.mode {
CreateTableMode::Create => {
Err(crate::Error::TableAlreadyExists { name: options.name })
}
CreateTableMode::ExistOk(callback) => {
let builder = OpenTableBuilder::new(options.parent, options.name);
let builder = (callback)(builder);
builder.execute().await
}
// This should not happen, as we explicitly set the mode to overwrite and the server
// shouldn't return an error if the table already exists.
//
// However if the server is an older version that doesn't support the mode parameter,
// then we'll get the 400 response.
CreateTableMode::Overwrite => Err(crate::Error::Http {
source: format!(
"unexpected response from server for create mode overwrite: {}",
body
)
.into(),
request_id,
status_code: Some(StatusCode::BAD_REQUEST),
}),
};
} else {
return Err(crate::Error::InvalidInput { message: body });
}
@@ -206,6 +252,29 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
}
}
/// The subset of `StorageOptions` that is compatible with remote LanceDB
/// connections, kept as a plain key/value map.
#[derive(Clone, Debug, Default)]
pub struct RemoteOptions(pub HashMap<String, String>);

impl RemoteOptions {
    /// Wraps `options` as-is; no filtering or validation is performed here.
    pub fn new(options: HashMap<String, String>) -> Self {
        RemoteOptions(options)
    }
}
impl From<StorageOptions> for RemoteOptions {
fn from(options: StorageOptions) -> Self {
let supported_opts = vec!["account_name", "azure_storage_account_name"];
let mut filtered = HashMap::new();
for opt in supported_opts {
if let Some(v) = options.0.get(opt) {
filtered.insert(opt.to_string(), v.to_string());
}
}
RemoteOptions::new(filtered)
}
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, OnceLock};
@@ -213,7 +282,9 @@ mod tests {
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
use arrow_schema::{DataType, Field, Schema};
use crate::connection::ConnectBuilder;
use crate::{
connection::CreateTableMode,
remote::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
Connection, Error,
};
@@ -382,6 +453,73 @@ mod tests {
);
}
/// Checks how each `CreateTableMode` is sent to the remote create endpoint and
/// that the `ExistOk` callback runs when the server reports an existing table.
#[tokio::test]
async fn test_create_table_modes() {
// Each case pairs an optional mode with the query string expected on the
// create request; `None` covers the builder's default mode.
let test_cases = [
(None, "mode=create"),
(Some(CreateTableMode::Create), "mode=create"),
(Some(CreateTableMode::Overwrite), "mode=overwrite"),
(
Some(CreateTableMode::ExistOk(Box::new(|b| b))),
"mode=exist_ok",
),
];
for (mode, expected_query_string) in test_cases {
// The mock handler asserts on the outgoing request and always answers 200.
let conn = Connection::new_with_handler(move |request| {
assert_eq!(request.method(), &reqwest::Method::POST);
assert_eq!(request.url().path(), "/v1/table/table1/create/");
assert_eq!(request.url().query(), Some(expected_query_string));
http::Response::builder().status(200).body("").unwrap()
});
let data = RecordBatch::try_new(
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
let mut builder = conn.create_table("table1", reader);
// Only override the mode when the case specifies one.
if let Some(mode) = mode {
builder = builder.mode(mode);
}
builder.execute().await.unwrap();
}
// check that the open table callback is called with exist_ok
// The create request is answered with 400 "already exists"; the describe
// request is answered with 200.
let conn = Connection::new_with_handler(|request| match request.url().path() {
"/v1/table/table1/create/" => http::Response::builder()
.status(400)
.body("Table table1 already exists")
.unwrap(),
"/v1/table/table1/describe/" => http::Response::builder().status(200).body("").unwrap(),
_ => {
panic!("unexpected path: {:?}", request.url().path());
}
});
let data = RecordBatch::try_new(
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
// Shared flag that the callback sets, so the test can observe it ran.
let called: Arc<OnceLock<bool>> = Arc::new(OnceLock::new());
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
let called_in_cb = called.clone();
conn.create_table("table1", reader)
.mode(CreateTableMode::ExistOk(Box::new(move |b| {
called_in_cb.clone().set(true).unwrap();
b
})))
.execute()
.await
.unwrap();
// An unset flag reads as `false`, which fails the assertion below.
let called = *called.get().unwrap_or(&false);
assert!(called);
}
#[tokio::test]
async fn test_create_table_empty() {
let conn = Connection::new_with_handler(|request| {
@@ -436,4 +574,16 @@ mod tests {
});
conn.rename_table("table1", "table2").await.unwrap();
}
/// Connecting with a `db://` URI that carries a folder prefix, together with
/// an Azure storage account option, must succeed.
#[tokio::test]
async fn test_connect_remote_options() {
    let storage_options = vec![("azure_storage_account_name", "my-storage-account")];
    let builder = ConnectBuilder::new("db://my-container/my-prefix")
        .region("us-east-1")
        .api_key("my-api-key")
        .storage_options(storage_options);
    let _ = builder.execute().await.unwrap();
}
}

View File

@@ -1227,6 +1227,7 @@ mod tests {
"prefilter": true,
"distance_type": "l2",
"nprobes": 20,
"k": 10,
"ef": Option::<usize>::None,
"refine_factor": null,
"version": null,