mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 05:49:57 +00:00
Compare commits
24 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
12c7bd18a5 | ||
|
|
c6bf6a25d6 | ||
|
|
c998a47e17 | ||
|
|
d8c758513c | ||
|
|
3795e02ee3 | ||
|
|
c7d424b2f3 | ||
|
|
1efb9914ee | ||
|
|
83e26a231e | ||
|
|
72a17b2de4 | ||
|
|
4231925476 | ||
|
|
84a6693294 | ||
|
|
6c2d4c10a4 | ||
|
|
d914722f79 | ||
|
|
a6e4034dba | ||
|
|
2616a50502 | ||
|
|
7b5e9d824a | ||
|
|
3b173e7cb9 | ||
|
|
d496ab13a0 | ||
|
|
69d9beebc7 | ||
|
|
d32360b99d | ||
|
|
9fa08bfa93 | ||
|
|
d6d9cb7415 | ||
|
|
990d93f553 | ||
|
|
0832cba3c6 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.13.1-beta.0"
|
||||
current_version = "0.14.0-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
108
.github/workflows/npm-publish.yml
vendored
108
.github/workflows/npm-publish.yml
vendored
@@ -133,7 +133,7 @@ jobs:
|
||||
free -h
|
||||
- name: Build Linux Artifacts
|
||||
run: |
|
||||
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }}
|
||||
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@@ -143,7 +143,7 @@ jobs:
|
||||
|
||||
node-linux-musl:
|
||||
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
container: alpine:edge
|
||||
# Only runs on tags that match the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
@@ -152,7 +152,10 @@ jobs:
|
||||
matrix:
|
||||
config:
|
||||
- arch: x86_64
|
||||
runner: ubuntu-latest
|
||||
- arch: aarch64
|
||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
||||
runner: buildjet-16vcpu-ubuntu-2204-arm
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
@@ -185,7 +188,7 @@ jobs:
|
||||
- name: Build Linux Artifacts
|
||||
run: |
|
||||
source ./saved_env
|
||||
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }}
|
||||
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-musl
|
||||
- name: Upload Linux Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@@ -246,7 +249,7 @@ jobs:
|
||||
|
||||
nodejs-linux-musl:
|
||||
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl)
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
container: alpine:edge
|
||||
# Only runs on tags that match the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
@@ -255,7 +258,10 @@ jobs:
|
||||
matrix:
|
||||
config:
|
||||
- arch: x86_64
|
||||
runner: ubuntu-latest
|
||||
- arch: aarch64
|
||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
||||
runner: buildjet-16vcpu-ubuntu-2204-arm
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
@@ -334,6 +340,50 @@ jobs:
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-win32*.tgz
|
||||
|
||||
node-windows-arm64:
|
||||
name: vectordb ${{ matrix.config.arch }}-pc-windows-msvc
|
||||
runs-on: ubuntu-latest
|
||||
container: alpine:edge
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
# - arch: x86_64
|
||||
- arch: aarch64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||
echo "source $HOME/.cargo/env" >> saved_env
|
||||
echo "export CC=clang" >> saved_env
|
||||
echo "export AR=llvm-ar" >> saved_env
|
||||
source "$HOME/.cargo/env"
|
||||
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
|
||||
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
|
||||
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
|
||||
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
|
||||
- name: Configure x86_64 build
|
||||
if: ${{ matrix.config.arch == 'x86_64' }}
|
||||
run: |
|
||||
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
|
||||
- name: Configure aarch64 build
|
||||
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||
run: |
|
||||
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
|
||||
- name: Build Windows Artifacts
|
||||
run: |
|
||||
source ./saved_env
|
||||
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-pc-windows-msvc
|
||||
- name: Upload Windows Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: node-native-windows-${{ matrix.config.arch }}
|
||||
path: |
|
||||
node/dist/lancedb-vectordb-win32*.tgz
|
||||
|
||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
||||
# node-windows-arm64:
|
||||
# name: vectordb win32-arm64-msvc
|
||||
@@ -472,6 +522,52 @@ jobs:
|
||||
path: |
|
||||
nodejs/dist/*.node
|
||||
|
||||
nodejs-windows-arm64:
|
||||
name: lancedb ${{ matrix.config.arch }}-pc-windows-msvc
|
||||
runs-on: ubuntu-latest
|
||||
container: alpine:edge
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
# - arch: x86_64
|
||||
- arch: aarch64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
|
||||
echo "source $HOME/.cargo/env" >> saved_env
|
||||
echo "export CC=clang" >> saved_env
|
||||
echo "export AR=llvm-ar" >> saved_env
|
||||
source "$HOME/.cargo/env"
|
||||
rustup target add ${{ matrix.config.arch }}-pc-windows-msvc --toolchain 1.80.0
|
||||
(mkdir -p sysroot && cd sysroot && sh ../ci/sysroot-${{ matrix.config.arch }}-pc-windows-msvc.sh)
|
||||
echo "export C_INCLUDE_PATH=/usr/${{ matrix.config.arch }}-pc-windows-msvc/usr/include" >> saved_env
|
||||
echo "export CARGO_BUILD_TARGET=${{ matrix.config.arch }}-pc-windows-msvc" >> saved_env
|
||||
printf '#!/bin/sh\ncargo "$@"' > $HOME/.cargo/bin/cargo-xwin
|
||||
chmod u+x $HOME/.cargo/bin/cargo-xwin
|
||||
- name: Configure x86_64 build
|
||||
if: ${{ matrix.config.arch == 'x86_64' }}
|
||||
run: |
|
||||
echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=+crt-static,+avx2,+fma,+f16c -Clinker=lld -Clink-arg=/LIBPATH:/usr/x86_64-pc-windows-msvc/usr/lib'" >> saved_env
|
||||
- name: Configure aarch64 build
|
||||
if: ${{ matrix.config.arch == 'aarch64' }}
|
||||
run: |
|
||||
echo "export RUSTFLAGS='-Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib'" >> saved_env
|
||||
- name: Build Windows Artifacts
|
||||
run: |
|
||||
source ./saved_env
|
||||
bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
|
||||
- name: Upload Windows Artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: nodejs-native-windows-${{ matrix.config.arch }}
|
||||
path: |
|
||||
nodejs/dist/*.node
|
||||
|
||||
# TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
|
||||
# nodejs-windows-arm64:
|
||||
# name: lancedb win32-arm64-msvc
|
||||
@@ -568,7 +664,7 @@ jobs:
|
||||
|
||||
release:
|
||||
name: vectordb NPM Publish
|
||||
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
|
||||
needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows, node-windows-arm64]
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that match the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
@@ -608,7 +704,7 @@ jobs:
|
||||
|
||||
release-nodejs:
|
||||
name: lancedb NPM Publish
|
||||
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
|
||||
needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows, nodejs-windows-arm64]
|
||||
runs-on: ubuntu-latest
|
||||
# Only runs on tags that match the make-release action
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
|
||||
2
.github/workflows/pypi-publish.yml
vendored
2
.github/workflows/pypi-publish.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.8
|
||||
python-version: 3.12
|
||||
- uses: ./.github/workflows/build_windows_wheel
|
||||
with:
|
||||
python-minor-version: 8
|
||||
|
||||
1
.github/workflows/upload_wheel/action.yml
vendored
1
.github/workflows/upload_wheel/action.yml
vendored
@@ -17,6 +17,7 @@ runs:
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install twine
|
||||
python3 -m pip install --upgrade pkginfo
|
||||
- name: Choose repo
|
||||
shell: bash
|
||||
id: choose_repo
|
||||
|
||||
15
Cargo.toml
15
Cargo.toml
@@ -23,13 +23,14 @@ rust-version = "1.80.0" # TO
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.20.0", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-io = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "52.2", optional = false }
|
||||
arrow-array = "52.2"
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
ARCH=${1:-x86_64}
|
||||
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
|
||||
|
||||
# We pass down the current user so that when we later mount the local files
|
||||
# We pass down the current user so that when we later mount the local files
|
||||
# into the container, the files are accessible by the current user.
|
||||
pushd ci/manylinux_node
|
||||
docker build \
|
||||
@@ -18,4 +19,4 @@ docker run \
|
||||
-v $(pwd):/io -w /io \
|
||||
--memory-swap=-1 \
|
||||
lancedb-node-manylinux \
|
||||
bash ci/manylinux_node/build_vectordb.sh $ARCH
|
||||
bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
|
||||
set -e
|
||||
ARCH=${1:-x86_64}
|
||||
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
|
||||
|
||||
if [ "$ARCH" = "x86_64" ]; then
|
||||
export OPENSSL_LIB_DIR=/usr/local/lib64/
|
||||
@@ -17,4 +18,4 @@ FILE=$HOME/.bashrc && test -f $FILE && source $FILE
|
||||
cd node
|
||||
npm ci
|
||||
npm run build-release
|
||||
npm run pack-build
|
||||
npm run pack-build -- -t $TARGET_TRIPLE
|
||||
|
||||
105
ci/sysroot-aarch64-pc-windows-msvc.sh
Normal file
105
ci/sysroot-aarch64-pc-windows-msvc.sh
Normal file
@@ -0,0 +1,105 @@
|
||||
#!/bin/sh
|
||||
|
||||
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
|
||||
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
|
||||
|
||||
# function dl() {
|
||||
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
|
||||
# }
|
||||
|
||||
# [[.h]]
|
||||
|
||||
# "id": "Win11SDK_10.0.26100"
|
||||
# "version": "10.0.26100.7"
|
||||
|
||||
# libucrt.lib
|
||||
|
||||
# example: <assert.h>
|
||||
# dir: ucrt/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
|
||||
|
||||
# example: <windows.h>
|
||||
# dir: um/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
|
||||
|
||||
# example: <winapifamily.h>
|
||||
# dir: shared/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
|
||||
|
||||
|
||||
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
|
||||
# "version": "14.16.27045"
|
||||
|
||||
# example: <vcruntime.h>
|
||||
# dir: MSVC/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||
|
||||
# [[.lib]]
|
||||
|
||||
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
|
||||
|
||||
# fwpuclnt.lib arm64rt.lib
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7a332420d812f7c1d41da865ae5a7c52/windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/19de98ed4a79938d0045d19c047936b3/3e2f7be479e3679d700ce0782e4cc318.cab
|
||||
|
||||
# libcmt.lib libvcruntime.lib
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/227f40682a88dc5fa0ccb9cadc9ad30af99ad1f1a75db63407587d079f60d035/Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
|
||||
|
||||
|
||||
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||
msiextract windows%20sdk%20desktop%20libs%20arm64-x86_en-us.msi
|
||||
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||
unzip -o Microsoft.VisualC.14.16.CRT.ARM64.Desktop.vsix
|
||||
|
||||
mkdir -p /usr/aarch64-pc-windows-msvc/usr/include
|
||||
mkdir -p /usr/aarch64-pc-windows-msvc/usr/lib
|
||||
|
||||
# lowercase folder/file names
|
||||
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
|
||||
|
||||
# .h
|
||||
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/aarch64-pc-windows-msvc/usr/include)
|
||||
|
||||
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/aarch64-pc-windows-msvc/usr/include
|
||||
|
||||
# lowercase #include "" and #include <>
|
||||
find /usr/aarch64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
|
||||
|
||||
# ARM intrinsics
|
||||
# original dir: MSVC/
|
||||
|
||||
# '__n128x4' redefined in arm_neon.h
|
||||
# "arm64_neon.h" included from intrin.h
|
||||
|
||||
(cd /usr/lib/llvm19/lib/clang/19/include && cp arm_neon.h intrin.h -t /usr/aarch64-pc-windows-msvc/usr/include)
|
||||
|
||||
# .lib
|
||||
|
||||
# _Interlocked intrinsics
|
||||
# must always link with arm64rt.lib
|
||||
# reason: https://developercommunity.visualstudio.com/t/libucrtlibstreamobj-error-lnk2001-unresolved-exter/1544787#T-ND1599818
|
||||
# I don't understand the 'correct' fix for this, arm64rt.lib is supposed to be the workaround
|
||||
|
||||
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/arm64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib runtimeobject.lib fwpuclnt.lib arm64rt.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
|
||||
|
||||
(cd 'contents/vc/tools/msvc/14.16.27023/lib/arm64' && cp libcmt.lib libvcruntime.lib -t /usr/aarch64-pc-windows-msvc/usr/lib)
|
||||
|
||||
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/arm64/libucrt.lib' /usr/aarch64-pc-windows-msvc/usr/lib
|
||||
105
ci/sysroot-x86_64-pc-windows-msvc.sh
Normal file
105
ci/sysroot-x86_64-pc-windows-msvc.sh
Normal file
@@ -0,0 +1,105 @@
|
||||
#!/bin/sh
|
||||
|
||||
# https://github.com/mstorsjo/msvc-wine/blob/master/vsdownload.py
|
||||
# https://github.com/mozilla/gecko-dev/blob/6027d1d91f2d3204a3992633b3ef730ff005fc64/build/vs/vs2022-car.yaml
|
||||
|
||||
# function dl() {
|
||||
# curl -O https://download.visualstudio.microsoft.com/download/pr/$1
|
||||
# }
|
||||
|
||||
# [[.h]]
|
||||
|
||||
# "id": "Win11SDK_10.0.26100"
|
||||
# "version": "10.0.26100.7"
|
||||
|
||||
# libucrt.lib
|
||||
|
||||
# example: <assert.h>
|
||||
# dir: ucrt/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ee3a5fc6e9fc832af7295b138e93839/universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b1aa09b90fe314aceb090f6ec7626624/16ab2ea2187acffa6435e334796c8c89.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/400609bb0ff5804e36dbe6dcd42a7f01/6ee7bbee8435130a869cf971694fd9e2.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/2ac327317abb865a0e3f56b2faefa918/78fa3c824c2c48bd4a49ab5969adaaf7.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/f034bc0b2680f67dccd4bfeea3d0f932/7afc7b670accd8e3cc94cfffd516f5cb.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/7ed5e12f9d50f80825a8b27838cf4c7f/96076045170fe5db6d5dcf14b6f6688e.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/764edc185a696bda9e07df8891dddbbb/a1e2a83aa8a71c48c742eeaff6e71928.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/66854bedc6dbd5ccb5dd82c8e2412231/b2f03f34ff83ec013b9e45c7cd8e8a73.cab
|
||||
|
||||
# example: <windows.h>
|
||||
# dir: um/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/b286efac4d83a54fc49190bddef1edc9/windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/e0dc3811d92ab96fcb72bf63d6c08d71/766c0ffd568bbb31bf7fb6793383e24a.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/613503da4b5628768497822826aed39f/8125ee239710f33ea485965f76fae646.cab
|
||||
|
||||
# example: <winapifamily.h>
|
||||
# dir: shared/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/122979f0348d3a2a36b6aa1a111d5d0c/windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/766e04beecdfccff39e91dd9eb32834a/e89e3dcbb016928c7e426238337d69eb.cab
|
||||
|
||||
|
||||
# "id": "Microsoft.VisualC.14.16.CRT.Headers"
|
||||
# "version": "14.16.27045"
|
||||
|
||||
# example: <vcruntime.h>
|
||||
# dir: MSVC/
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/87bbe41e09a2f83711e72696f49681429327eb7a4b90618c35667a6ba2e2880e/Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||
|
||||
# [[.lib]]
|
||||
|
||||
# advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/944c4153b849a1f7d0c0404a4f1c05ea/windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5306aed3e1a38d1e8bef5934edeb2a9b/05047a45609f311645eebcac2739fc4c.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/13c8a73a0f5a6474040b26d016a26fab/13d68b8a7b6678a368e2d13ff4027521.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/149578fb3b621cdb61ee1813b9b3e791/463ad1b0783ebda908fd6c16a4abfe93.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/5c986c4f393c6b09d5aec3b539e9fb4a/5a22e5cde814b041749fb271547f4dd5.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/bfc3904a0195453419ae4dfea7abd6fb/e10768bb6e9d0ea730280336b697da66.cab
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/637f9f3be880c71f9e3ca07b4d67345c/f9b24c8280986c0683fbceca5326d806.cab
|
||||
|
||||
# dbghelp.lib fwpuclnt.lib
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/9f51690d5aa804b1340ce12d1ec80f89/windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/32863b8d-a46d-4231-8e84-0888519d20a9/d3a7df4ca3303a698640a29e558a5e5b/58314d0646d7e1a25e97c902166c3155.cab
|
||||
|
||||
# libcmt.lib libvcruntime.lib
|
||||
curl -O https://download.visualstudio.microsoft.com/download/pr/bac0afd7-cc9e-4182-8a83-9898fa20e092/8728f21ae09940f1f4b4ee47b4a596be2509e2a47d2f0c83bbec0ea37d69644b/Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
|
||||
|
||||
|
||||
msiextract universal%20crt%20headers%20libraries%20and%20sources-x86_en-us.msi
|
||||
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers-x86_en-us.msi
|
||||
msiextract windows%20sdk%20for%20windows%20store%20apps%20headers%20onecoreuap-x86_en-us.msi
|
||||
msiextract windows%20sdk%20for%20windows%20store%20apps%20libs-x86_en-us.msi
|
||||
msiextract windows%20sdk%20desktop%20libs%20x64-x86_en-us.msi
|
||||
unzip -o Microsoft.VisualC.14.16.CRT.Headers.vsix
|
||||
unzip -o Microsoft.VisualC.14.16.CRT.x64.Desktop.vsix
|
||||
|
||||
mkdir -p /usr/x86_64-pc-windows-msvc/usr/include
|
||||
mkdir -p /usr/x86_64-pc-windows-msvc/usr/lib
|
||||
|
||||
# lowercase folder/file names
|
||||
echo "$(find . -regex ".*/[^/]*[A-Z][^/]*")" | xargs -I{} sh -c 'mv "$(echo "{}" | sed -E '"'"'s/(.*\/)/\L\1/'"'"')" "$(echo "{}" | tr [A-Z] [a-z])"'
|
||||
|
||||
# .h
|
||||
(cd 'program files/windows kits/10/include/10.0.26100.0' && cp -r ucrt/* um/* shared/* -t /usr/x86_64-pc-windows-msvc/usr/include)
|
||||
|
||||
cp -r contents/vc/tools/msvc/14.16.27023/include/* /usr/x86_64-pc-windows-msvc/usr/include
|
||||
|
||||
# lowercase #include "" and #include <>
|
||||
find /usr/x86_64-pc-windows-msvc/usr/include -type f -exec sed -i -E 's/(#include <[^<>]*?[A-Z][^<>]*?>)|(#include "[^"]*?[A-Z][^"]*?")/\L\1\2/' "{}" ';'
|
||||
|
||||
# x86 intrinsics
|
||||
# original dir: MSVC/
|
||||
|
||||
# '_mm_movemask_epi8' defined in emmintrin.h
|
||||
# '__v4sf' defined in xmmintrin.h
|
||||
# '__v2si' defined in mmintrin.h
|
||||
# '__m128d' redefined in immintrin.h
|
||||
# '__m128i' redefined in intrin.h
|
||||
# '_mm_comlt_epu8' defined in ammintrin.h
|
||||
|
||||
(cd /usr/lib/llvm19/lib/clang/19/include && cp emmintrin.h xmmintrin.h mmintrin.h immintrin.h intrin.h ammintrin.h -t /usr/x86_64-pc-windows-msvc/usr/include)
|
||||
|
||||
# .lib
|
||||
(cd 'program files/windows kits/10/lib/10.0.26100.0/um/x64' && cp advapi32.lib bcrypt.lib kernel32.lib ntdll.lib user32.lib uuid.lib ws2_32.lib userenv.lib cfgmgr32.lib dbghelp.lib fwpuclnt.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
|
||||
|
||||
(cd 'contents/vc/tools/msvc/14.16.27023/lib/x64' && cp libcmt.lib libvcruntime.lib -t /usr/x86_64-pc-windows-msvc/usr/lib)
|
||||
|
||||
cp 'program files/windows kits/10/lib/10.0.26100.0/ucrt/x64/libucrt.lib' /usr/x86_64-pc-windows-msvc/usr/lib
|
||||
@@ -55,6 +55,9 @@ plugins:
|
||||
show_signature_annotations: true
|
||||
show_root_heading: true
|
||||
members_order: source
|
||||
docstring_section_style: list
|
||||
signature_crossrefs: true
|
||||
separate_signature: true
|
||||
import:
|
||||
# for cross references
|
||||
- https://arrow.apache.org/docs/objects.inv
|
||||
|
||||
@@ -1,6 +1,16 @@
|
||||
# Python API Reference
|
||||
|
||||
This section contains the API reference for the OSS Python API.
|
||||
This section contains the API reference for the Python API. There is a
|
||||
synchronous and an asynchronous API client.
|
||||
|
||||
The general flow of using the API is:
|
||||
|
||||
1. Use [lancedb.connect][] or [lancedb.connect_async][] to connect to a database.
|
||||
2. Use the returned [lancedb.DBConnection][] or [lancedb.AsyncConnection][] to
|
||||
create or open tables.
|
||||
3. Use the returned [lancedb.table.Table][] or [lancedb.AsyncTable][] to query
|
||||
or modify tables.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.13.1-beta.0</version>
|
||||
<version>0.14.0-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.13.1-beta.0</version>
|
||||
<version>0.14.0-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
20
node/package-lock.json
generated
20
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,14 +52,14 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -91,13 +91,13 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0"
|
||||
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.1",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.13.1-beta.0"
|
||||
version = "0.14.0-beta.1"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -110,7 +110,10 @@ describe("given a connection", () => {
|
||||
let table = await db.createTable("test", data, { useLegacyFormat: true });
|
||||
|
||||
const isV2 = async (table: Table) => {
|
||||
const data = await table.query().toArrow({ maxBatchLength: 100000 });
|
||||
const data = await table
|
||||
.query()
|
||||
.limit(10000)
|
||||
.toArrow({ maxBatchLength: 100000 });
|
||||
console.log(data.batches.length);
|
||||
return data.batches.length < 5;
|
||||
};
|
||||
|
||||
@@ -585,11 +585,11 @@ describe("When creating an index", () => {
|
||||
expect(fs.readdirSync(indexDir)).toHaveLength(1);
|
||||
|
||||
for await (const r of tbl.query().where("id > 1").select(["id"])) {
|
||||
expect(r.numRows).toBe(298);
|
||||
expect(r.numRows).toBe(10);
|
||||
}
|
||||
// should also work with 'filter' alias
|
||||
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
|
||||
expect(r.numRows).toBe(298);
|
||||
expect(r.numRows).toBe(10);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"vector database",
|
||||
"ann"
|
||||
],
|
||||
"version": "0.13.1-beta.0",
|
||||
"version": "0.14.0-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.17.0-beta.0"
|
||||
current_version = "0.17.0-beta.3"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.17.0-beta.0"
|
||||
version = "0.17.0-beta.3"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.20.0b2",
|
||||
"pylance==0.20.0b3",
|
||||
"tqdm>=4.27.0",
|
||||
"pydantic>=1.10",
|
||||
"packaging",
|
||||
|
||||
@@ -12,18 +12,22 @@
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
from typing import ClassVar, List, Union
|
||||
from typing import ClassVar, TYPE_CHECKING, List, Union
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
|
||||
from ..util import attempt_import_or_raise
|
||||
from .base import TextEmbeddingFunction
|
||||
from .base import EmbeddingFunction
|
||||
from .registry import register
|
||||
from .utils import api_key_not_found_help, TEXT
|
||||
from .utils import api_key_not_found_help, IMAGES
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import PIL
|
||||
|
||||
|
||||
@register("voyageai")
|
||||
class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses the VoyageAI API
|
||||
|
||||
@@ -36,6 +40,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
|
||||
* voyage-3
|
||||
* voyage-3-lite
|
||||
* voyage-multimodal-3
|
||||
* voyage-finance-2
|
||||
* voyage-multilingual-2
|
||||
* voyage-law-2
|
||||
@@ -54,7 +59,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
.create(name="voyage-3")
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
data: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
data = [ { "text": "hello world" },
|
||||
@@ -77,6 +82,7 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
return 1536
|
||||
elif self.name in [
|
||||
"voyage-3",
|
||||
"voyage-multimodal-3",
|
||||
"voyage-finance-2",
|
||||
"voyage-multilingual-2",
|
||||
"voyage-law-2",
|
||||
@@ -85,19 +91,19 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
else:
|
||||
raise ValueError(f"Model {self.name} not supported")
|
||||
|
||||
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
|
||||
return self.compute_source_embeddings(query, input_type="query")
|
||||
def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
    """
    Normalize the accepted input containers before embedding.

    A single ``str`` or ``bytes`` value is wrapped in a one-element list, a
    ``pa.Array`` is converted with ``to_pylist()``, and a ``pa.ChunkedArray``
    is first combined into one chunk and then converted the same way. Any
    other value is handed back unchanged.
    """
    if isinstance(images, (str, bytes)):
        return [images]
    if isinstance(images, pa.Array):
        return images.to_pylist()
    if isinstance(images, pa.ChunkedArray):
        return images.combine_chunks().to_pylist()
    return images
|
||||
|
||||
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
|
||||
texts = self.sanitize_input(texts)
|
||||
input_type = (
|
||||
kwargs.get("input_type") or "document"
|
||||
) # assume source input type if not passed by `compute_query_embeddings`
|
||||
return self.generate_embeddings(texts, input_type=input_type)
|
||||
|
||||
def generate_embeddings(
|
||||
self, texts: Union[List[str], np.ndarray], *args, **kwargs
|
||||
) -> List[np.array]:
|
||||
def generate_text_embeddings(self, text: str, **kwargs) -> np.ndarray:
|
||||
"""
|
||||
Get the embeddings for the given texts
|
||||
|
||||
@@ -109,15 +115,55 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
|
||||
truncation: Optional[bool]
|
||||
"""
|
||||
VoyageAIEmbeddingFunction._init_client()
|
||||
rs = VoyageAIEmbeddingFunction.client.embed(
|
||||
texts=texts, model=self.name, **kwargs
|
||||
)
|
||||
if self.name in ["voyage-multimodal-3"]:
|
||||
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
|
||||
inputs=[[text]], model=self.name, **kwargs
|
||||
)
|
||||
else:
|
||||
rs = VoyageAIEmbeddingFunction._get_client().embed(
|
||||
texts=[text], model=self.name, **kwargs
|
||||
)
|
||||
|
||||
return [emb for emb in rs.embeddings]
|
||||
return rs.embeddings[0]
|
||||
|
||||
def generate_image_embedding(
    self, image: "PIL.Image.Image", **kwargs
) -> np.ndarray:
    """
    Embed a single image through the client's ``multimodal_embed`` call.

    Parameters
    ----------
    image : PIL.Image.Image
        The image to embed.
    **kwargs
        Forwarded unchanged to ``multimodal_embed``.

    Returns
    -------
    The first (and only) entry of the response's ``embeddings``.
    """
    client = VoyageAIEmbeddingFunction._get_client()
    # The image is sent as a one-element batch holding a one-element input.
    response = client.multimodal_embed(inputs=[[image]], model=self.name, **kwargs)
    return response.embeddings[0]
|
||||
|
||||
def compute_query_embeddings(
    self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
) -> List[np.ndarray]:
    """
    Compute the embeddings for a given user query

    Parameters
    ----------
    query : Union[str, PIL.Image.Image]
        The query to embed. A query can be either text or an image.

    Returns
    -------
    List[np.ndarray]
        A one-element list holding the embedding produced for the query.

    Raises
    ------
    TypeError
        If the query is neither a ``str`` nor a ``PIL.Image.Image``.
    """
    if isinstance(query, str):
        return [self.generate_text_embeddings(query, input_type="query")]

    # Pillow is only looked up for non-text queries, so text-only users do
    # not need it installed.
    PIL = attempt_import_or_raise("PIL", "pillow")
    if isinstance(query, PIL.Image.Image):
        return [self.generate_image_embedding(query, input_type="query")]

    raise TypeError("Only text or PIL images supported as query")
|
||||
|
||||
def compute_source_embeddings(
    self, images: IMAGES, *args, **kwargs
) -> List[np.array]:
    """
    Embed every source item, one request per item.

    The input is first normalized with ``sanitize_input``; each resulting
    element is then embedded via ``generate_image_embedding`` with
    ``input_type="document"``. Extra positional and keyword arguments are
    accepted but not used.
    """
    embeddings = []
    for item in self.sanitize_input(images):
        embeddings.append(self.generate_image_embedding(item, input_type="document"))
    return embeddings
|
||||
|
||||
@staticmethod
|
||||
def _init_client():
|
||||
def _get_client():
|
||||
if VoyageAIEmbeddingFunction.client is None:
|
||||
voyageai = attempt_import_or_raise("voyageai")
|
||||
if os.environ.get("VOYAGE_API_KEY") is None:
|
||||
@@ -125,3 +171,4 @@ class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
|
||||
VoyageAIEmbeddingFunction.client = voyageai.Client(
|
||||
os.environ["VOYAGE_API_KEY"]
|
||||
)
|
||||
return VoyageAIEmbeddingFunction.client
|
||||
|
||||
0
python/python/lancedb/integrations/__init__.py
Normal file
0
python/python/lancedb/integrations/__init__.py
Normal file
248
python/python/lancedb/integrations/pyarrow.py
Normal file
248
python/python/lancedb/integrations/pyarrow.py
Normal file
@@ -0,0 +1,248 @@
|
||||
import logging
|
||||
from typing import Any, List, Optional, Tuple, Union, Literal
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from ..table import Table
|
||||
|
||||
Filter = Union[str, pa.compute.Expression]
|
||||
Keys = Union[str, List[str]]
|
||||
JoinType = Literal[
|
||||
"left semi",
|
||||
"right semi",
|
||||
"left anti",
|
||||
"right anti",
|
||||
"inner",
|
||||
"left outer",
|
||||
"right outer",
|
||||
"full outer",
|
||||
]
|
||||
|
||||
|
||||
class PyarrowScannerAdapter(pa.dataset.Scanner):
    """
    Presents a LanceDB table through the ``pyarrow.dataset.Scanner`` interface.

    Reads are served by building a query on the wrapped table. Operations that
    have no implementation here (``from_batches``, ``from_dataset``,
    ``from_fragment`` and ``take``) raise ``NotImplementedError``.
    """

    def __init__(
        self,
        table: Table,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """
        Parameters
        ----------
        table : Table
            The table to scan.
        columns : Optional[List[str]]
            Columns to select; ``None`` applies no projection.
        filter : Optional[Filter]
            Filter applied when counting and when reading rows.
        batch_size : Optional[int]
            Forwarded to the query's ``to_batches`` call.
        batch_readahead, fragment_readahead : Optional[int]
            Accepted for interface compatibility; ignored with a debug log.
        fragment_scan_options, memory_pool : Optional[Any]
            Not supported; any non-``None`` value raises.
        use_threads : bool
            Only ``True`` is supported.

        Raises
        ------
        NotImplementedError
            If ``fragment_scan_options`` or ``memory_pool`` is given, or if
            ``use_threads`` is ``False``.
        """
        self.table = table
        self.columns = columns
        self.filter = filter
        self.batch_size = batch_size
        if batch_readahead is not None:
            logging.debug("ignoring batch_readahead which has no lance equivalent")
        if fragment_readahead is not None:
            logging.debug("ignoring fragment_readahead which has no lance equivalent")
        if fragment_scan_options is not None:
            raise NotImplementedError("fragment_scan_options not supported")
        if use_threads is False:
            raise NotImplementedError("use_threads=False not supported")
        if memory_pool is not None:
            raise NotImplementedError("memory_pool not supported")

    def count_rows(self):
        """Return the table's row count for this scanner's filter."""
        return self.table.count_rows(self.filter)

    def from_batches(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError

    def from_dataset(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError

    def from_fragment(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError

    def head(self, num_rows: int):
        """Read at most ``num_rows`` rows and return them fully materialized."""
        return self.to_reader(limit=num_rows).read_all()

    @property
    def projected_schema(self):
        """Schema of the scan output, obtained by reading a one-row ``head``."""
        return self.head(1).schema

    def scan_batches(self):
        """Same as ``to_reader()`` with no limit."""
        return self.to_reader()

    def take(self, indices: List[int]):
        """Not implemented."""
        raise NotImplementedError

    def to_batches(self):
        """Same as ``to_reader()`` with no limit."""
        return self.to_reader()

    def to_table(self):
        """Read every matching row and return the materialized result."""
        return self.to_reader().read_all()

    def to_reader(self, *, limit: Optional[int] = None):
        """
        Build the query for this scan and return the result of its ``to_batches``.

        Parameters
        ----------
        limit : Optional[int]
            Maximum number of rows to read. When ``None`` the limit is set to
            the number of rows matching the filter, so every row is returned.

        Raises
        ------
        ValueError
            If ``limit`` is zero or negative.
        """
        query = self.table.search()
        # Disable the builtin limit
        if limit is None:
            query = query.limit(self.count_rows())
        elif limit <= 0:
            raise ValueError("limit must be positive")
        else:
            query = query.limit(limit)
        if self.columns is not None:
            query = query.select(self.columns)
        if self.filter is not None:
            query = query.where(self.filter, prefilter=True)
        return query.to_batches(batch_size=self.batch_size)
|
||||
|
||||
|
||||
class PyarrowDatasetAdapter(pa.dataset.Dataset):
    """
    Presents a LanceDB table through the ``pyarrow.dataset.Dataset`` interface.

    ``head``, ``to_batches`` and ``to_table`` each create a scanner via
    ``scanner`` and delegate to it. ``get_fragments``, ``join``, ``join_asof``,
    ``partition_expression``, ``replace_schema``, ``sort_by`` and ``take``
    raise ``NotImplementedError``.
    """

    def __init__(self, table: Table):
        """Wrap ``table``; no other state is kept."""
        self.table = table

    def count_rows(self, filter: Optional[Filter] = None):
        """Return the table's row count, optionally restricted by ``filter``."""
        return self.table.count_rows(filter)

    def get_fragments(self, filter: Optional[Filter] = None):
        """Not implemented."""
        raise NotImplementedError

    def head(
        self,
        num_rows: int,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return the scanner's ``head(num_rows)`` for the given scan options."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.head(num_rows)

    def join(
        self,
        right_dataset: Any,
        keys: Keys,
        right_keys: Optional[Keys] = None,
        join_type: Optional[JoinType] = None,
        left_suffix: Optional[str] = None,
        right_suffix: Optional[str] = None,
        coalesce_keys: bool = True,
        use_threads: bool = True,
    ):
        """Not implemented."""
        raise NotImplementedError

    def join_asof(
        self,
        right_dataset: Any,
        on: str,
        by: Keys,
        tolerance: int,
        right_on: Optional[str] = None,
        right_by: Optional[Keys] = None,
    ):
        """Not implemented."""
        raise NotImplementedError

    @property
    def partition_expression(self):
        """Not implemented."""
        raise NotImplementedError

    def replace_schema(self, schema: pa.Schema):
        """Not implemented."""
        raise NotImplementedError

    def scanner(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Create a ``PyarrowScannerAdapter`` over the table with these options."""
        return PyarrowScannerAdapter(
            self.table,
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )

    @property
    def schema(self):
        """The wrapped table's schema."""
        return self.table.schema

    def sort_by(self, sorting: Union[str, List[Tuple[str, bool]]]):
        """Not implemented."""
        raise NotImplementedError

    def take(
        self,
        indices: List[int],
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Not implemented."""
        raise NotImplementedError

    def to_batches(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return the scanner's ``to_batches()`` for the given scan options."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.to_batches()

    def to_table(
        self,
        columns: Optional[List[str]] = None,
        filter: Optional[Filter] = None,
        batch_size: Optional[int] = None,
        batch_readahead: Optional[int] = None,
        fragment_readahead: Optional[int] = None,
        fragment_scan_options: Optional[Any] = None,
        use_threads: bool = True,
        memory_pool: Optional[Any] = None,
    ):
        """Return the scanner's ``to_table()`` for the given scan options."""
        scan = self.scanner(
            columns=columns,
            filter=filter,
            batch_size=batch_size,
            batch_readahead=batch_readahead,
            fragment_readahead=fragment_readahead,
            fragment_scan_options=fragment_scan_options,
            use_threads=use_threads,
            memory_pool=memory_pool,
        )
        return scan.to_table()
|
||||
@@ -325,6 +325,14 @@ class LanceQueryBuilder(ABC):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
    """
    Execute the query and return the results as a pyarrow
    [RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html)

    Parameters
    ----------
    batch_size: Optional[int]
        Requested batch size for the returned reader. How (and whether) it
        is honoured is left to the implementing query builder.

    Returns
    -------
    pa.RecordBatchReader
    """
    raise NotImplementedError
|
||||
|
||||
def to_list(self) -> List[dict]:
|
||||
"""
|
||||
Execute the query and return the results as a list of dictionaries.
|
||||
@@ -869,6 +877,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
check_reranker_result(results)
|
||||
return results
|
||||
|
||||
def to_batches(self, /, batch_size: Optional[int] = None):
    """Always raises: reading an FTS query as batches is not implemented."""
    reason = "to_batches on an FTS query"
    raise NotImplementedError(reason)
|
||||
|
||||
def tantivy_to_arrow(self) -> pa.Table:
|
||||
try:
|
||||
import tantivy
|
||||
@@ -971,6 +982,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
||||
def to_arrow(self) -> pa.Table:
    """Run the query via ``to_batches`` and read the whole reader into a table."""
    reader = self.to_batches()
    return reader.read_all()
|
||||
|
||||
def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReader:
|
||||
query = Query(
|
||||
columns=self._columns,
|
||||
filter=self._where,
|
||||
@@ -980,7 +994,7 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
||||
# not actually respected in remote query
|
||||
offset=self._offset or 0,
|
||||
)
|
||||
return self._table._execute_query(query).read_all()
|
||||
return self._table._execute_query(query)
|
||||
|
||||
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
|
||||
"""Rerank the results using the specified reranker.
|
||||
@@ -1135,6 +1149,9 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
results = results.drop(["_rowid"])
|
||||
return results
|
||||
|
||||
def to_batches(self, /, batch_size: Optional[int] = None):
    """
    Always raises: reading a hybrid query as batches is not supported yet.

    ``batch_size`` is accepted only so the signature matches the other query
    builders' ``to_batches``; passing it yields the same
    ``NotImplementedError`` rather than a ``TypeError`` for an unexpected
    argument.
    """
    raise NotImplementedError("to_batches not yet supported on a hybrid query")
|
||||
|
||||
def _rank(self, results: pa.Table, column: str, ascending: bool = True):
|
||||
if len(results) == 0:
|
||||
return results
|
||||
@@ -1502,10 +1519,11 @@ class AsyncQueryBase(object):
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||
FilterExec: _distance@2 IS NOT NULL
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||
GlobalLimitExec: skip=0, fetch=10
|
||||
FilterExec: _distance@2 IS NOT NULL
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
@@ -599,7 +599,9 @@ async def test_create_in_v2_mode(tmp_path):
|
||||
)
|
||||
|
||||
async def is_in_v2_mode(tbl):
|
||||
batches = await tbl.query().to_batches(max_batch_length=1024 * 10)
|
||||
batches = (
|
||||
await tbl.query().limit(10 * 1024).to_batches(max_batch_length=1024 * 10)
|
||||
)
|
||||
num_batches = 0
|
||||
async for batch in batches:
|
||||
num_batches += 1
|
||||
|
||||
21
python/python/tests/test_duckdb.py
Normal file
21
python/python/tests/test_duckdb.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import duckdb
|
||||
import pyarrow as pa
|
||||
|
||||
import lancedb
|
||||
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
|
||||
|
||||
|
||||
def test_basic_query(tmp_path):
    """DuckDB can aggregate over a table wrapped in ``PyarrowDatasetAdapter``."""
    source = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    tbl = lancedb.connect(tmp_path).create_table("test", source)

    # The SQL below names this local, so it has to stay called ``adapter``
    # even though Python never reads it (presumably DuckDB resolves it from
    # the calling scope -- hence the F841 suppression).
    adapter = PyarrowDatasetAdapter(tbl)  # noqa: F841

    duck_conn = duckdb.connect()

    checks = (
        ("SELECT SUM(x) FROM adapter", 10),
        ("SELECT SUM(y) FROM adapter", 26),
    )
    for sql, expected_sum in checks:
        rows = duck_conn.sql(sql).fetchall()
        assert rows[0][0] == expected_sum
|
||||
47
python/python/tests/test_pyarrow.py
Normal file
47
python/python/tests/test_pyarrow.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import pyarrow as pa
|
||||
|
||||
import lancedb
|
||||
from lancedb.integrations.pyarrow import PyarrowDatasetAdapter
|
||||
|
||||
|
||||
def test_dataset_adapter(tmp_path):
    """Exercise ``PyarrowDatasetAdapter`` read paths on three small tables."""
    db = lancedb.connect(tmp_path)

    def check_sizes(ds, total, head_rows):
        # Row counts reported by count_rows, to_table and head(10)
        assert ds.count_rows() == total
        assert ds.to_table().num_rows == total
        assert ds.head(10).num_rows == head_rows

    # Two-column table: every accessor should reproduce the source data
    expected = pa.table({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    adapter = PyarrowDatasetAdapter(db.create_table("test", expected))

    assert adapter.count_rows() == 4
    assert adapter.count_rows("x > 2") == 2
    assert adapter.schema == expected.schema
    assert adapter.head(2) == expected.slice(0, 2)
    assert adapter.to_table() == expected
    assert adapter.to_batches().read_all() == expected
    assert adapter.scanner().to_table() == expected
    assert adapter.scanner().to_batches().read_all() == expected

    # Projection is reflected both in the schema and in the data
    x_only_schema = pa.schema([expected.schema.field("x")])
    assert adapter.scanner().projected_schema == expected.schema
    assert adapter.scanner(columns=["x"]).projected_schema == x_only_schema
    assert adapter.scanner(columns=["x"]).to_table() == pa.table({"x": [1, 2, 3, 4]})

    # Make sure we bypass the limit
    hundred = pa.table({"x": range(100)})
    adapter = PyarrowDatasetAdapter(db.create_table("test2", hundred))
    check_sizes(adapter, 100, 10)

    # Empty table
    empty_schema = pa.schema({"x": pa.int64()})
    adapter = PyarrowDatasetAdapter(db.create_table("test3", None, schema=empty_schema))
    check_sizes(adapter, 0, 0)

    assert adapter.scanner().projected_schema == pa.schema({"x": pa.int64()})
|
||||
@@ -193,7 +193,7 @@ def test_table_add_in_threadpool():
|
||||
if request.path == "/v1/table/test/insert/":
|
||||
request.send_response(200)
|
||||
request.end_headers()
|
||||
elif request.path == "/v1/table/test/create/":
|
||||
elif request.path == "/v1/table/test/create/?mode=create":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.13.1-beta.0"
|
||||
version = "0.14.0-beta.1"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.13.1-beta.0"
|
||||
version = "0.14.0-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -27,6 +27,7 @@ half = { workspace = true }
|
||||
lazy_static.workspace = true
|
||||
lance = { workspace = true }
|
||||
lance-datafusion.workspace = true
|
||||
lance-io = { workspace = true }
|
||||
lance-index = { workspace = true }
|
||||
lance-table = { workspace = true }
|
||||
lance-linalg = { workspace = true }
|
||||
|
||||
@@ -38,6 +38,9 @@ use crate::table::{NativeTable, TableDefinition, WriteOptions};
|
||||
use crate::utils::validate_table_name;
|
||||
use crate::Table;
|
||||
pub use lance_encoding::version::LanceFileVersion;
|
||||
#[cfg(feature = "remote")]
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use lance_table::io::commit::commit_handler_from_url;
|
||||
|
||||
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||
|
||||
@@ -133,7 +136,7 @@ impl IntoArrow for NoData {
|
||||
|
||||
/// A builder for configuring a [`Connection::create_table`] operation
|
||||
pub struct CreateTableBuilder<const HAS_DATA: bool, T: IntoArrow> {
|
||||
parent: Arc<dyn ConnectionInternal>,
|
||||
pub(crate) parent: Arc<dyn ConnectionInternal>,
|
||||
pub(crate) name: String,
|
||||
pub(crate) data: Option<T>,
|
||||
pub(crate) mode: CreateTableMode,
|
||||
@@ -341,7 +344,7 @@ pub struct OpenTableBuilder {
|
||||
}
|
||||
|
||||
impl OpenTableBuilder {
|
||||
fn new(parent: Arc<dyn ConnectionInternal>, name: String) -> Self {
|
||||
pub(crate) fn new(parent: Arc<dyn ConnectionInternal>, name: String) -> Self {
|
||||
Self {
|
||||
parent,
|
||||
name,
|
||||
@@ -717,12 +720,14 @@ impl ConnectBuilder {
|
||||
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
|
||||
})?;
|
||||
|
||||
let storage_options = StorageOptions(self.storage_options.clone());
|
||||
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
|
||||
&self.uri,
|
||||
&api_key,
|
||||
®ion,
|
||||
self.host_override,
|
||||
self.client_config,
|
||||
storage_options.into(),
|
||||
)?);
|
||||
Ok(Connection {
|
||||
internal,
|
||||
@@ -855,7 +860,7 @@ impl Database {
|
||||
let table_base_uri = if let Some(store) = engine {
|
||||
static WARN_ONCE: std::sync::Once = std::sync::Once::new();
|
||||
WARN_ONCE.call_once(|| {
|
||||
log::warn!("Specifing engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
|
||||
log::warn!("Specifying engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
|
||||
});
|
||||
let old_scheme = url.scheme().to_string();
|
||||
let new_scheme = format!("{}+{}", old_scheme, store);
|
||||
@@ -1036,6 +1041,7 @@ impl ConnectionInternal for Database {
|
||||
};
|
||||
|
||||
let mut write_params = options.write_options.lance_write_params.unwrap_or_default();
|
||||
|
||||
if matches!(&options.mode, CreateTableMode::Overwrite) {
|
||||
write_params.mode = WriteMode::Overwrite;
|
||||
}
|
||||
@@ -1122,7 +1128,7 @@ impl ConnectionInternal for Database {
|
||||
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
||||
let full_path = self.base_path.child(dir_name.clone());
|
||||
self.object_store
|
||||
.remove_dir_all(full_path)
|
||||
.remove_dir_all(full_path.clone())
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
// this error is not lance::Error::DatasetNotFound,
|
||||
@@ -1132,6 +1138,19 @@ impl ConnectionInternal for Database {
|
||||
},
|
||||
_ => Error::from(err),
|
||||
})?;
|
||||
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options: Some(self.storage_options.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let mut uri = self.uri.clone();
|
||||
if let Some(query_string) = &self.query_string {
|
||||
uri.push_str(&format!("?{}", query_string));
|
||||
}
|
||||
let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params))
|
||||
.await
|
||||
.unwrap();
|
||||
commit_handler.delete(&full_path).await.unwrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1169,6 +1188,7 @@ mod tests {
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
|
||||
use super::*;
|
||||
@@ -1296,6 +1316,7 @@ mod tests {
|
||||
// In v1 the row group size will trump max_batch_length
|
||||
let batches = tbl
|
||||
.query()
|
||||
.limit(20000)
|
||||
.execute_with_options(QueryExecutionOptions {
|
||||
max_batch_length: 50000,
|
||||
..Default::default()
|
||||
|
||||
@@ -596,7 +596,7 @@ impl Query {
|
||||
pub(crate) fn new(parent: Arc<dyn TableInternal>) -> Self {
|
||||
Self {
|
||||
parent,
|
||||
limit: None,
|
||||
limit: Some(DEFAULT_TOP_K),
|
||||
offset: None,
|
||||
filter: None,
|
||||
full_text_search: None,
|
||||
|
||||
@@ -21,6 +21,7 @@ use reqwest::{
|
||||
};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::remote::db::RemoteOptions;
|
||||
|
||||
const REQUEST_ID_HEADER: &str = "x-request-id";
|
||||
|
||||
@@ -215,6 +216,7 @@ impl RestfulLanceDbClient<Sender> {
|
||||
region: &str,
|
||||
host_override: Option<String>,
|
||||
client_config: ClientConfig,
|
||||
options: &RemoteOptions,
|
||||
) -> Result<Self> {
|
||||
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
|
||||
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
|
||||
@@ -226,6 +228,14 @@ impl RestfulLanceDbClient<Sender> {
|
||||
});
|
||||
}
|
||||
let db_name = parsed_url.host_str().unwrap();
|
||||
let db_prefix = {
|
||||
let prefix = parsed_url.path().trim_start_matches('/');
|
||||
if prefix.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(prefix)
|
||||
}
|
||||
};
|
||||
|
||||
// Get the timeouts
|
||||
let connect_timeout = Self::get_timeout(
|
||||
@@ -255,6 +265,8 @@ impl RestfulLanceDbClient<Sender> {
|
||||
region,
|
||||
db_name,
|
||||
host_override.is_some(),
|
||||
options,
|
||||
db_prefix,
|
||||
)?)
|
||||
.user_agent(client_config.user_agent)
|
||||
.build()
|
||||
@@ -262,6 +274,7 @@ impl RestfulLanceDbClient<Sender> {
|
||||
message: "Failed to build HTTP client".into(),
|
||||
source: Some(Box::new(err)),
|
||||
})?;
|
||||
|
||||
let host = match host_override {
|
||||
Some(host_override) => host_override,
|
||||
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
|
||||
@@ -287,6 +300,8 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
region: &str,
|
||||
db_name: &str,
|
||||
has_host_override: bool,
|
||||
options: &RemoteOptions,
|
||||
db_prefix: Option<&str>,
|
||||
) -> Result<HeaderMap> {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
@@ -312,6 +327,34 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
})?,
|
||||
);
|
||||
}
|
||||
// Forward the optional database prefix in its own header.
// Binding the value with `if let` avoids the `is_some()` + double `unwrap()` pattern.
if let Some(prefix) = db_prefix {
    headers.insert(
        "x-lancedb-database-prefix",
        HeaderValue::from_str(prefix).map_err(|_| Error::InvalidInput {
            message: format!("non-ascii database prefix '{}' provided", prefix),
        })?,
    );
}

// Both option spellings feed the same header. They are processed in this order, so
// `azure_storage_account_name` overrides `account_name` when both are present.
for key in ["account_name", "azure_storage_account_name"] {
    if let Some(v) = options.0.get(key) {
        headers.insert(
            "x-azure-storage-account-name",
            HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
                // Report the rejected account name itself, not the database name.
                message: format!("non-ascii storage account name '{}' provided", v),
            })?,
        );
    }
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
|
||||
@@ -12,18 +12,21 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use async_trait::async_trait;
|
||||
use http::StatusCode;
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use moka::future::Cache;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use serde::Deserialize;
|
||||
use tokio::task::spawn_blocking;
|
||||
|
||||
use crate::connection::{
|
||||
ConnectionInternal, CreateTableBuilder, NoData, OpenTableBuilder, TableNamesBuilder,
|
||||
ConnectionInternal, CreateTableBuilder, CreateTableMode, NoData, OpenTableBuilder,
|
||||
TableNamesBuilder,
|
||||
};
|
||||
use crate::embeddings::EmbeddingRegistry;
|
||||
use crate::error::Result;
|
||||
@@ -52,9 +55,16 @@ impl RemoteDatabase {
|
||||
region: &str,
|
||||
host_override: Option<String>,
|
||||
client_config: ClientConfig,
|
||||
options: RemoteOptions,
|
||||
) -> Result<Self> {
|
||||
let client =
|
||||
RestfulLanceDbClient::try_new(uri, api_key, region, host_override, client_config)?;
|
||||
let client = RestfulLanceDbClient::try_new(
|
||||
uri,
|
||||
api_key,
|
||||
region,
|
||||
host_override,
|
||||
client_config,
|
||||
&options,
|
||||
)?;
|
||||
|
||||
let table_cache = Cache::builder()
|
||||
.time_to_live(std::time::Duration::from_secs(300))
|
||||
@@ -95,6 +105,16 @@ impl<S: HttpSend> std::fmt::Display for RemoteDatabase<S> {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&CreateTableMode> for &'static str {
|
||||
fn from(val: &CreateTableMode) -> Self {
|
||||
match val {
|
||||
CreateTableMode::Create => "create",
|
||||
CreateTableMode::Overwrite => "overwrite",
|
||||
CreateTableMode::ExistOk(_) => "exist_ok",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
||||
async fn table_names(&self, options: TableNamesBuilder) -> Result<Vec<String>> {
|
||||
@@ -133,14 +153,40 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
||||
let req = self
|
||||
.client
|
||||
.post(&format!("/v1/table/{}/create/", options.name))
|
||||
.query(&[("mode", Into::<&str>::into(&options.mode))])
|
||||
.body(data_buffer)
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
|
||||
let (request_id, rsp) = self.client.send(req, false).await?;
|
||||
|
||||
if rsp.status() == StatusCode::BAD_REQUEST {
|
||||
let body = rsp.text().await.err_to_http(request_id.clone())?;
|
||||
if body.contains("already exists") {
|
||||
return Err(crate::Error::TableAlreadyExists { name: options.name });
|
||||
return match options.mode {
|
||||
CreateTableMode::Create => {
|
||||
Err(crate::Error::TableAlreadyExists { name: options.name })
|
||||
}
|
||||
CreateTableMode::ExistOk(callback) => {
|
||||
let builder = OpenTableBuilder::new(options.parent, options.name);
|
||||
let builder = (callback)(builder);
|
||||
builder.execute().await
|
||||
}
|
||||
|
||||
// This should not happen, as we explicitly set the mode to overwrite and the server
|
||||
// shouldn't return an error if the table already exists.
|
||||
//
|
||||
// However if the server is an older version that doesn't support the mode parameter,
|
||||
// then we'll get the 400 response.
|
||||
CreateTableMode::Overwrite => Err(crate::Error::Http {
|
||||
source: format!(
|
||||
"unexpected response from server for create mode overwrite: {}",
|
||||
body
|
||||
)
|
||||
.into(),
|
||||
request_id,
|
||||
status_code: Some(StatusCode::BAD_REQUEST),
|
||||
}),
|
||||
};
|
||||
} else {
|
||||
return Err(crate::Error::InvalidInput { message: body });
|
||||
}
|
||||
@@ -206,6 +252,29 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Key/value options for remote LanceDB connections.
///
/// Holds the subset of `StorageOptions` that is compatible with a remote connection.
#[derive(Clone, Debug, Default)]
pub struct RemoteOptions(pub HashMap<String, String>);

impl RemoteOptions {
    /// Wraps `options` unchanged; no filtering or validation happens here.
    pub fn new(options: HashMap<String, String>) -> Self {
        RemoteOptions(options)
    }
}
|
||||
|
||||
impl From<StorageOptions> for RemoteOptions {
|
||||
fn from(options: StorageOptions) -> Self {
|
||||
let supported_opts = vec!["account_name", "azure_storage_account_name"];
|
||||
let mut filtered = HashMap::new();
|
||||
for opt in supported_opts {
|
||||
if let Some(v) = options.0.get(opt) {
|
||||
filtered.insert(opt.to_string(), v.to_string());
|
||||
}
|
||||
}
|
||||
RemoteOptions::new(filtered)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::{Arc, OnceLock};
|
||||
@@ -213,7 +282,9 @@ mod tests {
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
use crate::connection::ConnectBuilder;
|
||||
use crate::{
|
||||
connection::CreateTableMode,
|
||||
remote::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
|
||||
Connection, Error,
|
||||
};
|
||||
@@ -382,6 +453,73 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_modes() {
|
||||
let test_cases = [
|
||||
(None, "mode=create"),
|
||||
(Some(CreateTableMode::Create), "mode=create"),
|
||||
(Some(CreateTableMode::Overwrite), "mode=overwrite"),
|
||||
(
|
||||
Some(CreateTableMode::ExistOk(Box::new(|b| b))),
|
||||
"mode=exist_ok",
|
||||
),
|
||||
];
|
||||
|
||||
for (mode, expected_query_string) in test_cases {
|
||||
let conn = Connection::new_with_handler(move |request| {
|
||||
assert_eq!(request.method(), &reqwest::Method::POST);
|
||||
assert_eq!(request.url().path(), "/v1/table/table1/create/");
|
||||
assert_eq!(request.url().query(), Some(expected_query_string));
|
||||
|
||||
http::Response::builder().status(200).body("").unwrap()
|
||||
});
|
||||
|
||||
let data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let mut builder = conn.create_table("table1", reader);
|
||||
if let Some(mode) = mode {
|
||||
builder = builder.mode(mode);
|
||||
}
|
||||
builder.execute().await.unwrap();
|
||||
}
|
||||
|
||||
// check that the open table callback is called with exist_ok
|
||||
let conn = Connection::new_with_handler(|request| match request.url().path() {
|
||||
"/v1/table/table1/create/" => http::Response::builder()
|
||||
.status(400)
|
||||
.body("Table table1 already exists")
|
||||
.unwrap(),
|
||||
"/v1/table/table1/describe/" => http::Response::builder().status(200).body("").unwrap(),
|
||||
_ => {
|
||||
panic!("unexpected path: {:?}", request.url().path());
|
||||
}
|
||||
});
|
||||
let data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let called: Arc<OnceLock<bool>> = Arc::new(OnceLock::new());
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let called_in_cb = called.clone();
|
||||
conn.create_table("table1", reader)
|
||||
.mode(CreateTableMode::ExistOk(Box::new(move |b| {
|
||||
called_in_cb.clone().set(true).unwrap();
|
||||
b
|
||||
})))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let called = *called.get().unwrap_or(&false);
|
||||
assert!(called);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_empty() {
|
||||
let conn = Connection::new_with_handler(|request| {
|
||||
@@ -436,4 +574,16 @@ mod tests {
|
||||
});
|
||||
conn.rename_table("table1", "table2").await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_remote_options() {
|
||||
let db_uri = "db://my-container/my-prefix";
|
||||
let _ = ConnectBuilder::new(db_uri)
|
||||
.region("us-east-1")
|
||||
.api_key("my-api-key")
|
||||
.storage_options(vec![("azure_storage_account_name", "my-storage-account")])
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1227,6 +1227,7 @@ mod tests {
|
||||
"prefilter": true,
|
||||
"distance_type": "l2",
|
||||
"nprobes": 20,
|
||||
"k": 10,
|
||||
"ef": Option::<usize>::None,
|
||||
"refine_factor": null,
|
||||
"version": null,
|
||||
|
||||
Reference in New Issue
Block a user