mirror of
https://github.com/lancedb/lancedb.git
synced 2026-04-05 15:30:41 +00:00
Compare commits
88 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
590c0c1e77 | ||
|
|
382ecd65e3 | ||
|
|
e26b22bcca | ||
|
|
3ba46135a5 | ||
|
|
f903d07887 | ||
|
|
5d550124bd | ||
|
|
c57cb310a2 | ||
|
|
97754f5123 | ||
|
|
7b1c063848 | ||
|
|
c7f189f27b | ||
|
|
a0a2942ad5 | ||
|
|
e3d53dd185 | ||
|
|
66804e99fc | ||
|
|
9f85d4c639 | ||
|
|
1ba19d728e | ||
|
|
4c44587af0 | ||
|
|
1d1cafb59c | ||
|
|
4714598155 | ||
|
|
74f457a0f2 | ||
|
|
cca6a7c989 | ||
|
|
ad96489114 | ||
|
|
76429730c0 | ||
|
|
874b74dd3c | ||
|
|
61de47f3a5 | ||
|
|
f4d613565e | ||
|
|
410ab9b6fe | ||
|
|
1d6e00b902 | ||
|
|
a0228036ae | ||
|
|
d8fc071a7d | ||
|
|
e6fd8d071e | ||
|
|
670dcca551 | ||
|
|
ed7e01a58b | ||
|
|
3450ccaf7f | ||
|
|
9b229f1e7c | ||
|
|
f5b21c0aa4 | ||
|
|
e927924d26 | ||
|
|
11a4966bfc | ||
|
|
dd5aaa72dc | ||
|
|
3a200d77ef | ||
|
|
bd09c53938 | ||
|
|
0b18e33180 | ||
|
|
c89240b16c | ||
|
|
099ff355a4 | ||
|
|
c5995fda67 | ||
|
|
25eb1fbfa4 | ||
|
|
4ac41c5c3f | ||
|
|
9a5b0398ec | ||
|
|
d1d720d08a | ||
|
|
c2e543f1b7 | ||
|
|
216c1b5f77 | ||
|
|
fc1867da83 | ||
|
|
f951da2b00 | ||
|
|
6530d82690 | ||
|
|
b3fc9c444f | ||
|
|
6de8f42dcd | ||
|
|
5c3bd68e58 | ||
|
|
4be85444f0 | ||
|
|
68c07f333f | ||
|
|
814a379e08 | ||
|
|
f31561c5bb | ||
|
|
e0c5ceac03 | ||
|
|
e93bb3355a | ||
|
|
b75991eb07 | ||
|
|
97ca9bb943 | ||
|
|
fa1b04f341 | ||
|
|
367abe99d2 | ||
|
|
52ce2c995c | ||
|
|
e71a00998c | ||
|
|
39a2ac0a1c | ||
|
|
bc7b344fa4 | ||
|
|
f91d2f5fec | ||
|
|
cf81b6419f | ||
|
|
0498ac1f2f | ||
|
|
aeb1c3ee6a | ||
|
|
f9ae46c0e7 | ||
|
|
84bf022fb1 | ||
|
|
310967eceb | ||
|
|
154dbeee2a | ||
|
|
c9c08ac8b9 | ||
|
|
e253f5d9b6 | ||
|
|
05b4fb0990 | ||
|
|
613b9c1099 | ||
|
|
d5948576b9 | ||
|
|
0d3fc7860a | ||
|
|
531cec075c | ||
|
|
0e486511fa | ||
|
|
367262662d | ||
|
|
11efaf46ae |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.27.0-beta.0"
|
current_version = "0.27.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -23,12 +23,15 @@ runs:
|
|||||||
steps:
|
steps:
|
||||||
- name: CONFIRM ARM BUILD
|
- name: CONFIRM ARM BUILD
|
||||||
shell: bash
|
shell: bash
|
||||||
|
env:
|
||||||
|
ARM_BUILD: ${{ inputs.arm-build }}
|
||||||
run: |
|
run: |
|
||||||
echo "ARM BUILD: ${{ inputs.arm-build }}"
|
echo "ARM BUILD: $ARM_BUILD"
|
||||||
- name: Build x86_64 Manylinux wheel
|
- name: Build x86_64 Manylinux wheel
|
||||||
if: ${{ inputs.arm-build == 'false' }}
|
if: ${{ inputs.arm-build == 'false' }}
|
||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
|
maturin-version: "1.12.4"
|
||||||
command: build
|
command: build
|
||||||
working-directory: python
|
working-directory: python
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
@@ -44,6 +47,7 @@ runs:
|
|||||||
if: ${{ inputs.arm-build == 'true' }}
|
if: ${{ inputs.arm-build == 'true' }}
|
||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
|
maturin-version: "1.12.4"
|
||||||
command: build
|
command: build
|
||||||
working-directory: python
|
working-directory: python
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
|
|||||||
1
.github/workflows/build_mac_wheel/action.yml
vendored
1
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -20,6 +20,7 @@ runs:
|
|||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
|
maturin-version: "1.12.4"
|
||||||
# TODO: pass through interpreter
|
# TODO: pass through interpreter
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ runs:
|
|||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
|
maturin-version: "1.12.4"
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
working-directory: python
|
working-directory: python
|
||||||
|
|||||||
6
.github/workflows/dev.yml
vendored
6
.github/workflows/dev.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
name: Label PR
|
name: Label PR
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: srvaroa/labeler@master
|
- uses: srvaroa/labeler@v1
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
commitlint:
|
commitlint:
|
||||||
@@ -24,7 +24,7 @@ jobs:
|
|||||||
name: Verify PR title / description conforms to semantic-release
|
name: Verify PR title / description conforms to semantic-release
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/setup-node@v3
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: "18"
|
node-version: "18"
|
||||||
# These rules are disabled because Github will always ensure there
|
# These rules are disabled because Github will always ensure there
|
||||||
@@ -47,7 +47,7 @@ jobs:
|
|||||||
|
|
||||||
${{ github.event.pull_request.body }}
|
${{ github.event.pull_request.body }}
|
||||||
- if: failure()
|
- if: failure()
|
||||||
uses: actions/github-script@v6
|
uses: actions/github-script@v7
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const message = `**ACTION NEEDED**
|
const message = `**ACTION NEEDED**
|
||||||
|
|||||||
4
.github/workflows/docs.yml
vendored
4
.github/workflows/docs.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
|||||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||||
- name: Set up node
|
- name: Set up node
|
||||||
uses: actions/setup-node@v3
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
@@ -68,7 +68,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
PYTHONPATH=. mkdocs build
|
PYTHONPATH=. mkdocs build
|
||||||
- name: Setup Pages
|
- name: Setup Pages
|
||||||
uses: actions/configure-pages@v2
|
uses: actions/configure-pages@v5
|
||||||
- name: Upload artifact
|
- name: Upload artifact
|
||||||
uses: actions/upload-pages-artifact@v3
|
uses: actions/upload-pages-artifact@v3
|
||||||
with:
|
with:
|
||||||
|
|||||||
21
.github/workflows/nodejs.yml
vendored
21
.github/workflows/nodejs.yml
vendored
@@ -7,6 +7,7 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- Cargo.toml
|
- Cargo.toml
|
||||||
|
- Cargo.lock
|
||||||
- nodejs/**
|
- nodejs/**
|
||||||
- rust/**
|
- rust/**
|
||||||
- docs/src/js/**
|
- docs/src/js/**
|
||||||
@@ -37,7 +38,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
lfs: true
|
lfs: true
|
||||||
- uses: actions/setup-node@v3
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
@@ -77,9 +78,12 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
lfs: true
|
lfs: true
|
||||||
- uses: actions/setup-node@v3
|
- uses: actions/setup-node@v4
|
||||||
|
name: Setup Node.js 20 for build
|
||||||
with:
|
with:
|
||||||
node-version: ${{ matrix.node-version }}
|
# @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
|
||||||
|
# Build always on Node 20; tests run on the matrix version below.
|
||||||
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
cache-dependency-path: nodejs/package-lock.json
|
cache-dependency-path: nodejs/package-lock.json
|
||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
@@ -87,12 +91,16 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt update
|
sudo apt update
|
||||||
sudo apt install -y protobuf-compiler libssl-dev
|
sudo apt install -y protobuf-compiler libssl-dev
|
||||||
npm install -g @napi-rs/cli
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
npm ci --include=optional
|
npm ci --include=optional
|
||||||
npm run build:debug -- --profile ci
|
npm run build:debug -- --profile ci
|
||||||
npm run tsc
|
- uses: actions/setup-node@v4
|
||||||
|
name: Setup Node.js ${{ matrix.node-version }} for test
|
||||||
|
with:
|
||||||
|
node-version: ${{ matrix.node-version }}
|
||||||
|
- name: Compile TypeScript
|
||||||
|
run: npm run tsc
|
||||||
- name: Setup localstack
|
- name: Setup localstack
|
||||||
working-directory: .
|
working-directory: .
|
||||||
run: docker compose up --detach --wait
|
run: docker compose up --detach --wait
|
||||||
@@ -136,7 +144,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
lfs: true
|
lfs: true
|
||||||
- uses: actions/setup-node@v3
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
@@ -145,7 +153,6 @@ jobs:
|
|||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf
|
brew install protobuf
|
||||||
npm install -g @napi-rs/cli
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
npm ci --include=optional
|
npm ci --include=optional
|
||||||
|
|||||||
61
.github/workflows/npm-publish.yml
vendored
61
.github/workflows/npm-publish.yml
vendored
@@ -19,6 +19,7 @@ on:
|
|||||||
paths:
|
paths:
|
||||||
- .github/workflows/npm-publish.yml
|
- .github/workflows/npm-publish.yml
|
||||||
- Cargo.toml # Change in dependency frequently breaks builds
|
- Cargo.toml # Change in dependency frequently breaks builds
|
||||||
|
- Cargo.lock
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.ref }}
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
@@ -124,20 +125,22 @@ jobs:
|
|||||||
pre_build: |-
|
pre_build: |-
|
||||||
set -e &&
|
set -e &&
|
||||||
apt-get update &&
|
apt-get update &&
|
||||||
apt-get install -y protobuf-compiler pkg-config
|
apt-get install -y protobuf-compiler pkg-config &&
|
||||||
|
# The base image (manylinux2014-cross) sets TARGET_CC to the old
|
||||||
|
# GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
|
||||||
|
# so it picks up GCC even though the napi-rs image sets CC=clang.
|
||||||
|
# Override to use the image's clang-18 which supports -fuse-ld=lld.
|
||||||
|
export TARGET_CC=clang TARGET_CXX=clang++
|
||||||
- target: x86_64-unknown-linux-musl
|
- target: x86_64-unknown-linux-musl
|
||||||
# This one seems to need some extra memory
|
# This one seems to need some extra memory
|
||||||
host: ubuntu-2404-8x-x64
|
host: ubuntu-2404-8x-x64
|
||||||
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
|
|
||||||
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
|
|
||||||
features: fp16kernels
|
features: fp16kernels
|
||||||
pre_build: |-
|
pre_build: |-
|
||||||
set -e &&
|
set -e &&
|
||||||
apk add protobuf-dev curl &&
|
sudo apt-get update &&
|
||||||
ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
|
sudo apt-get install -y protobuf-compiler pkg-config &&
|
||||||
ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so &&
|
rustup target add x86_64-unknown-linux-musl &&
|
||||||
CC=gcc &&
|
export EXTRA_ARGS="-x"
|
||||||
CXX=g++
|
|
||||||
- target: aarch64-unknown-linux-gnu
|
- target: aarch64-unknown-linux-gnu
|
||||||
host: ubuntu-2404-8x-x64
|
host: ubuntu-2404-8x-x64
|
||||||
# https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
|
# https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
|
||||||
@@ -147,21 +150,20 @@ jobs:
|
|||||||
set -e &&
|
set -e &&
|
||||||
apt-get update &&
|
apt-get update &&
|
||||||
apt-get install -y protobuf-compiler pkg-config &&
|
apt-get install -y protobuf-compiler pkg-config &&
|
||||||
# https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
|
export TARGET_CC=clang TARGET_CXX=clang++ &&
|
||||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
|
# The manylinux2014 sysroot has glibc 2.17 headers which lack
|
||||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
|
# AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
|
||||||
|
export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
|
||||||
rustup target add aarch64-unknown-linux-gnu
|
rustup target add aarch64-unknown-linux-gnu
|
||||||
- target: aarch64-unknown-linux-musl
|
- target: aarch64-unknown-linux-musl
|
||||||
host: ubuntu-2404-8x-x64
|
host: ubuntu-2404-8x-x64
|
||||||
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
|
|
||||||
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
|
|
||||||
features: ","
|
features: ","
|
||||||
pre_build: |-
|
pre_build: |-
|
||||||
set -e &&
|
set -e &&
|
||||||
apk add protobuf-dev &&
|
sudo apt-get update &&
|
||||||
|
sudo apt-get install -y protobuf-compiler &&
|
||||||
rustup target add aarch64-unknown-linux-musl &&
|
rustup target add aarch64-unknown-linux-musl &&
|
||||||
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc &&
|
export EXTRA_ARGS="-x"
|
||||||
export CXX_aarch64_unknown_linux_musl=aarch64-linux-musl-g++
|
|
||||||
name: build - ${{ matrix.settings.target }}
|
name: build - ${{ matrix.settings.target }}
|
||||||
runs-on: ${{ matrix.settings.host }}
|
runs-on: ${{ matrix.settings.host }}
|
||||||
defaults:
|
defaults:
|
||||||
@@ -192,12 +194,18 @@ jobs:
|
|||||||
.cargo-cache
|
.cargo-cache
|
||||||
target/
|
target/
|
||||||
key: nodejs-${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
|
key: nodejs-${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
|
||||||
- name: Setup toolchain
|
|
||||||
run: ${{ matrix.settings.setup }}
|
|
||||||
if: ${{ matrix.settings.setup }}
|
|
||||||
shell: bash
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: npm ci
|
||||||
|
- name: Install Zig
|
||||||
|
uses: mlugg/setup-zig@v2
|
||||||
|
if: ${{ contains(matrix.settings.target, 'musl') }}
|
||||||
|
with:
|
||||||
|
version: 0.14.1
|
||||||
|
- name: Install cargo-zigbuild
|
||||||
|
uses: taiki-e/install-action@v2
|
||||||
|
if: ${{ contains(matrix.settings.target, 'musl') }}
|
||||||
|
with:
|
||||||
|
tool: cargo-zigbuild
|
||||||
- name: Build in docker
|
- name: Build in docker
|
||||||
uses: addnab/docker-run-action@v3
|
uses: addnab/docker-run-action@v3
|
||||||
if: ${{ matrix.settings.docker }}
|
if: ${{ matrix.settings.docker }}
|
||||||
@@ -210,24 +218,24 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
${{ matrix.settings.pre_build }}
|
${{ matrix.settings.pre_build }}
|
||||||
npx napi build --platform --release --no-const-enum \
|
npx napi build --platform --release \
|
||||||
--features ${{ matrix.settings.features }} \
|
--features ${{ matrix.settings.features }} \
|
||||||
--target ${{ matrix.settings.target }} \
|
--target ${{ matrix.settings.target }} \
|
||||||
--dts ../lancedb/native.d.ts \
|
--dts ../lancedb/native.d.ts \
|
||||||
--js ../lancedb/native.js \
|
--js ../lancedb/native.js \
|
||||||
--strip \
|
--strip \
|
||||||
dist/
|
--output-dir dist/
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
${{ matrix.settings.pre_build }}
|
${{ matrix.settings.pre_build }}
|
||||||
npx napi build --platform --release --no-const-enum \
|
npx napi build --platform --release \
|
||||||
--features ${{ matrix.settings.features }} \
|
--features ${{ matrix.settings.features }} \
|
||||||
--target ${{ matrix.settings.target }} \
|
--target ${{ matrix.settings.target }} \
|
||||||
--dts ../lancedb/native.d.ts \
|
--dts ../lancedb/native.d.ts \
|
||||||
--js ../lancedb/native.js \
|
--js ../lancedb/native.js \
|
||||||
--strip \
|
--strip \
|
||||||
$EXTRA_ARGS \
|
$EXTRA_ARGS \
|
||||||
dist/
|
--output-dir dist/
|
||||||
if: ${{ !matrix.settings.docker }}
|
if: ${{ !matrix.settings.docker }}
|
||||||
shell: bash
|
shell: bash
|
||||||
- name: Upload artifact
|
- name: Upload artifact
|
||||||
@@ -265,7 +273,7 @@ jobs:
|
|||||||
- target: x86_64-unknown-linux-gnu
|
- target: x86_64-unknown-linux-gnu
|
||||||
host: ubuntu-latest
|
host: ubuntu-latest
|
||||||
- target: aarch64-unknown-linux-gnu
|
- target: aarch64-unknown-linux-gnu
|
||||||
host: buildjet-16vcpu-ubuntu-2204-arm
|
host: ubuntu-2404-8x-arm64
|
||||||
node:
|
node:
|
||||||
- '20'
|
- '20'
|
||||||
runs-on: ${{ matrix.settings.host }}
|
runs-on: ${{ matrix.settings.host }}
|
||||||
@@ -355,7 +363,8 @@ jobs:
|
|||||||
if [[ $DRY_RUN == "true" ]]; then
|
if [[ $DRY_RUN == "true" ]]; then
|
||||||
ARGS="$ARGS --dry-run"
|
ARGS="$ARGS --dry-run"
|
||||||
fi
|
fi
|
||||||
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
|
VERSION=$(node -p "require('./package.json').version")
|
||||||
|
if [[ $VERSION == *-* ]]; then
|
||||||
ARGS="$ARGS --tag preview"
|
ARGS="$ARGS --tag preview"
|
||||||
fi
|
fi
|
||||||
npm publish $ARGS
|
npm publish $ARGS
|
||||||
|
|||||||
1
.github/workflows/pypi-publish.yml
vendored
1
.github/workflows/pypi-publish.yml
vendored
@@ -9,6 +9,7 @@ on:
|
|||||||
paths:
|
paths:
|
||||||
- .github/workflows/pypi-publish.yml
|
- .github/workflows/pypi-publish.yml
|
||||||
- Cargo.toml # Change in dependency frequently breaks builds
|
- Cargo.toml # Change in dependency frequently breaks builds
|
||||||
|
- Cargo.lock
|
||||||
|
|
||||||
env:
|
env:
|
||||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||||
|
|||||||
5
.github/workflows/python.yml
vendored
5
.github/workflows/python.yml
vendored
@@ -7,9 +7,14 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- Cargo.toml
|
- Cargo.toml
|
||||||
|
- Cargo.lock
|
||||||
- python/**
|
- python/**
|
||||||
- rust/**
|
- rust/**
|
||||||
- .github/workflows/python.yml
|
- .github/workflows/python.yml
|
||||||
|
- .github/workflows/build_linux_wheel/**
|
||||||
|
- .github/workflows/build_mac_wheel/**
|
||||||
|
- .github/workflows/build_windows_wheel/**
|
||||||
|
- .github/workflows/run_tests/**
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||||
|
|||||||
21
.github/workflows/rust.yml
vendored
21
.github/workflows/rust.yml
vendored
@@ -7,6 +7,7 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- Cargo.toml
|
- Cargo.toml
|
||||||
|
- Cargo.lock
|
||||||
- rust/**
|
- rust/**
|
||||||
- .github/workflows/rust.yml
|
- .github/workflows/rust.yml
|
||||||
|
|
||||||
@@ -100,7 +101,9 @@ jobs:
|
|||||||
lfs: true
|
lfs: true
|
||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: sudo apt install -y protobuf-compiler libssl-dev
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y protobuf-compiler libssl-dev
|
||||||
- uses: rui314/setup-mold@v1
|
- uses: rui314/setup-mold@v1
|
||||||
- name: Make Swap
|
- name: Make Swap
|
||||||
run: |
|
run: |
|
||||||
@@ -204,14 +207,14 @@ jobs:
|
|||||||
- name: Downgrade dependencies
|
- name: Downgrade dependencies
|
||||||
# These packages have newer requirements for MSRV
|
# These packages have newer requirements for MSRV
|
||||||
run: |
|
run: |
|
||||||
cargo update -p aws-sdk-bedrockruntime --precise 1.64.0
|
cargo update -p aws-sdk-bedrockruntime --precise 1.77.0
|
||||||
cargo update -p aws-sdk-dynamodb --precise 1.55.0
|
cargo update -p aws-sdk-dynamodb --precise 1.68.0
|
||||||
cargo update -p aws-config --precise 1.5.10
|
cargo update -p aws-config --precise 1.6.0
|
||||||
cargo update -p aws-sdk-kms --precise 1.51.0
|
cargo update -p aws-sdk-kms --precise 1.63.0
|
||||||
cargo update -p aws-sdk-s3 --precise 1.65.0
|
cargo update -p aws-sdk-s3 --precise 1.79.0
|
||||||
cargo update -p aws-sdk-sso --precise 1.50.0
|
cargo update -p aws-sdk-sso --precise 1.62.0
|
||||||
cargo update -p aws-sdk-ssooidc --precise 1.51.0
|
cargo update -p aws-sdk-ssooidc --precise 1.63.0
|
||||||
cargo update -p aws-sdk-sts --precise 1.51.0
|
cargo update -p aws-sdk-sts --precise 1.63.0
|
||||||
cargo update -p home --precise 0.5.9
|
cargo update -p home --precise 0.5.9
|
||||||
- name: cargo +${{ matrix.msrv }} check
|
- name: cargo +${{ matrix.msrv }} check
|
||||||
env:
|
env:
|
||||||
|
|||||||
3009
Cargo.lock
generated
3009
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
46
Cargo.toml
46
Cargo.toml
@@ -5,7 +5,7 @@ exclude = ["python"]
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
authors = ["LanceDB Devs <dev@lancedb.com>"]
|
authors = ["LanceDB Devs <dev@lancedb.com>"]
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.91.0"
|
rust-version = "1.91.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=3.0.0-beta.5", default-features = false, "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-core = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-core = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datagen = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datagen = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-file = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-file = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-io = { "version" = "=3.0.0-beta.5", default-features = false, "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-io = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-index = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-index = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-linalg = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-linalg = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace-impls = { "version" = "=3.0.0-beta.5", default-features = false, "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace-impls = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-table = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-table = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-testing = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-testing = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datafusion = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datafusion = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-encoding = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-encoding = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-arrow = { "version" = "=3.0.0-beta.5", "tag" = "v3.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-arrow = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
ahash = "0.8"
|
ahash = "0.8"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "57.2", optional = false }
|
arrow = { version = "57.2", optional = false }
|
||||||
@@ -40,13 +40,15 @@ arrow-schema = "57.2"
|
|||||||
arrow-select = "57.2"
|
arrow-select = "57.2"
|
||||||
arrow-cast = "57.2"
|
arrow-cast = "57.2"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
datafusion = { version = "51.0", default-features = false }
|
datafusion = { version = "52.1", default-features = false }
|
||||||
datafusion-catalog = "51.0"
|
datafusion-catalog = "52.1"
|
||||||
datafusion-common = { version = "51.0", default-features = false }
|
datafusion-common = { version = "52.1", default-features = false }
|
||||||
datafusion-execution = "51.0"
|
datafusion-execution = "52.1"
|
||||||
datafusion-expr = "51.0"
|
datafusion-expr = "52.1"
|
||||||
datafusion-physical-plan = "51.0"
|
datafusion-functions = "52.1"
|
||||||
datafusion-physical-expr = "51.0"
|
datafusion-physical-plan = "52.1"
|
||||||
|
datafusion-physical-expr = "52.1"
|
||||||
|
datafusion-sql = "52.1"
|
||||||
env_logger = "0.11"
|
env_logger = "0.11"
|
||||||
half = { "version" = "2.7.1", default-features = false, features = [
|
half = { "version" = "2.7.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import functools
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -26,6 +27,7 @@ SEMVER_RE = re.compile(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@functools.total_ordering
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class SemVer:
|
class SemVer:
|
||||||
major: int
|
major: int
|
||||||
@@ -156,7 +158,9 @@ def read_current_version(repo_root: Path) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
|
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
|
||||||
return max(tags, key=lambda tag: tag.semver)
|
# Stable releases (no prerelease) are always preferred over pre-releases.
|
||||||
|
# Within each group, standard semver ordering applies.
|
||||||
|
return max(tags, key=lambda tag: (not tag.semver.prerelease, tag.semver))
|
||||||
|
|
||||||
|
|
||||||
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
|
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
version: "3.9"
|
version: "3.9"
|
||||||
services:
|
services:
|
||||||
localstack:
|
localstack:
|
||||||
image: localstack/localstack:3.3
|
image: localstack/localstack:4.0
|
||||||
ports:
|
ports:
|
||||||
- 4566:4566
|
- 4566:4566
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
@@ -1,27 +1,27 @@
|
|||||||
#Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
# Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||||
#Usage docker build -t lancedb:latest -f Dockerfile .
|
# Usage: docker build -t lancedb:latest -f Dockerfile .
|
||||||
FROM python:3.10-slim-buster
|
FROM python:3.12-slim-bookworm
|
||||||
|
|
||||||
# Install Rust
|
# Install build dependencies in a single layer
|
||||||
RUN apt-get update && apt-get install -y curl build-essential && \
|
RUN apt-get update && \
|
||||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
# Set the environment variable for Rust
|
build-essential \
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
protobuf-compiler \
|
||||||
|
git \
|
||||||
# Install protobuf compiler
|
ca-certificates && \
|
||||||
RUN apt-get install -y protobuf-compiler && \
|
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN apt-get -y update &&\
|
# Install Rust (pinned installer, non-interactive)
|
||||||
apt-get -y upgrade && \
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||||
apt-get -y install git
|
|
||||||
|
|
||||||
|
# Set the environment variable for Rust
|
||||||
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
|
||||||
# Verify installations
|
# Verify installations
|
||||||
RUN python --version && \
|
RUN python --version && \
|
||||||
rustc --version && \
|
rustc --version && \
|
||||||
protoc --version
|
protoc --version
|
||||||
|
|
||||||
RUN pip install tantivy lancedb
|
RUN pip install --no-cache-dir tantivy lancedb
|
||||||
|
|||||||
@@ -52,14 +52,21 @@ plugins:
|
|||||||
options:
|
options:
|
||||||
docstring_style: numpy
|
docstring_style: numpy
|
||||||
heading_level: 3
|
heading_level: 3
|
||||||
show_source: true
|
|
||||||
show_symbol_type_in_heading: true
|
|
||||||
show_signature_annotations: true
|
show_signature_annotations: true
|
||||||
show_root_heading: true
|
show_root_heading: true
|
||||||
|
show_docstring_examples: true
|
||||||
|
show_docstring_attributes: false
|
||||||
|
show_docstring_other_parameters: true
|
||||||
|
show_symbol_type_heading: true
|
||||||
|
show_labels: false
|
||||||
|
show_if_no_docstring: true
|
||||||
|
show_source: false
|
||||||
members_order: source
|
members_order: source
|
||||||
docstring_section_style: list
|
docstring_section_style: list
|
||||||
signature_crossrefs: true
|
signature_crossrefs: true
|
||||||
separate_signature: true
|
separate_signature: true
|
||||||
|
filters:
|
||||||
|
- "!^_"
|
||||||
import:
|
import:
|
||||||
# for cross references
|
# for cross references
|
||||||
- https://arrow.apache.org/docs/objects.inv
|
- https://arrow.apache.org/docs/objects.inv
|
||||||
@@ -113,7 +120,7 @@ markdown_extensions:
|
|||||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||||
- markdown.extensions.toc:
|
- markdown.extensions.toc:
|
||||||
toc_depth: 3
|
toc_depth: 4
|
||||||
permalink: true
|
permalink: true
|
||||||
permalink_title: Anchor link to this section
|
permalink_title: Anchor link to this section
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
mkdocs==1.5.3
|
mkdocs==1.6.1
|
||||||
mkdocs-jupyter==0.24.1
|
mkdocs-jupyter==0.24.1
|
||||||
mkdocs-material==9.5.3
|
mkdocs-material==9.6.23
|
||||||
mkdocs-autorefs<=1.0
|
mkdocs-autorefs>=0.5,<=1.0
|
||||||
mkdocstrings[python]==0.25.2
|
mkdocstrings[python]>=0.24,<1.0
|
||||||
griffe
|
griffe>=0.40,<1.0
|
||||||
mkdocs-render-swagger-plugin
|
mkdocs-render-swagger-plugin>=0.1.0
|
||||||
pydantic
|
pydantic>=2.0,<3.0
|
||||||
mkdocs-redirects
|
mkdocs-redirects>=1.2.0
|
||||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-core</artifactId>
|
<artifactId>lancedb-core</artifactId>
|
||||||
<version>0.27.0-beta.0</version>
|
<version>0.27.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -57,32 +57,32 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
|||||||
|
|
||||||
## Metadata Operations
|
## Metadata Operations
|
||||||
|
|
||||||
### Creating a Namespace
|
### Creating a Namespace Path
|
||||||
|
|
||||||
Namespaces organize tables hierarchically. Create a namespace before creating tables within it:
|
Namespace paths organize tables hierarchically. Create the desired namespace path before creating tables within it:
|
||||||
|
|
||||||
```java
|
```java
|
||||||
import org.lance.namespace.model.CreateNamespaceRequest;
|
import org.lance.namespace.model.CreateNamespaceRequest;
|
||||||
import org.lance.namespace.model.CreateNamespaceResponse;
|
import org.lance.namespace.model.CreateNamespaceResponse;
|
||||||
|
|
||||||
// Create a child namespace
|
// Create a child namespace path
|
||||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||||
request.setId(Arrays.asList("my_namespace"));
|
request.setId(Arrays.asList("my_namespace"));
|
||||||
|
|
||||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also create nested namespaces:
|
You can also create nested namespace paths:
|
||||||
|
|
||||||
```java
|
```java
|
||||||
// Create a nested namespace: parent/child
|
// Create a nested namespace path: parent/child
|
||||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||||
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
|
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
|
||||||
|
|
||||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Describing a Namespace
|
### Describing a Namespace Path
|
||||||
|
|
||||||
```java
|
```java
|
||||||
import org.lance.namespace.model.DescribeNamespaceRequest;
|
import org.lance.namespace.model.DescribeNamespaceRequest;
|
||||||
@@ -95,22 +95,22 @@ DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
|
|||||||
System.out.println("Namespace properties: " + response.getProperties());
|
System.out.println("Namespace properties: " + response.getProperties());
|
||||||
```
|
```
|
||||||
|
|
||||||
### Listing Namespaces
|
### Listing Namespace Paths
|
||||||
|
|
||||||
```java
|
```java
|
||||||
import org.lance.namespace.model.ListNamespacesRequest;
|
import org.lance.namespace.model.ListNamespacesRequest;
|
||||||
import org.lance.namespace.model.ListNamespacesResponse;
|
import org.lance.namespace.model.ListNamespacesResponse;
|
||||||
|
|
||||||
// List all namespaces at root level
|
// List all namespace paths at the root level
|
||||||
ListNamespacesRequest request = new ListNamespacesRequest();
|
ListNamespacesRequest request = new ListNamespacesRequest();
|
||||||
request.setId(Arrays.asList()); // Empty for root
|
request.setId(Arrays.asList()); // Empty for root
|
||||||
|
|
||||||
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
|
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
|
||||||
for (String ns : response.getNamespaces()) {
|
for (String ns : response.getNamespaces()) {
|
||||||
System.out.println("Namespace: " + ns);
|
System.out.println("Namespace path: " + ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
// List child namespaces under a parent
|
// List child namespace paths under a parent path
|
||||||
ListNamespacesRequest childRequest = new ListNamespacesRequest();
|
ListNamespacesRequest childRequest = new ListNamespacesRequest();
|
||||||
childRequest.setId(Arrays.asList("parent_namespace"));
|
childRequest.setId(Arrays.asList("parent_namespace"));
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childReque
|
|||||||
import org.lance.namespace.model.ListTablesRequest;
|
import org.lance.namespace.model.ListTablesRequest;
|
||||||
import org.lance.namespace.model.ListTablesResponse;
|
import org.lance.namespace.model.ListTablesResponse;
|
||||||
|
|
||||||
// List tables in a namespace
|
// List tables in a namespace path
|
||||||
ListTablesRequest request = new ListTablesRequest();
|
ListTablesRequest request = new ListTablesRequest();
|
||||||
request.setId(Arrays.asList("my_namespace"));
|
request.setId(Arrays.asList("my_namespace"));
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ for (String table : response.getTables()) {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Dropping a Namespace
|
### Dropping a Namespace Path
|
||||||
|
|
||||||
```java
|
```java
|
||||||
import org.lance.namespace.model.DropNamespaceRequest;
|
import org.lance.namespace.model.DropNamespaceRequest;
|
||||||
@@ -175,7 +175,7 @@ DropTableResponse response = namespaceClient.dropTable(request);
|
|||||||
|
|
||||||
### Creating a Table
|
### Creating a Table
|
||||||
|
|
||||||
Tables are created within a namespace by providing data in Apache Arrow IPC format:
|
Tables are created within a namespace path by providing data in Apache Arrow IPC format:
|
||||||
|
|
||||||
```java
|
```java
|
||||||
import org.lance.namespace.LanceNamespace;
|
import org.lance.namespace.LanceNamespace;
|
||||||
@@ -242,7 +242,7 @@ try (BufferAllocator allocator = new RootAllocator();
|
|||||||
}
|
}
|
||||||
byte[] tableData = out.toByteArray();
|
byte[] tableData = out.toByteArray();
|
||||||
|
|
||||||
// Create table in a namespace
|
// Create a table in a namespace path
|
||||||
CreateTableRequest request = new CreateTableRequest();
|
CreateTableRequest request = new CreateTableRequest();
|
||||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||||
CreateTableResponse response = namespaceClient.createTable(request, tableData);
|
CreateTableResponse response = namespaceClient.createTable(request, tableData);
|
||||||
|
|||||||
@@ -61,8 +61,8 @@ sharing the same data, deletion, and index files.
|
|||||||
* **options.sourceVersion?**: `number`
|
* **options.sourceVersion?**: `number`
|
||||||
The version of the source table to clone.
|
The version of the source table to clone.
|
||||||
|
|
||||||
* **options.targetNamespace?**: `string`[]
|
* **options.targetNamespacePath?**: `string`[]
|
||||||
The namespace for the target table (defaults to root namespace).
|
The namespace path for the target table (defaults to root namespace).
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -116,13 +116,13 @@ Creates a new empty Table
|
|||||||
|
|
||||||
`Promise`<[`Table`](Table.md)>
|
`Promise`<[`Table`](Table.md)>
|
||||||
|
|
||||||
#### createEmptyTable(name, schema, namespace, options)
|
#### createEmptyTable(name, schema, namespacePath, options)
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
abstract createEmptyTable(
|
abstract createEmptyTable(
|
||||||
name,
|
name,
|
||||||
schema,
|
schema,
|
||||||
namespace?,
|
namespacePath?,
|
||||||
options?): Promise<Table>
|
options?): Promise<Table>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -136,8 +136,8 @@ Creates a new empty Table
|
|||||||
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
|
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
|
||||||
The schema of the table
|
The schema of the table
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace to create the table in (defaults to root namespace)
|
The namespace path to create the table in (defaults to root namespace)
|
||||||
|
|
||||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
Additional options
|
Additional options
|
||||||
@@ -150,10 +150,10 @@ Creates a new empty Table
|
|||||||
|
|
||||||
### createTable()
|
### createTable()
|
||||||
|
|
||||||
#### createTable(options, namespace)
|
#### createTable(options, namespacePath)
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
abstract createTable(options, namespace?): Promise<Table>
|
abstract createTable(options, namespacePath?): Promise<Table>
|
||||||
```
|
```
|
||||||
|
|
||||||
Creates a new Table and initialize it with new data.
|
Creates a new Table and initialize it with new data.
|
||||||
@@ -163,8 +163,8 @@ Creates a new Table and initialize it with new data.
|
|||||||
* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
The options object.
|
The options object.
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace to create the table in (defaults to root namespace)
|
The namespace path to create the table in (defaults to root namespace)
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -197,13 +197,13 @@ Creates a new Table and initialize it with new data.
|
|||||||
|
|
||||||
`Promise`<[`Table`](Table.md)>
|
`Promise`<[`Table`](Table.md)>
|
||||||
|
|
||||||
#### createTable(name, data, namespace, options)
|
#### createTable(name, data, namespacePath, options)
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
abstract createTable(
|
abstract createTable(
|
||||||
name,
|
name,
|
||||||
data,
|
data,
|
||||||
namespace?,
|
namespacePath?,
|
||||||
options?): Promise<Table>
|
options?): Promise<Table>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -218,8 +218,8 @@ Creates a new Table and initialize it with new data.
|
|||||||
Non-empty Array of Records
|
Non-empty Array of Records
|
||||||
to be inserted into the table
|
to be inserted into the table
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace to create the table in (defaults to root namespace)
|
The namespace path to create the table in (defaults to root namespace)
|
||||||
|
|
||||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||||
Additional options
|
Additional options
|
||||||
@@ -247,15 +247,15 @@ Return a brief description of the connection
|
|||||||
### dropAllTables()
|
### dropAllTables()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
abstract dropAllTables(namespace?): Promise<void>
|
abstract dropAllTables(namespacePath?): Promise<void>
|
||||||
```
|
```
|
||||||
|
|
||||||
Drop all tables in the database.
|
Drop all tables in the database.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace to drop tables from (defaults to root namespace).
|
The namespace path to drop tables from (defaults to root namespace).
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -266,7 +266,7 @@ Drop all tables in the database.
|
|||||||
### dropTable()
|
### dropTable()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
abstract dropTable(name, namespace?): Promise<void>
|
abstract dropTable(name, namespacePath?): Promise<void>
|
||||||
```
|
```
|
||||||
|
|
||||||
Drop an existing table.
|
Drop an existing table.
|
||||||
@@ -276,8 +276,8 @@ Drop an existing table.
|
|||||||
* **name**: `string`
|
* **name**: `string`
|
||||||
The name of the table to drop.
|
The name of the table to drop.
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace of the table (defaults to root namespace).
|
The namespace path of the table (defaults to root namespace).
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -304,7 +304,7 @@ Return true if the connection has not been closed
|
|||||||
```ts
|
```ts
|
||||||
abstract openTable(
|
abstract openTable(
|
||||||
name,
|
name,
|
||||||
namespace?,
|
namespacePath?,
|
||||||
options?): Promise<Table>
|
options?): Promise<Table>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -315,8 +315,8 @@ Open a table in the database.
|
|||||||
* **name**: `string`
|
* **name**: `string`
|
||||||
The name of the table
|
The name of the table
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace of the table (defaults to root namespace)
|
The namespace path of the table (defaults to root namespace)
|
||||||
|
|
||||||
* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
|
* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
|
||||||
Additional options
|
Additional options
|
||||||
@@ -349,10 +349,10 @@ Tables will be returned in lexicographical order.
|
|||||||
|
|
||||||
`Promise`<`string`[]>
|
`Promise`<`string`[]>
|
||||||
|
|
||||||
#### tableNames(namespace, options)
|
#### tableNames(namespacePath, options)
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
abstract tableNames(namespace?, options?): Promise<string[]>
|
abstract tableNames(namespacePath?, options?): Promise<string[]>
|
||||||
```
|
```
|
||||||
|
|
||||||
List all the table names in this database.
|
List all the table names in this database.
|
||||||
@@ -361,8 +361,8 @@ Tables will be returned in lexicographical order.
|
|||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
* **namespace?**: `string`[]
|
* **namespacePath?**: `string`[]
|
||||||
The namespace to list tables from (defaults to root namespace)
|
The namespace path to list tables from (defaults to root namespace)
|
||||||
|
|
||||||
* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
|
* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
|
||||||
options to control the
|
options to control the
|
||||||
|
|||||||
@@ -71,11 +71,12 @@ Add new columns with defined values.
|
|||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
* **newColumnTransforms**: `Field`<`any`> \| `Field`<`any`>[] \| `Schema`<`any`> \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||||
pairs of column names and
|
Either:
|
||||||
the SQL expression to use to calculate the value of the new column. These
|
- An array of objects with column names and SQL expressions to calculate values
|
||||||
expressions will be evaluated for each row in the table, and can
|
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||||
reference existing columns in the table.
|
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||||
|
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -484,19 +485,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
|||||||
- Prune: Removes old versions of the dataset
|
- Prune: Removes old versions of the dataset
|
||||||
- Index: Optimizes the indices, adding new data to existing indices
|
- Index: Optimizes the indices, adding new data to existing indices
|
||||||
|
|
||||||
Experimental API
|
The frequency an application should call optimize is based on the frequency of
|
||||||
----------------
|
|
||||||
|
|
||||||
The optimization process is undergoing active development and may change.
|
|
||||||
Our goal with these changes is to improve the performance of optimization and
|
|
||||||
reduce the complexity.
|
|
||||||
|
|
||||||
That being said, it is essential today to run optimize if you want the best
|
|
||||||
performance. It should be stable and safe to use in production, but it our
|
|
||||||
hope that the API may be simplified (or not even need to be called) in the
|
|
||||||
future.
|
|
||||||
|
|
||||||
The frequency an application shoudl call optimize is based on the frequency of
|
|
||||||
data modifications. If data is frequently added, deleted, or updated then
|
data modifications. If data is frequently added, deleted, or updated then
|
||||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||||
you have added or modified 100,000 or more records or run more than 20 data
|
you have added or modified 100,000 or more records or run more than 20 data
|
||||||
|
|||||||
@@ -8,6 +8,14 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
|
### numDeletedRows
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numDeletedRows: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### version
|
### version
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
|
|||||||
@@ -37,3 +37,12 @@ tbl.optimize({cleanupOlderThan: new Date()});
|
|||||||
```ts
|
```ts
|
||||||
deleteUnverified: boolean;
|
deleteUnverified: boolean;
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Because they may be part of an in-progress transaction, files newer than
|
||||||
|
7 days old are not deleted by default. If you are sure that there are no
|
||||||
|
in-progress transactions, then you can set this to true to delete all
|
||||||
|
files older than `cleanupOlderThan`.
|
||||||
|
|
||||||
|
**WARNING**: This should only be set to true if you can guarantee that
|
||||||
|
no other process is currently working on this dataset. Otherwise the
|
||||||
|
dataset could be put into a corrupted state.
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ new EmbeddingFunction<T, M>(): EmbeddingFunction<T, M>
|
|||||||
### computeQueryEmbeddings()
|
### computeQueryEmbeddings()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
|
||||||
```
|
```
|
||||||
|
|
||||||
Compute the embeddings for a single query
|
Compute the embeddings for a single query
|
||||||
@@ -63,7 +63,7 @@ Compute the embeddings for a single query
|
|||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`>
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ new TextEmbeddingFunction<M>(): TextEmbeddingFunction<M>
|
|||||||
### computeQueryEmbeddings()
|
### computeQueryEmbeddings()
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
|
||||||
```
|
```
|
||||||
|
|
||||||
Compute the embeddings for a single query
|
Compute the embeddings for a single query
|
||||||
@@ -48,7 +48,7 @@ Compute the embeddings for a single query
|
|||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`>
|
||||||
|
|
||||||
#### Overrides
|
#### Overrides
|
||||||
|
|
||||||
|
|||||||
@@ -7,5 +7,10 @@
|
|||||||
# Type Alias: IntoVector
|
# Type Alias: IntoVector
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
type IntoVector:
|
||||||
|
| Float32Array
|
||||||
|
| Float64Array
|
||||||
|
| Uint8Array
|
||||||
|
| number[]
|
||||||
|
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -36,6 +36,20 @@ is also an [asynchronous API client](#connections-asynchronous).
|
|||||||
|
|
||||||
::: lancedb.table.Tags
|
::: lancedb.table.Tags
|
||||||
|
|
||||||
|
## Expressions
|
||||||
|
|
||||||
|
Type-safe expression builder for filters and projections. Use these instead
|
||||||
|
of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
|
||||||
|
[select][lancedb.query.LanceQueryBuilder.select].
|
||||||
|
|
||||||
|
::: lancedb.expr.Expr
|
||||||
|
|
||||||
|
::: lancedb.expr.col
|
||||||
|
|
||||||
|
::: lancedb.expr.lit
|
||||||
|
|
||||||
|
::: lancedb.expr.func
|
||||||
|
|
||||||
## Querying (Synchronous)
|
## Querying (Synchronous)
|
||||||
|
|
||||||
::: lancedb.query.Query
|
::: lancedb.query.Query
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# LanceDB Java SDK
|
# LanceDB Java Enterprise Client
|
||||||
|
|
||||||
## Configuration and Initialization
|
## Configuration and Initialization
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.27.0-beta.0</version>
|
<version>0.27.2-final.0</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
@@ -56,21 +56,21 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.logging.log4j</groupId>
|
<groupId>org.apache.logging.log4j</groupId>
|
||||||
<artifactId>log4j-slf4j2-impl</artifactId>
|
<artifactId>log4j-slf4j2-impl</artifactId>
|
||||||
<version>2.24.3</version>
|
<version>2.25.3</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.logging.log4j</groupId>
|
<groupId>org.apache.logging.log4j</groupId>
|
||||||
<artifactId>log4j-core</artifactId>
|
<artifactId>log4j-core</artifactId>
|
||||||
<version>2.24.3</version>
|
<version>2.25.3</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.logging.log4j</groupId>
|
<groupId>org.apache.logging.log4j</groupId>
|
||||||
<artifactId>log4j-api</artifactId>
|
<artifactId>log4j-api</artifactId>
|
||||||
<version>2.24.3</version>
|
<version>2.25.3</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|||||||
20
java/pom.xml
20
java/pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.27.0-beta.0</version>
|
<version>0.27.2-final.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>${project.artifactId}</name>
|
<name>${project.artifactId}</name>
|
||||||
<description>LanceDB Java SDK Parent POM</description>
|
<description>LanceDB Java SDK Parent POM</description>
|
||||||
@@ -28,7 +28,7 @@
|
|||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<arrow.version>15.0.0</arrow.version>
|
<arrow.version>15.0.0</arrow.version>
|
||||||
<lance-core.version>3.0.0-beta.5</lance-core.version>
|
<lance-core.version>5.0.0-beta.4</lance-core.version>
|
||||||
<spotless.skip>false</spotless.skip>
|
<spotless.skip>false</spotless.skip>
|
||||||
<spotless.version>2.30.0</spotless.version>
|
<spotless.version>2.30.0</spotless.version>
|
||||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||||
@@ -111,7 +111,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-source-plugin</artifactId>
|
<artifactId>maven-source-plugin</artifactId>
|
||||||
<version>2.2.1</version>
|
<version>3.3.1</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>attach-sources</id>
|
<id>attach-sources</id>
|
||||||
@@ -124,7 +124,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-javadoc-plugin</artifactId>
|
<artifactId>maven-javadoc-plugin</artifactId>
|
||||||
<version>2.9.1</version>
|
<version>3.11.2</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>attach-javadocs</id>
|
<id>attach-javadocs</id>
|
||||||
@@ -178,15 +178,15 @@
|
|||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-clean-plugin</artifactId>
|
<artifactId>maven-clean-plugin</artifactId>
|
||||||
<version>3.1.0</version>
|
<version>3.4.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-resources-plugin</artifactId>
|
<artifactId>maven-resources-plugin</artifactId>
|
||||||
<version>3.0.2</version>
|
<version>3.3.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<version>3.8.1</version>
|
<version>3.14.0</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
<compilerArgs>
|
<compilerArgs>
|
||||||
<arg>-h</arg>
|
<arg>-h</arg>
|
||||||
@@ -205,11 +205,11 @@
|
|||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-jar-plugin</artifactId>
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
<version>3.0.2</version>
|
<version>3.4.2</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-install-plugin</artifactId>
|
<artifactId>maven-install-plugin</artifactId>
|
||||||
<version>2.5.2</version>
|
<version>3.1.3</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>com.diffplug.spotless</groupId>
|
<groupId>com.diffplug.spotless</groupId>
|
||||||
@@ -327,7 +327,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-gpg-plugin</artifactId>
|
<artifactId>maven-gpg-plugin</artifactId>
|
||||||
<version>1.5</version>
|
<version>3.2.7</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>sign-artifacts</id>
|
<id>sign-artifacts</id>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.27.0-beta.0"
|
version = "0.27.2"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
@@ -15,25 +15,27 @@ crate-type = ["cdylib"]
|
|||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
arrow-ipc.workspace = true
|
arrow-ipc.workspace = true
|
||||||
arrow-array.workspace = true
|
arrow-array.workspace = true
|
||||||
|
arrow-buffer = "57.2"
|
||||||
|
half.workspace = true
|
||||||
arrow-schema.workspace = true
|
arrow-schema.workspace = true
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
napi = { version = "2.16.8", default-features = false, features = [
|
napi = { version = "3.8.3", default-features = false, features = [
|
||||||
"napi9",
|
"napi9",
|
||||||
"async"
|
"async"
|
||||||
] }
|
] }
|
||||||
napi-derive = "2.16.4"
|
napi-derive = "3.5.2"
|
||||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||||
lzma-sys = { version = "*", features = ["static"] }
|
lzma-sys = { version = "0.1", features = ["static"] }
|
||||||
log.workspace = true
|
log.workspace = true
|
||||||
|
|
||||||
# Workaround for build failure until we can fix it.
|
# Pin to resolve build failures; update periodically for security patches.
|
||||||
aws-lc-sys = "=0.28.0"
|
aws-lc-sys = "=0.38.0"
|
||||||
aws-lc-rs = "=1.13.0"
|
aws-lc-rs = "=1.16.1"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
napi-build = "2.1"
|
napi-build = "2.3.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["remote", "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]
|
default = ["remote", "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
tableFromIPC,
|
tableFromIPC,
|
||||||
DataType,
|
DataType,
|
||||||
Dictionary,
|
Dictionary,
|
||||||
|
Uint8: ArrowUint8,
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
} = <any>arrow;
|
} = <any>arrow;
|
||||||
type Schema = ApacheArrow["Schema"];
|
type Schema = ApacheArrow["Schema"];
|
||||||
@@ -362,6 +363,38 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
).toEqual(new Float64().toString());
|
).toEqual(new Float64().toString());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("will infer FixedSizeList<Float32> from Float32Array values", async function () {
|
||||||
|
const table = makeArrowTable([
|
||||||
|
{ id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) },
|
||||||
|
{ id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) },
|
||||||
|
]);
|
||||||
|
|
||||||
|
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
const vectorType = table.getChild("vector")?.type;
|
||||||
|
expect(vectorType.listSize).toBe(3);
|
||||||
|
expect(vectorType.children[0].type.toString()).toEqual(
|
||||||
|
new Float32().toString(),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("will infer FixedSizeList<Uint8> from Uint8Array values", async function () {
|
||||||
|
const table = makeArrowTable([
|
||||||
|
{ id: "a", vector: new Uint8Array([1, 2, 3]) },
|
||||||
|
{ id: "b", vector: new Uint8Array([4, 5, 6]) },
|
||||||
|
]);
|
||||||
|
|
||||||
|
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
const vectorType = table.getChild("vector")?.type;
|
||||||
|
expect(vectorType.listSize).toBe(3);
|
||||||
|
expect(vectorType.children[0].type.toString()).toEqual(
|
||||||
|
new ArrowUint8().toString(),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("will use dictionary encoded strings if asked", async function () {
|
it("will use dictionary encoded strings if asked", async function () {
|
||||||
const table = makeArrowTable([{ str: "hello" }]);
|
const table = makeArrowTable([{ str: "hello" }]);
|
||||||
expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true);
|
expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true);
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
},
|
},
|
||||||
numIndices: 0,
|
numIndices: 0,
|
||||||
numRows: 3,
|
numRows: 3,
|
||||||
totalBytes: 24,
|
totalBytes: 44,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
|
|||||||
expect(await table.schema()).toEqual(expectedSchema);
|
expect(await table.schema()).toEqual(expectedSchema);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("can add columns with schema for explicit data types", async function () {
|
||||||
|
const con = await connect(tmpDir.name);
|
||||||
|
const table = await con.createTable("vectors", [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] },
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Define schema for new columns with explicit data types
|
||||||
|
// Note: All columns must be nullable when using addColumns with Schema
|
||||||
|
// because they are initially populated with null values
|
||||||
|
const newColumnsSchema = new Schema([
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
new Field("category", new Utf8(), true),
|
||||||
|
new Field("rating", new Int32(), true),
|
||||||
|
]);
|
||||||
|
|
||||||
|
const result = await table.addColumns(newColumnsSchema);
|
||||||
|
expect(result).toHaveProperty("version");
|
||||||
|
expect(result.version).toBe(2);
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field("id", new Int64(), true),
|
||||||
|
new Field(
|
||||||
|
"vector",
|
||||||
|
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
new Field("category", new Utf8(), true),
|
||||||
|
new Field("rating", new Int32(), true),
|
||||||
|
]);
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema);
|
||||||
|
|
||||||
|
// Verify that new columns are populated with null values
|
||||||
|
const results = await table.query().toArray();
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].price).toBeNull();
|
||||||
|
expect(results[0].category).toBeNull();
|
||||||
|
expect(results[0].rating).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can add a single column using Field", async function () {
|
||||||
|
const con = await connect(tmpDir.name);
|
||||||
|
const table = await con.createTable("vectors", [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] },
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Add a single field
|
||||||
|
const priceField = new Field("price", new Float64(), true);
|
||||||
|
const result = await table.addColumns(priceField);
|
||||||
|
expect(result).toHaveProperty("version");
|
||||||
|
expect(result.version).toBe(2);
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field("id", new Int64(), true),
|
||||||
|
new Field(
|
||||||
|
"vector",
|
||||||
|
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
]);
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can add multiple columns using array of Fields", async function () {
|
||||||
|
const con = await connect(tmpDir.name);
|
||||||
|
const table = await con.createTable("vectors", [
|
||||||
|
{ id: 1n, vector: [0.1, 0.2] },
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Add multiple fields as array
|
||||||
|
const fields = [
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
new Field("category", new Utf8(), true),
|
||||||
|
];
|
||||||
|
const result = await table.addColumns(fields);
|
||||||
|
expect(result).toHaveProperty("version");
|
||||||
|
expect(result.version).toBe(2);
|
||||||
|
|
||||||
|
const expectedSchema = new Schema([
|
||||||
|
new Field("id", new Int64(), true),
|
||||||
|
new Field(
|
||||||
|
"vector",
|
||||||
|
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
new Field("price", new Float64(), true),
|
||||||
|
new Field("category", new Utf8(), true),
|
||||||
|
]);
|
||||||
|
expect(await table.schema()).toEqual(expectedSchema);
|
||||||
|
});
|
||||||
|
|
||||||
it("can alter the columns in the schema", async function () {
|
it("can alter the columns in the schema", async function () {
|
||||||
const con = await connect(tmpDir.name);
|
const con = await connect(tmpDir.name);
|
||||||
const schema = new Schema([
|
const schema = new Schema([
|
||||||
@@ -1697,6 +1789,65 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(results2[0].text).toBe(data[1].text);
|
expect(results2[0].text).toBe(data[1].text);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("full text search fast search", async () => {
|
||||||
|
const db = await connect(tmpDir.name);
|
||||||
|
const data = [{ text: "hello world", vector: [0.1, 0.2, 0.3], id: 1 }];
|
||||||
|
const table = await db.createTable("test", data);
|
||||||
|
await table.createIndex("text", {
|
||||||
|
config: Index.fts(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Insert unindexed data after creating the index.
|
||||||
|
await table.add([{ text: "xyz", vector: [0.4, 0.5, 0.6], id: 2 }]);
|
||||||
|
|
||||||
|
const withFlatSearch = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(withFlatSearch.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const fastSearchResults = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(fastSearchResults.length).toBe(0);
|
||||||
|
|
||||||
|
const nearestToTextFastSearch = await table
|
||||||
|
.query()
|
||||||
|
.nearestToText("xyz")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(nearestToTextFastSearch.length).toBe(0);
|
||||||
|
|
||||||
|
// fastSearch should be chainable with other methods.
|
||||||
|
const chainedFastSearch = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.fastSearch()
|
||||||
|
.select(["text"])
|
||||||
|
.limit(5)
|
||||||
|
.toArray();
|
||||||
|
expect(chainedFastSearch.length).toBe(0);
|
||||||
|
|
||||||
|
await table.optimize();
|
||||||
|
|
||||||
|
const indexedFastSearch = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(indexedFastSearch.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const indexedNearestToTextFastSearch = await table
|
||||||
|
.query()
|
||||||
|
.nearestToText("xyz")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(indexedNearestToTextFastSearch.length).toBeGreaterThan(0);
|
||||||
|
});
|
||||||
|
|
||||||
test("prewarm full text search index", async () => {
|
test("prewarm full text search index", async () => {
|
||||||
const db = await connect(tmpDir.name);
|
const db = await connect(tmpDir.name);
|
||||||
const data = [
|
const data = [
|
||||||
@@ -2145,3 +2296,36 @@ describe("when creating an empty table", () => {
|
|||||||
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
|
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Ensure we can create float32 arrays without using Arrow
|
||||||
|
// by utilizing native JS TypedArray support
|
||||||
|
//
|
||||||
|
// https://github.com/lancedb/lancedb/issues/3115
|
||||||
|
describe("when creating a table with Float32Array vectors", () => {
|
||||||
|
let tmpDir: tmp.DirResult;
|
||||||
|
beforeEach(() => {
|
||||||
|
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||||
|
});
|
||||||
|
afterEach(() => {
|
||||||
|
tmpDir.removeCallback();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should persist Float32Array as FixedSizeList<Float32> in the LanceDB schema", async () => {
|
||||||
|
const db = await connect(tmpDir.name);
|
||||||
|
const table = await db.createTable("test", [
|
||||||
|
{ id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) },
|
||||||
|
{ id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) },
|
||||||
|
]);
|
||||||
|
|
||||||
|
const schema = await table.schema();
|
||||||
|
const vectorField = schema.fields.find((f) => f.name === "vector");
|
||||||
|
expect(vectorField).toBeDefined();
|
||||||
|
expect(vectorField!.type).toBeInstanceOf(FixedSizeList);
|
||||||
|
|
||||||
|
const fsl = vectorField!.type as FixedSizeList;
|
||||||
|
expect(fsl.listSize).toBe(3);
|
||||||
|
expect(fsl.children[0].type.typeId).toBe(Type.Float);
|
||||||
|
// precision: HALF=0, SINGLE=1, DOUBLE=2
|
||||||
|
expect((fsl.children[0].type as Float32).precision).toBe(1);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
110
nodejs/__test__/vector_types.test.ts
Normal file
110
nodejs/__test__/vector_types.test.ts
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import * as tmp from "tmp";
|
||||||
|
|
||||||
|
import { type Table, connect } from "../lancedb";
|
||||||
|
import {
|
||||||
|
Field,
|
||||||
|
FixedSizeList,
|
||||||
|
Float32,
|
||||||
|
Int64,
|
||||||
|
Schema,
|
||||||
|
makeArrowTable,
|
||||||
|
} from "../lancedb/arrow";
|
||||||
|
|
||||||
|
describe("Vector query with different typed arrays", () => {
|
||||||
|
let tmpDir: tmp.DirResult;
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
tmpDir?.removeCallback();
|
||||||
|
});
|
||||||
|
|
||||||
|
async function createFloat32Table(): Promise<Table> {
|
||||||
|
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||||
|
const db = await connect(tmpDir.name);
|
||||||
|
const schema = new Schema([
|
||||||
|
new Field("id", new Int64(), true),
|
||||||
|
new Field(
|
||||||
|
"vec",
|
||||||
|
new FixedSizeList(2, new Field("item", new Float32())),
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
const data = makeArrowTable(
|
||||||
|
[
|
||||||
|
{ id: 1n, vec: [1.0, 0.0] },
|
||||||
|
{ id: 2n, vec: [0.0, 1.0] },
|
||||||
|
{ id: 3n, vec: [1.0, 1.0] },
|
||||||
|
],
|
||||||
|
{ schema },
|
||||||
|
);
|
||||||
|
return db.createTable("test_f32", data);
|
||||||
|
}
|
||||||
|
|
||||||
|
it("should search with Float32Array (baseline)", async () => {
|
||||||
|
const table = await createFloat32Table();
|
||||||
|
const results = await table
|
||||||
|
.query()
|
||||||
|
.nearestTo(new Float32Array([1.0, 0.0]))
|
||||||
|
.limit(1)
|
||||||
|
.toArray();
|
||||||
|
|
||||||
|
expect(results.length).toBe(1);
|
||||||
|
expect(Number(results[0].id)).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should search with number[] (backward compat)", async () => {
|
||||||
|
const table = await createFloat32Table();
|
||||||
|
const results = await table
|
||||||
|
.query()
|
||||||
|
.nearestTo([1.0, 0.0])
|
||||||
|
.limit(1)
|
||||||
|
.toArray();
|
||||||
|
|
||||||
|
expect(results.length).toBe(1);
|
||||||
|
expect(Number(results[0].id)).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should search with Float64Array via raw path", async () => {
|
||||||
|
const table = await createFloat32Table();
|
||||||
|
const results = await table
|
||||||
|
.query()
|
||||||
|
.nearestTo(new Float64Array([1.0, 0.0]))
|
||||||
|
.limit(1)
|
||||||
|
.toArray();
|
||||||
|
|
||||||
|
expect(results.length).toBe(1);
|
||||||
|
expect(Number(results[0].id)).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should add multiple query vectors with Float64Array", async () => {
|
||||||
|
const table = await createFloat32Table();
|
||||||
|
const results = await table
|
||||||
|
.query()
|
||||||
|
.nearestTo(new Float64Array([1.0, 0.0]))
|
||||||
|
.addQueryVector(new Float64Array([0.0, 1.0]))
|
||||||
|
.limit(2)
|
||||||
|
.toArray();
|
||||||
|
|
||||||
|
expect(results.length).toBeGreaterThanOrEqual(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Float16Array is only available in Node 22+; not in TypeScript's standard lib yet
|
||||||
|
const float16ArrayCtor = (globalThis as unknown as Record<string, unknown>)
|
||||||
|
.Float16Array as (new (values: number[]) => unknown) | undefined;
|
||||||
|
const hasFloat16 = float16ArrayCtor !== undefined;
|
||||||
|
const f16it = hasFloat16 ? it : it.skip;
|
||||||
|
|
||||||
|
f16it("should search with Float16Array via raw path", async () => {
|
||||||
|
const table = await createFloat32Table();
|
||||||
|
const results = await table
|
||||||
|
.query()
|
||||||
|
.nearestTo(new float16ArrayCtor!([1.0, 0.0]) as Float32Array)
|
||||||
|
.limit(1)
|
||||||
|
.toArray();
|
||||||
|
|
||||||
|
expect(results.length).toBe(1);
|
||||||
|
expect(Number(results[0].id)).toBe(1);
|
||||||
|
});
|
||||||
|
});
|
||||||
599
nodejs/examples/package-lock.json
generated
599
nodejs/examples/package-lock.json
generated
@@ -30,12 +30,15 @@
|
|||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
|
"dev": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
"optional": true,
|
||||||
"os": [
|
"os": [
|
||||||
"darwin",
|
"darwin",
|
||||||
"linux",
|
"linux",
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"reflect-metadata": "^0.2.2"
|
"reflect-metadata": "^0.2.2"
|
||||||
},
|
},
|
||||||
@@ -91,14 +94,15 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/code-frame": {
|
"node_modules/@babel/code-frame": {
|
||||||
"version": "7.26.2",
|
"version": "7.29.0",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
|
||||||
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
|
"integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/helper-validator-identifier": "^7.25.9",
|
"@babel/helper-validator-identifier": "^7.28.5",
|
||||||
"js-tokens": "^4.0.0",
|
"js-tokens": "^4.0.0",
|
||||||
"picocolors": "^1.0.0"
|
"picocolors": "^1.1.1"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
@@ -233,19 +237,21 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/helper-string-parser": {
|
"node_modules/@babel/helper-string-parser": {
|
||||||
"version": "7.25.9",
|
"version": "7.27.1",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
|
||||||
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
|
"integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/helper-validator-identifier": {
|
"node_modules/@babel/helper-validator-identifier": {
|
||||||
"version": "7.25.9",
|
"version": "7.28.5",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz",
|
||||||
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
|
"integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
@@ -260,25 +266,27 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/helpers": {
|
"node_modules/@babel/helpers": {
|
||||||
"version": "7.26.0",
|
"version": "7.28.6",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.0.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.6.tgz",
|
||||||
"integrity": "sha512-tbhNuIxNcVb21pInl3ZSjksLCvgdZy9KwJ8brv993QtIVKJBBkYXz4q4ZbAv31GdnC+R90np23L5FbEBlthAEw==",
|
"integrity": "sha512-xOBvwq86HHdB7WUDTfKfT/Vuxh7gElQ+Sfti2Cy6yIWNW05P8iUslOVcZ4/sKbE+/jQaukQAdz/gf3724kYdqw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/template": "^7.25.9",
|
"@babel/template": "^7.28.6",
|
||||||
"@babel/types": "^7.26.0"
|
"@babel/types": "^7.28.6"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/parser": {
|
"node_modules/@babel/parser": {
|
||||||
"version": "7.26.2",
|
"version": "7.29.0",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.0.tgz",
|
||||||
"integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==",
|
"integrity": "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/types": "^7.26.0"
|
"@babel/types": "^7.29.0"
|
||||||
},
|
},
|
||||||
"bin": {
|
"bin": {
|
||||||
"parser": "bin/babel-parser.js"
|
"parser": "bin/babel-parser.js"
|
||||||
@@ -510,14 +518,15 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/template": {
|
"node_modules/@babel/template": {
|
||||||
"version": "7.25.9",
|
"version": "7.28.6",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
|
||||||
"integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==",
|
"integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/code-frame": "^7.25.9",
|
"@babel/code-frame": "^7.28.6",
|
||||||
"@babel/parser": "^7.25.9",
|
"@babel/parser": "^7.28.6",
|
||||||
"@babel/types": "^7.25.9"
|
"@babel/types": "^7.28.6"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
@@ -542,13 +551,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/types": {
|
"node_modules/@babel/types": {
|
||||||
"version": "7.26.0",
|
"version": "7.29.0",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
|
||||||
"integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==",
|
"integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/helper-string-parser": "^7.25.9",
|
"@babel/helper-string-parser": "^7.27.1",
|
||||||
"@babel/helper-validator-identifier": "^7.25.9"
|
"@babel/helper-validator-identifier": "^7.28.5"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=6.9.0"
|
"node": ">=6.9.0"
|
||||||
@@ -1151,95 +1161,6 @@
|
|||||||
"url": "https://opencollective.com/libvips"
|
"url": "https://opencollective.com/libvips"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@isaacs/cliui": {
|
|
||||||
"version": "8.0.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
|
|
||||||
"integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
|
|
||||||
"dependencies": {
|
|
||||||
"string-width": "^5.1.2",
|
|
||||||
"string-width-cjs": "npm:string-width@^4.2.0",
|
|
||||||
"strip-ansi": "^7.0.1",
|
|
||||||
"strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
|
|
||||||
"wrap-ansi": "^8.1.0",
|
|
||||||
"wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=12"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/cliui/node_modules/ansi-regex": {
|
|
||||||
"version": "6.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
|
|
||||||
"integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=12"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/chalk/ansi-regex?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/cliui/node_modules/ansi-styles": {
|
|
||||||
"version": "6.2.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
|
|
||||||
"integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=12"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/cliui/node_modules/emoji-regex": {
|
|
||||||
"version": "9.2.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
|
|
||||||
"integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/cliui/node_modules/string-width": {
|
|
||||||
"version": "5.1.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
|
|
||||||
"integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
|
|
||||||
"dependencies": {
|
|
||||||
"eastasianwidth": "^0.2.0",
|
|
||||||
"emoji-regex": "^9.2.2",
|
|
||||||
"strip-ansi": "^7.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=12"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/sindresorhus"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/cliui/node_modules/strip-ansi": {
|
|
||||||
"version": "7.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
|
|
||||||
"integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"ansi-regex": "^6.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=12"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/cliui/node_modules/wrap-ansi": {
|
|
||||||
"version": "8.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
|
|
||||||
"integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"ansi-styles": "^6.1.0",
|
|
||||||
"string-width": "^5.0.1",
|
|
||||||
"strip-ansi": "^7.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=12"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@isaacs/fs-minipass": {
|
"node_modules/@isaacs/fs-minipass": {
|
||||||
"version": "4.0.1",
|
"version": "4.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
|
||||||
@@ -1606,15 +1527,6 @@
|
|||||||
"resolved": "../dist",
|
"resolved": "../dist",
|
||||||
"link": true
|
"link": true
|
||||||
},
|
},
|
||||||
"node_modules/@pkgjs/parseargs": {
|
|
||||||
"version": "0.11.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
|
|
||||||
"integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
|
|
||||||
"optional": true,
|
|
||||||
"engines": {
|
|
||||||
"node": ">=14"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@protobufjs/aspromise": {
|
"node_modules/@protobufjs/aspromise": {
|
||||||
"version": "1.1.2",
|
"version": "1.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||||
@@ -1846,6 +1758,7 @@
|
|||||||
"version": "5.0.1",
|
"version": "5.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||||
|
"dev": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
@@ -1854,6 +1767,7 @@
|
|||||||
"version": "4.3.0",
|
"version": "4.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||||
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"color-convert": "^2.0.1"
|
"color-convert": "^2.0.1"
|
||||||
},
|
},
|
||||||
@@ -2019,13 +1933,15 @@
|
|||||||
"node_modules/balanced-match": {
|
"node_modules/balanced-match": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
|
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||||
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/brace-expansion": {
|
"node_modules/brace-expansion": {
|
||||||
"version": "1.1.11",
|
"version": "1.1.12",
|
||||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"balanced-match": "^1.0.0",
|
"balanced-match": "^1.0.0",
|
||||||
"concat-map": "0.0.1"
|
"concat-map": "0.0.1"
|
||||||
@@ -2102,6 +2018,19 @@
|
|||||||
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
|
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/call-bind-apply-helpers": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"es-errors": "^1.3.0",
|
||||||
|
"function-bind": "^1.1.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/callsites": {
|
"node_modules/callsites": {
|
||||||
"version": "3.1.0",
|
"version": "3.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
|
||||||
@@ -2298,9 +2227,11 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/cross-spawn": {
|
"node_modules/cross-spawn": {
|
||||||
"version": "7.0.3",
|
"version": "7.0.6",
|
||||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||||
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
|
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"path-key": "^3.1.0",
|
"path-key": "^3.1.0",
|
||||||
"shebang-command": "^2.0.0",
|
"shebang-command": "^2.0.0",
|
||||||
@@ -2384,10 +2315,19 @@
|
|||||||
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/eastasianwidth": {
|
"node_modules/dunder-proto": {
|
||||||
"version": "0.2.0",
|
"version": "1.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
||||||
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
|
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"call-bind-apply-helpers": "^1.0.1",
|
||||||
|
"es-errors": "^1.3.0",
|
||||||
|
"gopd": "^1.2.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"node_modules/ejs": {
|
"node_modules/ejs": {
|
||||||
"version": "3.1.10",
|
"version": "3.1.10",
|
||||||
@@ -2425,7 +2365,8 @@
|
|||||||
"node_modules/emoji-regex": {
|
"node_modules/emoji-regex": {
|
||||||
"version": "8.0.0",
|
"version": "8.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
|
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||||
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/error-ex": {
|
"node_modules/error-ex": {
|
||||||
"version": "1.3.2",
|
"version": "1.3.2",
|
||||||
@@ -2442,6 +2383,51 @@
|
|||||||
"integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==",
|
"integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/es-define-property": {
|
||||||
|
"version": "1.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
|
||||||
|
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/es-errors": {
|
||||||
|
"version": "1.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
|
||||||
|
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/es-object-atoms": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"es-errors": "^1.3.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/es-set-tostringtag": {
|
||||||
|
"version": "2.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
|
||||||
|
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"es-errors": "^1.3.0",
|
||||||
|
"get-intrinsic": "^1.2.6",
|
||||||
|
"has-tostringtag": "^1.0.2",
|
||||||
|
"hasown": "^2.0.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/escalade": {
|
"node_modules/escalade": {
|
||||||
"version": "3.2.0",
|
"version": "3.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||||
@@ -2554,19 +2540,21 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/filelist/node_modules/brace-expansion": {
|
"node_modules/filelist/node_modules/brace-expansion": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"balanced-match": "^1.0.0"
|
"balanced-match": "^1.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/filelist/node_modules/minimatch": {
|
"node_modules/filelist/node_modules/minimatch": {
|
||||||
"version": "5.1.6",
|
"version": "5.1.9",
|
||||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
|
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz",
|
||||||
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
|
"integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"brace-expansion": "^2.0.1"
|
"brace-expansion": "^2.0.1"
|
||||||
},
|
},
|
||||||
@@ -2604,39 +2592,16 @@
|
|||||||
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||||
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
||||||
},
|
},
|
||||||
"node_modules/foreground-child": {
|
|
||||||
"version": "3.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz",
|
|
||||||
"integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==",
|
|
||||||
"dependencies": {
|
|
||||||
"cross-spawn": "^7.0.0",
|
|
||||||
"signal-exit": "^4.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=14"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/foreground-child/node_modules/signal-exit": {
|
|
||||||
"version": "4.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
|
|
||||||
"integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=14"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/form-data": {
|
"node_modules/form-data": {
|
||||||
"version": "4.0.1",
|
"version": "4.0.5",
|
||||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
|
||||||
"integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==",
|
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"asynckit": "^0.4.0",
|
"asynckit": "^0.4.0",
|
||||||
"combined-stream": "^1.0.8",
|
"combined-stream": "^1.0.8",
|
||||||
|
"es-set-tostringtag": "^2.1.0",
|
||||||
|
"hasown": "^2.0.2",
|
||||||
"mime-types": "^2.1.12"
|
"mime-types": "^2.1.12"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
@@ -2684,7 +2649,6 @@
|
|||||||
"version": "1.1.2",
|
"version": "1.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
||||||
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
|
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
|
||||||
"dev": true,
|
|
||||||
"funding": {
|
"funding": {
|
||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
@@ -2707,6 +2671,30 @@
|
|||||||
"node": "6.* || 8.* || >= 10.*"
|
"node": "6.* || 8.* || >= 10.*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/get-intrinsic": {
|
||||||
|
"version": "1.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
|
||||||
|
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"call-bind-apply-helpers": "^1.0.2",
|
||||||
|
"es-define-property": "^1.0.1",
|
||||||
|
"es-errors": "^1.3.0",
|
||||||
|
"es-object-atoms": "^1.1.1",
|
||||||
|
"function-bind": "^1.1.2",
|
||||||
|
"get-proto": "^1.0.1",
|
||||||
|
"gopd": "^1.2.0",
|
||||||
|
"has-symbols": "^1.1.0",
|
||||||
|
"hasown": "^2.0.2",
|
||||||
|
"math-intrinsics": "^1.1.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/get-package-type": {
|
"node_modules/get-package-type": {
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz",
|
||||||
@@ -2716,6 +2704,19 @@
|
|||||||
"node": ">=8.0.0"
|
"node": ">=8.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/get-proto": {
|
||||||
|
"version": "1.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
|
||||||
|
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"dunder-proto": "^1.0.1",
|
||||||
|
"es-object-atoms": "^1.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/get-stream": {
|
"node_modules/get-stream": {
|
||||||
"version": "6.0.1",
|
"version": "6.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
|
||||||
@@ -2758,6 +2759,18 @@
|
|||||||
"node": ">=4"
|
"node": ">=4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/gopd": {
|
||||||
|
"version": "1.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
|
||||||
|
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/graceful-fs": {
|
"node_modules/graceful-fs": {
|
||||||
"version": "4.2.11",
|
"version": "4.2.11",
|
||||||
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
|
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
|
||||||
@@ -2778,11 +2791,37 @@
|
|||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/has-symbols": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/has-tostringtag": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"has-symbols": "^1.0.3"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/hasown": {
|
"node_modules/hasown": {
|
||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
|
||||||
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
|
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"function-bind": "^1.1.2"
|
"function-bind": "^1.1.2"
|
||||||
},
|
},
|
||||||
@@ -2882,6 +2921,7 @@
|
|||||||
"version": "3.0.0",
|
"version": "3.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||||
|
"dev": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
@@ -2919,7 +2959,8 @@
|
|||||||
"node_modules/isexe": {
|
"node_modules/isexe": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
|
||||||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
|
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
||||||
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/istanbul-lib-coverage": {
|
"node_modules/istanbul-lib-coverage": {
|
||||||
"version": "3.2.2",
|
"version": "3.2.2",
|
||||||
@@ -2987,20 +3028,6 @@
|
|||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/jackspeak": {
|
|
||||||
"version": "3.4.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
|
|
||||||
"integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==",
|
|
||||||
"dependencies": {
|
|
||||||
"@isaacs/cliui": "^8.0.2"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
},
|
|
||||||
"optionalDependencies": {
|
|
||||||
"@pkgjs/parseargs": "^0.11.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/jake": {
|
"node_modules/jake": {
|
||||||
"version": "10.9.2",
|
"version": "10.9.2",
|
||||||
"resolved": "https://registry.npmjs.org/jake/-/jake-10.9.2.tgz",
|
"resolved": "https://registry.npmjs.org/jake/-/jake-10.9.2.tgz",
|
||||||
@@ -3605,10 +3632,11 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/js-yaml": {
|
"node_modules/js-yaml": {
|
||||||
"version": "3.14.1",
|
"version": "3.14.2",
|
||||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
|
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz",
|
||||||
"integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
|
"integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"argparse": "^1.0.7",
|
"argparse": "^1.0.7",
|
||||||
"esprima": "^4.0.0"
|
"esprima": "^4.0.0"
|
||||||
@@ -3728,6 +3756,15 @@
|
|||||||
"tmpl": "1.0.5"
|
"tmpl": "1.0.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/math-intrinsics": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/merge-stream": {
|
"node_modules/merge-stream": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
||||||
@@ -3776,10 +3813,11 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/minimatch": {
|
"node_modules/minimatch": {
|
||||||
"version": "3.1.2",
|
"version": "3.1.5",
|
||||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
|
||||||
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"brace-expansion": "^1.1.7"
|
"brace-expansion": "^1.1.7"
|
||||||
},
|
},
|
||||||
@@ -3796,31 +3834,17 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/minizlib": {
|
"node_modules/minizlib": {
|
||||||
"version": "3.0.1",
|
"version": "3.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
|
||||||
"integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
|
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"minipass": "^7.0.4",
|
"minipass": "^7.1.2"
|
||||||
"rimraf": "^5.0.5"
|
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">= 18"
|
"node": ">= 18"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/mkdirp": {
|
|
||||||
"version": "3.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
|
|
||||||
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
|
|
||||||
"bin": {
|
|
||||||
"mkdirp": "dist/cjs/src/bin.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/ms": {
|
"node_modules/ms": {
|
||||||
"version": "2.1.3",
|
"version": "2.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||||
@@ -4010,11 +4034,6 @@
|
|||||||
"node": ">=6"
|
"node": ">=6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/package-json-from-dist": {
|
|
||||||
"version": "1.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz",
|
|
||||||
"integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw=="
|
|
||||||
},
|
|
||||||
"node_modules/parse-json": {
|
"node_modules/parse-json": {
|
||||||
"version": "5.2.0",
|
"version": "5.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
|
||||||
@@ -4055,6 +4074,7 @@
|
|||||||
"version": "3.1.1",
|
"version": "3.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
|
||||||
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
|
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
|
||||||
|
"dev": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
@@ -4065,26 +4085,6 @@
|
|||||||
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
|
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/path-scurry": {
|
|
||||||
"version": "1.11.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
|
|
||||||
"integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
|
|
||||||
"dependencies": {
|
|
||||||
"lru-cache": "^10.2.0",
|
|
||||||
"minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=16 || 14 >=14.18"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/path-scurry/node_modules/lru-cache": {
|
|
||||||
"version": "10.4.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz",
|
|
||||||
"integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
|
|
||||||
},
|
|
||||||
"node_modules/picocolors": {
|
"node_modules/picocolors": {
|
||||||
"version": "1.1.1",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
|
||||||
@@ -4246,61 +4246,6 @@
|
|||||||
"node": ">=10"
|
"node": ">=10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/rimraf": {
|
|
||||||
"version": "5.0.10",
|
|
||||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
|
|
||||||
"integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"glob": "^10.3.7"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"rimraf": "dist/esm/bin.mjs"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/rimraf/node_modules/brace-expansion": {
|
|
||||||
"version": "2.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
|
||||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
|
||||||
"dependencies": {
|
|
||||||
"balanced-match": "^1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/rimraf/node_modules/glob": {
|
|
||||||
"version": "10.4.5",
|
|
||||||
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
|
|
||||||
"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
|
|
||||||
"dependencies": {
|
|
||||||
"foreground-child": "^3.1.0",
|
|
||||||
"jackspeak": "^3.1.2",
|
|
||||||
"minimatch": "^9.0.4",
|
|
||||||
"minipass": "^7.1.2",
|
|
||||||
"package-json-from-dist": "^1.0.0",
|
|
||||||
"path-scurry": "^1.11.1"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"glob": "dist/esm/bin.mjs"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/rimraf/node_modules/minimatch": {
|
|
||||||
"version": "9.0.5",
|
|
||||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
|
|
||||||
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
|
|
||||||
"dependencies": {
|
|
||||||
"brace-expansion": "^2.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=16 || 14 >=14.17"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/semver": {
|
"node_modules/semver": {
|
||||||
"version": "7.6.3",
|
"version": "7.6.3",
|
||||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
|
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
|
||||||
@@ -4354,6 +4299,7 @@
|
|||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
||||||
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
|
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
|
||||||
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"shebang-regex": "^3.0.0"
|
"shebang-regex": "^3.0.0"
|
||||||
},
|
},
|
||||||
@@ -4365,6 +4311,7 @@
|
|||||||
"version": "3.0.0",
|
"version": "3.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
|
||||||
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
|
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
|
||||||
|
"dev": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
@@ -4452,20 +4399,7 @@
|
|||||||
"version": "4.2.3",
|
"version": "4.2.3",
|
||||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||||
"dependencies": {
|
"dev": true,
|
||||||
"emoji-regex": "^8.0.0",
|
|
||||||
"is-fullwidth-code-point": "^3.0.0",
|
|
||||||
"strip-ansi": "^6.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/string-width-cjs": {
|
|
||||||
"name": "string-width",
|
|
||||||
"version": "4.2.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
|
||||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"emoji-regex": "^8.0.0",
|
"emoji-regex": "^8.0.0",
|
||||||
"is-fullwidth-code-point": "^3.0.0",
|
"is-fullwidth-code-point": "^3.0.0",
|
||||||
@@ -4479,18 +4413,7 @@
|
|||||||
"version": "6.0.1",
|
"version": "6.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||||
"dependencies": {
|
"dev": true,
|
||||||
"ansi-regex": "^5.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/strip-ansi-cjs": {
|
|
||||||
"name": "strip-ansi",
|
|
||||||
"version": "6.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
|
||||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"ansi-regex": "^5.0.1"
|
"ansi-regex": "^5.0.1"
|
||||||
},
|
},
|
||||||
@@ -4541,15 +4464,15 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/tar": {
|
"node_modules/tar": {
|
||||||
"version": "7.4.3",
|
"version": "7.5.10",
|
||||||
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
|
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.10.tgz",
|
||||||
"integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
|
"integrity": "sha512-8mOPs1//5q/rlkNSPcCegA6hiHJYDmSLEI8aMH/CdSQJNWztHC9WHNam5zdQlfpTwB9Xp7IBEsHfV5LKMJGVAw==",
|
||||||
|
"license": "BlueOak-1.0.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@isaacs/fs-minipass": "^4.0.0",
|
"@isaacs/fs-minipass": "^4.0.0",
|
||||||
"chownr": "^3.0.0",
|
"chownr": "^3.0.0",
|
||||||
"minipass": "^7.1.2",
|
"minipass": "^7.1.2",
|
||||||
"minizlib": "^3.0.1",
|
"minizlib": "^3.1.0",
|
||||||
"mkdirp": "^3.0.1",
|
|
||||||
"yallist": "^5.0.0"
|
"yallist": "^5.0.0"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
@@ -4782,6 +4705,7 @@
|
|||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||||
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
|
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
|
||||||
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"isexe": "^2.0.0"
|
"isexe": "^2.0.0"
|
||||||
},
|
},
|
||||||
@@ -4809,23 +4733,6 @@
|
|||||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/wrap-ansi-cjs": {
|
|
||||||
"name": "wrap-ansi",
|
|
||||||
"version": "7.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
|
||||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
|
||||||
"dependencies": {
|
|
||||||
"ansi-styles": "^4.0.0",
|
|
||||||
"string-width": "^4.1.0",
|
|
||||||
"strip-ansi": "^6.0.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/wrappy": {
|
"node_modules/wrappy": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ import {
|
|||||||
Float32,
|
Float32,
|
||||||
Float64,
|
Float64,
|
||||||
Int,
|
Int,
|
||||||
|
Int8,
|
||||||
|
Int16,
|
||||||
Int32,
|
Int32,
|
||||||
Int64,
|
Int64,
|
||||||
LargeBinary,
|
LargeBinary,
|
||||||
@@ -35,6 +37,8 @@ import {
|
|||||||
Timestamp,
|
Timestamp,
|
||||||
Type,
|
Type,
|
||||||
Uint8,
|
Uint8,
|
||||||
|
Uint16,
|
||||||
|
Uint32,
|
||||||
Utf8,
|
Utf8,
|
||||||
Vector,
|
Vector,
|
||||||
makeVector as arrowMakeVector,
|
makeVector as arrowMakeVector,
|
||||||
@@ -113,8 +117,9 @@ export type TableLike =
|
|||||||
export type IntoVector =
|
export type IntoVector =
|
||||||
| Float32Array
|
| Float32Array
|
||||||
| Float64Array
|
| Float64Array
|
||||||
|
| Uint8Array
|
||||||
| number[]
|
| number[]
|
||||||
| Promise<Float32Array | Float64Array | number[]>;
|
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
|
||||||
|
|
||||||
export type MultiVector = IntoVector[];
|
export type MultiVector = IntoVector[];
|
||||||
|
|
||||||
@@ -122,14 +127,48 @@ export function isMultiVector(value: unknown): value is MultiVector {
|
|||||||
return Array.isArray(value) && isIntoVector(value[0]);
|
return Array.isArray(value) && isIntoVector(value[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Float16Array is not in TypeScript's standard lib yet; access dynamically
|
||||||
|
type Float16ArrayCtor = new (
|
||||||
|
...args: unknown[]
|
||||||
|
) => { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
|
||||||
|
const float16ArrayCtor = (globalThis as unknown as Record<string, unknown>)
|
||||||
|
.Float16Array as Float16ArrayCtor | undefined;
|
||||||
|
|
||||||
export function isIntoVector(value: unknown): value is IntoVector {
|
export function isIntoVector(value: unknown): value is IntoVector {
|
||||||
return (
|
return (
|
||||||
value instanceof Float32Array ||
|
value instanceof Float32Array ||
|
||||||
value instanceof Float64Array ||
|
value instanceof Float64Array ||
|
||||||
|
value instanceof Uint8Array ||
|
||||||
|
(float16ArrayCtor !== undefined && value instanceof float16ArrayCtor) ||
|
||||||
(Array.isArray(value) && !Array.isArray(value[0]))
|
(Array.isArray(value) && !Array.isArray(value[0]))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract the underlying byte buffer and data type from a typed array
|
||||||
|
* for passing to the Rust NAPI layer without precision loss.
|
||||||
|
*/
|
||||||
|
export function extractVectorBuffer(
|
||||||
|
vector: Float32Array | Float64Array | Uint8Array,
|
||||||
|
): { data: Uint8Array; dtype: string } | null {
|
||||||
|
if (float16ArrayCtor !== undefined && vector instanceof float16ArrayCtor) {
|
||||||
|
return {
|
||||||
|
data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength),
|
||||||
|
dtype: "float16",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (vector instanceof Float64Array) {
|
||||||
|
return {
|
||||||
|
data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength),
|
||||||
|
dtype: "float64",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (vector instanceof Uint8Array && !(vector instanceof Float32Array)) {
|
||||||
|
return { data: vector, dtype: "uint8" };
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
export function isArrowTable(value: object): value is TableLike {
|
export function isArrowTable(value: object): value is TableLike {
|
||||||
if (value instanceof ArrowTable) return true;
|
if (value instanceof ArrowTable) return true;
|
||||||
return "schema" in value && "batches" in value;
|
return "schema" in value && "batches" in value;
|
||||||
@@ -529,7 +568,8 @@ function isObject(value: unknown): value is Record<string, unknown> {
|
|||||||
!(value instanceof Date) &&
|
!(value instanceof Date) &&
|
||||||
!(value instanceof Set) &&
|
!(value instanceof Set) &&
|
||||||
!(value instanceof Map) &&
|
!(value instanceof Map) &&
|
||||||
!(value instanceof Buffer)
|
!(value instanceof Buffer) &&
|
||||||
|
!ArrayBuffer.isView(value)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -588,6 +628,13 @@ function inferType(
|
|||||||
return new Bool();
|
return new Bool();
|
||||||
} else if (value instanceof Buffer) {
|
} else if (value instanceof Buffer) {
|
||||||
return new Binary();
|
return new Binary();
|
||||||
|
} else if (ArrayBuffer.isView(value) && !(value instanceof DataView)) {
|
||||||
|
const info = typedArrayToArrowType(value);
|
||||||
|
if (info !== undefined) {
|
||||||
|
const child = new Field("item", info.elementType, true);
|
||||||
|
return new FixedSizeList(info.length, child);
|
||||||
|
}
|
||||||
|
return undefined;
|
||||||
} else if (Array.isArray(value)) {
|
} else if (Array.isArray(value)) {
|
||||||
if (value.length === 0) {
|
if (value.length === 0) {
|
||||||
return undefined; // Without any values we can't infer the type
|
return undefined; // Without any values we can't infer the type
|
||||||
@@ -746,6 +793,32 @@ function makeListVector(lists: unknown[][]): Vector<unknown> {
|
|||||||
return listBuilder.finish().toVector();
|
return listBuilder.finish().toVector();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map a JS TypedArray instance to the corresponding Arrow element DataType
|
||||||
|
* and its length. Returns undefined if the value is not a recognized TypedArray.
|
||||||
|
*/
|
||||||
|
function typedArrayToArrowType(
|
||||||
|
value: ArrayBufferView,
|
||||||
|
): { elementType: DataType; length: number } | undefined {
|
||||||
|
if (value instanceof Float32Array)
|
||||||
|
return { elementType: new Float32(), length: value.length };
|
||||||
|
if (value instanceof Float64Array)
|
||||||
|
return { elementType: new Float64(), length: value.length };
|
||||||
|
if (value instanceof Uint8Array)
|
||||||
|
return { elementType: new Uint8(), length: value.length };
|
||||||
|
if (value instanceof Uint16Array)
|
||||||
|
return { elementType: new Uint16(), length: value.length };
|
||||||
|
if (value instanceof Uint32Array)
|
||||||
|
return { elementType: new Uint32(), length: value.length };
|
||||||
|
if (value instanceof Int8Array)
|
||||||
|
return { elementType: new Int8(), length: value.length };
|
||||||
|
if (value instanceof Int16Array)
|
||||||
|
return { elementType: new Int16(), length: value.length };
|
||||||
|
if (value instanceof Int32Array)
|
||||||
|
return { elementType: new Int32(), length: value.length };
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
/** Helper function to convert an Array of JS values to an Arrow Vector */
|
/** Helper function to convert an Array of JS values to an Arrow Vector */
|
||||||
function makeVector(
|
function makeVector(
|
||||||
values: unknown[],
|
values: unknown[],
|
||||||
@@ -814,6 +887,16 @@ function makeVector(
|
|||||||
"makeVector cannot infer the type if all values are null or undefined",
|
"makeVector cannot infer the type if all values are null or undefined",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
if (ArrayBuffer.isView(sampleValue) && !(sampleValue instanceof DataView)) {
|
||||||
|
const info = typedArrayToArrowType(sampleValue);
|
||||||
|
if (info !== undefined) {
|
||||||
|
const fslType = new FixedSizeList(
|
||||||
|
info.length,
|
||||||
|
new Field("item", info.elementType, true),
|
||||||
|
);
|
||||||
|
return vectorFromArray(values, fslType);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (Array.isArray(sampleValue)) {
|
if (Array.isArray(sampleValue)) {
|
||||||
// Default Arrow inference doesn't handle list types
|
// Default Arrow inference doesn't handle list types
|
||||||
return makeListVector(values as unknown[][]);
|
return makeListVector(values as unknown[][]);
|
||||||
|
|||||||
@@ -166,25 +166,25 @@ export abstract class Connection {
|
|||||||
* List all the table names in this database.
|
* List all the table names in this database.
|
||||||
*
|
*
|
||||||
* Tables will be returned in lexicographical order.
|
* Tables will be returned in lexicographical order.
|
||||||
* @param {string[]} namespace - The namespace to list tables from (defaults to root namespace)
|
* @param {string[]} namespacePath - The namespace path to list tables from (defaults to root namespace)
|
||||||
* @param {Partial<TableNamesOptions>} options - options to control the
|
* @param {Partial<TableNamesOptions>} options - options to control the
|
||||||
* paging / start point
|
* paging / start point
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
abstract tableNames(
|
abstract tableNames(
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
options?: Partial<TableNamesOptions>,
|
options?: Partial<TableNamesOptions>,
|
||||||
): Promise<string[]>;
|
): Promise<string[]>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open a table in the database.
|
* Open a table in the database.
|
||||||
* @param {string} name - The name of the table
|
* @param {string} name - The name of the table
|
||||||
* @param {string[]} namespace - The namespace of the table (defaults to root namespace)
|
* @param {string[]} namespacePath - The namespace path of the table (defaults to root namespace)
|
||||||
* @param {Partial<OpenTableOptions>} options - Additional options
|
* @param {Partial<OpenTableOptions>} options - Additional options
|
||||||
*/
|
*/
|
||||||
abstract openTable(
|
abstract openTable(
|
||||||
name: string,
|
name: string,
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
options?: Partial<OpenTableOptions>,
|
options?: Partial<OpenTableOptions>,
|
||||||
): Promise<Table>;
|
): Promise<Table>;
|
||||||
|
|
||||||
@@ -193,7 +193,7 @@ export abstract class Connection {
|
|||||||
* @param {object} options - The options object.
|
* @param {object} options - The options object.
|
||||||
* @param {string} options.name - The name of the table.
|
* @param {string} options.name - The name of the table.
|
||||||
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
|
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
|
||||||
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
|
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
abstract createTable(
|
abstract createTable(
|
||||||
@@ -201,7 +201,7 @@ export abstract class Connection {
|
|||||||
name: string;
|
name: string;
|
||||||
data: Data;
|
data: Data;
|
||||||
} & Partial<CreateTableOptions>,
|
} & Partial<CreateTableOptions>,
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
): Promise<Table>;
|
): Promise<Table>;
|
||||||
/**
|
/**
|
||||||
* Creates a new Table and initialize it with new data.
|
* Creates a new Table and initialize it with new data.
|
||||||
@@ -220,13 +220,13 @@ export abstract class Connection {
|
|||||||
* @param {string} name - The name of the table.
|
* @param {string} name - The name of the table.
|
||||||
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
||||||
* to be inserted into the table
|
* to be inserted into the table
|
||||||
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
|
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
|
||||||
* @param {Partial<CreateTableOptions>} options - Additional options
|
* @param {Partial<CreateTableOptions>} options - Additional options
|
||||||
*/
|
*/
|
||||||
abstract createTable(
|
abstract createTable(
|
||||||
name: string,
|
name: string,
|
||||||
data: Record<string, unknown>[] | TableLike,
|
data: Record<string, unknown>[] | TableLike,
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table>;
|
): Promise<Table>;
|
||||||
|
|
||||||
@@ -245,28 +245,28 @@ export abstract class Connection {
|
|||||||
* Creates a new empty Table
|
* Creates a new empty Table
|
||||||
* @param {string} name - The name of the table.
|
* @param {string} name - The name of the table.
|
||||||
* @param {Schema} schema - The schema of the table
|
* @param {Schema} schema - The schema of the table
|
||||||
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
|
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
|
||||||
* @param {Partial<CreateTableOptions>} options - Additional options
|
* @param {Partial<CreateTableOptions>} options - Additional options
|
||||||
*/
|
*/
|
||||||
abstract createEmptyTable(
|
abstract createEmptyTable(
|
||||||
name: string,
|
name: string,
|
||||||
schema: import("./arrow").SchemaLike,
|
schema: import("./arrow").SchemaLike,
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table>;
|
): Promise<Table>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Drop an existing table.
|
* Drop an existing table.
|
||||||
* @param {string} name The name of the table to drop.
|
* @param {string} name The name of the table to drop.
|
||||||
* @param {string[]} namespace The namespace of the table (defaults to root namespace).
|
* @param {string[]} namespacePath The namespace path of the table (defaults to root namespace).
|
||||||
*/
|
*/
|
||||||
abstract dropTable(name: string, namespace?: string[]): Promise<void>;
|
abstract dropTable(name: string, namespacePath?: string[]): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Drop all tables in the database.
|
* Drop all tables in the database.
|
||||||
* @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
|
* @param {string[]} namespacePath The namespace path to drop tables from (defaults to root namespace).
|
||||||
*/
|
*/
|
||||||
abstract dropAllTables(namespace?: string[]): Promise<void>;
|
abstract dropAllTables(namespacePath?: string[]): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clone a table from a source table.
|
* Clone a table from a source table.
|
||||||
@@ -279,7 +279,7 @@ export abstract class Connection {
|
|||||||
* @param {string} targetTableName - The name of the target table to create.
|
* @param {string} targetTableName - The name of the target table to create.
|
||||||
* @param {string} sourceUri - The URI of the source table to clone from.
|
* @param {string} sourceUri - The URI of the source table to clone from.
|
||||||
* @param {object} options - Clone options.
|
* @param {object} options - Clone options.
|
||||||
* @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
|
* @param {string[]} options.targetNamespacePath - The namespace path for the target table (defaults to root namespace).
|
||||||
* @param {number} options.sourceVersion - The version of the source table to clone.
|
* @param {number} options.sourceVersion - The version of the source table to clone.
|
||||||
* @param {string} options.sourceTag - The tag of the source table to clone.
|
* @param {string} options.sourceTag - The tag of the source table to clone.
|
||||||
* @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
|
* @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
|
||||||
@@ -288,7 +288,7 @@ export abstract class Connection {
|
|||||||
targetTableName: string,
|
targetTableName: string,
|
||||||
sourceUri: string,
|
sourceUri: string,
|
||||||
options?: {
|
options?: {
|
||||||
targetNamespace?: string[];
|
targetNamespacePath?: string[];
|
||||||
sourceVersion?: number;
|
sourceVersion?: number;
|
||||||
sourceTag?: string;
|
sourceTag?: string;
|
||||||
isShallow?: boolean;
|
isShallow?: boolean;
|
||||||
@@ -319,25 +319,25 @@ export class LocalConnection extends Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async tableNames(
|
async tableNames(
|
||||||
namespaceOrOptions?: string[] | Partial<TableNamesOptions>,
|
namespacePathOrOptions?: string[] | Partial<TableNamesOptions>,
|
||||||
options?: Partial<TableNamesOptions>,
|
options?: Partial<TableNamesOptions>,
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
// Detect if first argument is namespace array or options object
|
// Detect if first argument is namespacePath array or options object
|
||||||
let namespace: string[] | undefined;
|
let namespacePath: string[] | undefined;
|
||||||
let tableNamesOptions: Partial<TableNamesOptions> | undefined;
|
let tableNamesOptions: Partial<TableNamesOptions> | undefined;
|
||||||
|
|
||||||
if (Array.isArray(namespaceOrOptions)) {
|
if (Array.isArray(namespacePathOrOptions)) {
|
||||||
// First argument is namespace array
|
// First argument is namespacePath array
|
||||||
namespace = namespaceOrOptions;
|
namespacePath = namespacePathOrOptions;
|
||||||
tableNamesOptions = options;
|
tableNamesOptions = options;
|
||||||
} else {
|
} else {
|
||||||
// First argument is options object (backwards compatibility)
|
// First argument is options object (backwards compatibility)
|
||||||
namespace = undefined;
|
namespacePath = undefined;
|
||||||
tableNamesOptions = namespaceOrOptions;
|
tableNamesOptions = namespacePathOrOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.inner.tableNames(
|
return this.inner.tableNames(
|
||||||
namespace ?? [],
|
namespacePath ?? [],
|
||||||
tableNamesOptions?.startAfter,
|
tableNamesOptions?.startAfter,
|
||||||
tableNamesOptions?.limit,
|
tableNamesOptions?.limit,
|
||||||
);
|
);
|
||||||
@@ -345,12 +345,12 @@ export class LocalConnection extends Connection {
|
|||||||
|
|
||||||
async openTable(
|
async openTable(
|
||||||
name: string,
|
name: string,
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
options?: Partial<OpenTableOptions>,
|
options?: Partial<OpenTableOptions>,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
const innerTable = await this.inner.openTable(
|
const innerTable = await this.inner.openTable(
|
||||||
name,
|
name,
|
||||||
namespace ?? [],
|
namespacePath ?? [],
|
||||||
cleanseStorageOptions(options?.storageOptions),
|
cleanseStorageOptions(options?.storageOptions),
|
||||||
options?.indexCacheSize,
|
options?.indexCacheSize,
|
||||||
);
|
);
|
||||||
@@ -362,7 +362,7 @@ export class LocalConnection extends Connection {
|
|||||||
targetTableName: string,
|
targetTableName: string,
|
||||||
sourceUri: string,
|
sourceUri: string,
|
||||||
options?: {
|
options?: {
|
||||||
targetNamespace?: string[];
|
targetNamespacePath?: string[];
|
||||||
sourceVersion?: number;
|
sourceVersion?: number;
|
||||||
sourceTag?: string;
|
sourceTag?: string;
|
||||||
isShallow?: boolean;
|
isShallow?: boolean;
|
||||||
@@ -371,7 +371,7 @@ export class LocalConnection extends Connection {
|
|||||||
const innerTable = await this.inner.cloneTable(
|
const innerTable = await this.inner.cloneTable(
|
||||||
targetTableName,
|
targetTableName,
|
||||||
sourceUri,
|
sourceUri,
|
||||||
options?.targetNamespace ?? [],
|
options?.targetNamespacePath ?? [],
|
||||||
options?.sourceVersion ?? null,
|
options?.sourceVersion ?? null,
|
||||||
options?.sourceTag ?? null,
|
options?.sourceTag ?? null,
|
||||||
options?.isShallow ?? true,
|
options?.isShallow ?? true,
|
||||||
@@ -406,42 +406,42 @@ export class LocalConnection extends Connection {
|
|||||||
nameOrOptions:
|
nameOrOptions:
|
||||||
| string
|
| string
|
||||||
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
||||||
dataOrNamespace?: Record<string, unknown>[] | TableLike | string[],
|
dataOrNamespacePath?: Record<string, unknown>[] | TableLike | string[],
|
||||||
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
|
namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
||||||
// First overload: createTable(options, namespace?)
|
// First overload: createTable(options, namespacePath?)
|
||||||
const { name, data, ...createOptions } = nameOrOptions;
|
const { name, data, ...createOptions } = nameOrOptions;
|
||||||
const namespace = dataOrNamespace as string[] | undefined;
|
const namespacePath = dataOrNamespacePath as string[] | undefined;
|
||||||
return this._createTableImpl(name, data, namespace, createOptions);
|
return this._createTableImpl(name, data, namespacePath, createOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Second overload: createTable(name, data, namespace?, options?)
|
// Second overload: createTable(name, data, namespacePath?, options?)
|
||||||
const name = nameOrOptions;
|
const name = nameOrOptions;
|
||||||
const data = dataOrNamespace as Record<string, unknown>[] | TableLike;
|
const data = dataOrNamespacePath as Record<string, unknown>[] | TableLike;
|
||||||
|
|
||||||
// Detect if third argument is namespace array or options object
|
// Detect if third argument is namespacePath array or options object
|
||||||
let namespace: string[] | undefined;
|
let namespacePath: string[] | undefined;
|
||||||
let createOptions: Partial<CreateTableOptions> | undefined;
|
let createOptions: Partial<CreateTableOptions> | undefined;
|
||||||
|
|
||||||
if (Array.isArray(namespaceOrOptions)) {
|
if (Array.isArray(namespacePathOrOptions)) {
|
||||||
// Third argument is namespace array
|
// Third argument is namespacePath array
|
||||||
namespace = namespaceOrOptions;
|
namespacePath = namespacePathOrOptions;
|
||||||
createOptions = options;
|
createOptions = options;
|
||||||
} else {
|
} else {
|
||||||
// Third argument is options object (backwards compatibility)
|
// Third argument is options object (backwards compatibility)
|
||||||
namespace = undefined;
|
namespacePath = undefined;
|
||||||
createOptions = namespaceOrOptions;
|
createOptions = namespacePathOrOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
return this._createTableImpl(name, data, namespace, createOptions);
|
return this._createTableImpl(name, data, namespacePath, createOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async _createTableImpl(
|
private async _createTableImpl(
|
||||||
name: string,
|
name: string,
|
||||||
data: Data,
|
data: Data,
|
||||||
namespace?: string[],
|
namespacePath?: string[],
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
if (data === undefined) {
|
if (data === undefined) {
|
||||||
@@ -455,7 +455,7 @@ export class LocalConnection extends Connection {
|
|||||||
name,
|
name,
|
||||||
buf,
|
buf,
|
||||||
mode,
|
mode,
|
||||||
namespace ?? [],
|
namespacePath ?? [],
|
||||||
storageOptions,
|
storageOptions,
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -465,21 +465,21 @@ export class LocalConnection extends Connection {
|
|||||||
async createEmptyTable(
|
async createEmptyTable(
|
||||||
name: string,
|
name: string,
|
||||||
schema: import("./arrow").SchemaLike,
|
schema: import("./arrow").SchemaLike,
|
||||||
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
|
namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
// Detect if third argument is namespace array or options object
|
// Detect if third argument is namespacePath array or options object
|
||||||
let namespace: string[] | undefined;
|
let namespacePath: string[] | undefined;
|
||||||
let createOptions: Partial<CreateTableOptions> | undefined;
|
let createOptions: Partial<CreateTableOptions> | undefined;
|
||||||
|
|
||||||
if (Array.isArray(namespaceOrOptions)) {
|
if (Array.isArray(namespacePathOrOptions)) {
|
||||||
// Third argument is namespace array
|
// Third argument is namespacePath array
|
||||||
namespace = namespaceOrOptions;
|
namespacePath = namespacePathOrOptions;
|
||||||
createOptions = options;
|
createOptions = options;
|
||||||
} else {
|
} else {
|
||||||
// Third argument is options object (backwards compatibility)
|
// Third argument is options object (backwards compatibility)
|
||||||
namespace = undefined;
|
namespacePath = undefined;
|
||||||
createOptions = namespaceOrOptions;
|
createOptions = namespacePathOrOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mode: string = createOptions?.mode ?? "create";
|
let mode: string = createOptions?.mode ?? "create";
|
||||||
@@ -502,18 +502,18 @@ export class LocalConnection extends Connection {
|
|||||||
name,
|
name,
|
||||||
buf,
|
buf,
|
||||||
mode,
|
mode,
|
||||||
namespace ?? [],
|
namespacePath ?? [],
|
||||||
storageOptions,
|
storageOptions,
|
||||||
);
|
);
|
||||||
return new LocalTable(innerTable);
|
return new LocalTable(innerTable);
|
||||||
}
|
}
|
||||||
|
|
||||||
async dropTable(name: string, namespace?: string[]): Promise<void> {
|
async dropTable(name: string, namespacePath?: string[]): Promise<void> {
|
||||||
return this.inner.dropTable(name, namespace ?? []);
|
return this.inner.dropTable(name, namespacePath ?? []);
|
||||||
}
|
}
|
||||||
|
|
||||||
async dropAllTables(namespace?: string[]): Promise<void> {
|
async dropAllTables(namespacePath?: string[]): Promise<void> {
|
||||||
return this.inner.dropAllTables(namespace ?? []);
|
return this.inner.dropAllTables(namespacePath ?? []);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -273,7 +273,9 @@ export async function connect(
|
|||||||
let nativeProvider: NativeJsHeaderProvider | undefined;
|
let nativeProvider: NativeJsHeaderProvider | undefined;
|
||||||
if (finalHeaderProvider) {
|
if (finalHeaderProvider) {
|
||||||
if (typeof finalHeaderProvider === "function") {
|
if (typeof finalHeaderProvider === "function") {
|
||||||
nativeProvider = new NativeJsHeaderProvider(finalHeaderProvider);
|
nativeProvider = new NativeJsHeaderProvider(async () =>
|
||||||
|
finalHeaderProvider(),
|
||||||
|
);
|
||||||
} else if (
|
} else if (
|
||||||
finalHeaderProvider &&
|
finalHeaderProvider &&
|
||||||
typeof finalHeaderProvider.getHeaders === "function"
|
typeof finalHeaderProvider.getHeaders === "function"
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import {
|
|||||||
Table as ArrowTable,
|
Table as ArrowTable,
|
||||||
type IntoVector,
|
type IntoVector,
|
||||||
RecordBatch,
|
RecordBatch,
|
||||||
|
extractVectorBuffer,
|
||||||
fromBufferToRecordBatch,
|
fromBufferToRecordBatch,
|
||||||
fromRecordBatchToBuffer,
|
fromRecordBatchToBuffer,
|
||||||
tableFromIPC,
|
tableFromIPC,
|
||||||
@@ -661,10 +662,8 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
|
|||||||
const res = (async () => {
|
const res = (async () => {
|
||||||
try {
|
try {
|
||||||
const v = await vector;
|
const v = await vector;
|
||||||
const arr = Float32Array.from(v);
|
|
||||||
//
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
||||||
const value: any = this.addQueryVector(arr);
|
const value: any = this.addQueryVector(v);
|
||||||
const inner = value.inner as
|
const inner = value.inner as
|
||||||
| NativeVectorQuery
|
| NativeVectorQuery
|
||||||
| Promise<NativeVectorQuery>;
|
| Promise<NativeVectorQuery>;
|
||||||
@@ -676,7 +675,12 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
|
|||||||
return new VectorQuery(res);
|
return new VectorQuery(res);
|
||||||
} else {
|
} else {
|
||||||
super.doCall((inner) => {
|
super.doCall((inner) => {
|
||||||
inner.addQueryVector(Float32Array.from(vector));
|
const raw = Array.isArray(vector) ? null : extractVectorBuffer(vector);
|
||||||
|
if (raw) {
|
||||||
|
inner.addQueryVectorRaw(raw.data, raw.dtype);
|
||||||
|
} else {
|
||||||
|
inner.addQueryVector(Float32Array.from(vector as number[]));
|
||||||
|
}
|
||||||
});
|
});
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@@ -684,19 +688,17 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
|
|||||||
|
|
||||||
rerank(reranker: Reranker): VectorQuery {
|
rerank(reranker: Reranker): VectorQuery {
|
||||||
super.doCall((inner) =>
|
super.doCall((inner) =>
|
||||||
inner.rerank({
|
inner.rerank(async (args) => {
|
||||||
rerankHybrid: async (_, args) => {
|
const vecResults = await fromBufferToRecordBatch(args.vecResults);
|
||||||
const vecResults = await fromBufferToRecordBatch(args.vecResults);
|
const ftsResults = await fromBufferToRecordBatch(args.ftsResults);
|
||||||
const ftsResults = await fromBufferToRecordBatch(args.ftsResults);
|
const result = await reranker.rerankHybrid(
|
||||||
const result = await reranker.rerankHybrid(
|
args.query,
|
||||||
args.query,
|
vecResults as RecordBatch,
|
||||||
vecResults as RecordBatch,
|
ftsResults as RecordBatch,
|
||||||
ftsResults as RecordBatch,
|
);
|
||||||
);
|
|
||||||
|
|
||||||
const buffer = fromRecordBatchToBuffer(result);
|
const buffer = fromRecordBatchToBuffer(result);
|
||||||
return buffer;
|
return buffer;
|
||||||
},
|
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -767,14 +769,23 @@ export class Query extends StandardQueryBase<NativeQuery> {
|
|||||||
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
||||||
*/
|
*/
|
||||||
nearestTo(vector: IntoVector): VectorQuery {
|
nearestTo(vector: IntoVector): VectorQuery {
|
||||||
|
const callNearestTo = (
|
||||||
|
inner: NativeQuery,
|
||||||
|
resolved: Float32Array | Float64Array | Uint8Array | number[],
|
||||||
|
): NativeVectorQuery => {
|
||||||
|
const raw = Array.isArray(resolved)
|
||||||
|
? null
|
||||||
|
: extractVectorBuffer(resolved);
|
||||||
|
if (raw) {
|
||||||
|
return inner.nearestToRaw(raw.data, raw.dtype);
|
||||||
|
}
|
||||||
|
return inner.nearestTo(Float32Array.from(resolved as number[]));
|
||||||
|
};
|
||||||
|
|
||||||
if (this.inner instanceof Promise) {
|
if (this.inner instanceof Promise) {
|
||||||
const nativeQuery = this.inner.then(async (inner) => {
|
const nativeQuery = this.inner.then(async (inner) => {
|
||||||
if (vector instanceof Promise) {
|
const resolved = vector instanceof Promise ? await vector : vector;
|
||||||
const arr = await vector.then((v) => Float32Array.from(v));
|
return callNearestTo(inner, resolved);
|
||||||
return inner.nearestTo(arr);
|
|
||||||
} else {
|
|
||||||
return inner.nearestTo(Float32Array.from(vector));
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
return new VectorQuery(nativeQuery);
|
return new VectorQuery(nativeQuery);
|
||||||
}
|
}
|
||||||
@@ -782,10 +793,8 @@ export class Query extends StandardQueryBase<NativeQuery> {
|
|||||||
const res = (async () => {
|
const res = (async () => {
|
||||||
try {
|
try {
|
||||||
const v = await vector;
|
const v = await vector;
|
||||||
const arr = Float32Array.from(v);
|
|
||||||
//
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
||||||
const value: any = this.nearestTo(arr);
|
const value: any = this.nearestTo(v);
|
||||||
const inner = value.inner as
|
const inner = value.inner as
|
||||||
| NativeVectorQuery
|
| NativeVectorQuery
|
||||||
| Promise<NativeVectorQuery>;
|
| Promise<NativeVectorQuery>;
|
||||||
@@ -796,7 +805,7 @@ export class Query extends StandardQueryBase<NativeQuery> {
|
|||||||
})();
|
})();
|
||||||
return new VectorQuery(res);
|
return new VectorQuery(res);
|
||||||
} else {
|
} else {
|
||||||
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
const vectorQuery = callNearestTo(this.inner, vector);
|
||||||
return new VectorQuery(vectorQuery);
|
return new VectorQuery(vectorQuery);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,12 +5,15 @@ import {
|
|||||||
Table as ArrowTable,
|
Table as ArrowTable,
|
||||||
Data,
|
Data,
|
||||||
DataType,
|
DataType,
|
||||||
|
Field,
|
||||||
IntoVector,
|
IntoVector,
|
||||||
MultiVector,
|
MultiVector,
|
||||||
Schema,
|
Schema,
|
||||||
dataTypeToJson,
|
dataTypeToJson,
|
||||||
fromDataToBuffer,
|
fromDataToBuffer,
|
||||||
|
fromTableToBuffer,
|
||||||
isMultiVector,
|
isMultiVector,
|
||||||
|
makeEmptyTable,
|
||||||
tableFromIPC,
|
tableFromIPC,
|
||||||
} from "./arrow";
|
} from "./arrow";
|
||||||
|
|
||||||
@@ -84,6 +87,16 @@ export interface OptimizeOptions {
|
|||||||
* tbl.optimize({cleanupOlderThan: new Date()});
|
* tbl.optimize({cleanupOlderThan: new Date()});
|
||||||
*/
|
*/
|
||||||
cleanupOlderThan: Date;
|
cleanupOlderThan: Date;
|
||||||
|
/**
|
||||||
|
* Because they may be part of an in-progress transaction, files newer than
|
||||||
|
* 7 days old are not deleted by default. If you are sure that there are no
|
||||||
|
* in-progress transactions, then you can set this to true to delete all
|
||||||
|
* files older than `cleanupOlderThan`.
|
||||||
|
*
|
||||||
|
* **WARNING**: This should only be set to true if you can guarantee that
|
||||||
|
* no other process is currently working on this dataset. Otherwise the
|
||||||
|
* dataset could be put into a corrupted state.
|
||||||
|
*/
|
||||||
deleteUnverified: boolean;
|
deleteUnverified: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -381,15 +394,16 @@ export abstract class Table {
|
|||||||
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
|
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
|
||||||
/**
|
/**
|
||||||
* Add new columns with defined values.
|
* Add new columns with defined values.
|
||||||
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
* @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
|
||||||
* the SQL expression to use to calculate the value of the new column. These
|
* - An array of objects with column names and SQL expressions to calculate values
|
||||||
* expressions will be evaluated for each row in the table, and can
|
* - A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||||
* reference existing columns in the table.
|
* - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||||
|
* - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||||
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
|
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
|
||||||
* containing the new version number of the table after adding the columns.
|
* containing the new version number of the table after adding the columns.
|
||||||
*/
|
*/
|
||||||
abstract addColumns(
|
abstract addColumns(
|
||||||
newColumnTransforms: AddColumnsSql[],
|
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
|
||||||
): Promise<AddColumnsResult>;
|
): Promise<AddColumnsResult>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -501,19 +515,7 @@ export abstract class Table {
|
|||||||
* - Index: Optimizes the indices, adding new data to existing indices
|
* - Index: Optimizes the indices, adding new data to existing indices
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* Experimental API
|
* The frequency an application should call optimize is based on the frequency of
|
||||||
* ----------------
|
|
||||||
*
|
|
||||||
* The optimization process is undergoing active development and may change.
|
|
||||||
* Our goal with these changes is to improve the performance of optimization and
|
|
||||||
* reduce the complexity.
|
|
||||||
*
|
|
||||||
* That being said, it is essential today to run optimize if you want the best
|
|
||||||
* performance. It should be stable and safe to use in production, but it our
|
|
||||||
* hope that the API may be simplified (or not even need to be called) in the
|
|
||||||
* future.
|
|
||||||
*
|
|
||||||
* The frequency an application shoudl call optimize is based on the frequency of
|
|
||||||
* data modifications. If data is frequently added, deleted, or updated then
|
* data modifications. If data is frequently added, deleted, or updated then
|
||||||
* optimize should be run frequently. A good rule of thumb is to run optimize if
|
* optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||||
* you have added or modified 100,000 or more records or run more than 20 data
|
* you have added or modified 100,000 or more records or run more than 20 data
|
||||||
@@ -806,9 +808,40 @@ export class LocalTable extends Table {
|
|||||||
// TODO: Support BatchUDF
|
// TODO: Support BatchUDF
|
||||||
|
|
||||||
async addColumns(
|
async addColumns(
|
||||||
newColumnTransforms: AddColumnsSql[],
|
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
|
||||||
): Promise<AddColumnsResult> {
|
): Promise<AddColumnsResult> {
|
||||||
return await this.inner.addColumns(newColumnTransforms);
|
// Handle single Field -> convert to array of Fields
|
||||||
|
if (newColumnTransforms instanceof Field) {
|
||||||
|
newColumnTransforms = [newColumnTransforms];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle array of Fields -> convert to Schema
|
||||||
|
if (
|
||||||
|
Array.isArray(newColumnTransforms) &&
|
||||||
|
newColumnTransforms.length > 0 &&
|
||||||
|
newColumnTransforms[0] instanceof Field
|
||||||
|
) {
|
||||||
|
const fields = newColumnTransforms as Field[];
|
||||||
|
newColumnTransforms = new Schema(fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle Schema -> use schema-based approach
|
||||||
|
if (newColumnTransforms instanceof Schema) {
|
||||||
|
const schema = newColumnTransforms;
|
||||||
|
// Convert schema to buffer using Arrow IPC format
|
||||||
|
const emptyTable = makeEmptyTable(schema);
|
||||||
|
const schemaBuf = await fromTableToBuffer(emptyTable);
|
||||||
|
return await this.inner.addColumnsWithSchema(schemaBuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle SQL expressions (existing functionality)
|
||||||
|
if (Array.isArray(newColumnTransforms)) {
|
||||||
|
return await this.inner.addColumns(
|
||||||
|
newColumnTransforms as AddColumnsSql[],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error("Invalid input type for addColumns");
|
||||||
}
|
}
|
||||||
|
|
||||||
async alterColumns(
|
async alterColumns(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
6014
nodejs/package-lock.json
generated
6014
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.27.0-beta.0",
|
"version": "0.27.2",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
@@ -21,19 +21,16 @@
|
|||||||
},
|
},
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
"napi": {
|
"napi": {
|
||||||
"name": "lancedb",
|
"binaryName": "lancedb",
|
||||||
"triples": {
|
"targets": [
|
||||||
"defaults": false,
|
"aarch64-apple-darwin",
|
||||||
"additional": [
|
"x86_64-unknown-linux-gnu",
|
||||||
"aarch64-apple-darwin",
|
"aarch64-unknown-linux-gnu",
|
||||||
"x86_64-unknown-linux-gnu",
|
"x86_64-unknown-linux-musl",
|
||||||
"aarch64-unknown-linux-gnu",
|
"aarch64-unknown-linux-musl",
|
||||||
"x86_64-unknown-linux-musl",
|
"x86_64-pc-windows-msvc",
|
||||||
"aarch64-unknown-linux-musl",
|
"aarch64-pc-windows-msvc"
|
||||||
"x86_64-pc-windows-msvc",
|
]
|
||||||
"aarch64-pc-windows-msvc"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"repository": {
|
"repository": {
|
||||||
@@ -46,7 +43,7 @@
|
|||||||
"@aws-sdk/client-s3": "^3.33.0",
|
"@aws-sdk/client-s3": "^3.33.0",
|
||||||
"@biomejs/biome": "^1.7.3",
|
"@biomejs/biome": "^1.7.3",
|
||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@napi-rs/cli": "^2.18.3",
|
"@napi-rs/cli": "^3.5.1",
|
||||||
"@types/axios": "^0.14.0",
|
"@types/axios": "^0.14.0",
|
||||||
"@types/jest": "^29.1.2",
|
"@types/jest": "^29.1.2",
|
||||||
"@types/node": "^22.7.4",
|
"@types/node": "^22.7.4",
|
||||||
@@ -75,9 +72,9 @@
|
|||||||
"os": ["darwin", "linux", "win32"],
|
"os": ["darwin", "linux", "win32"],
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"artifacts": "napi artifacts",
|
"artifacts": "napi artifacts",
|
||||||
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
|
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
|
||||||
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||||
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
|
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
|
||||||
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||||
"build": "npm run build:debug && npm run tsc",
|
"build": "npm run build:debug && npm run tsc",
|
||||||
"build-release": "npm run build:release && npm run tsc",
|
"build-release": "npm run build:release && npm run tsc",
|
||||||
@@ -91,7 +88,7 @@
|
|||||||
"prepublishOnly": "napi prepublish -t npm",
|
"prepublishOnly": "napi prepublish -t npm",
|
||||||
"test": "jest --verbose",
|
"test": "jest --verbose",
|
||||||
"integration": "S3_TEST=1 npm run test",
|
"integration": "S3_TEST=1 npm run test",
|
||||||
"universal": "napi universal",
|
"universal": "napi universalize",
|
||||||
"version": "napi version"
|
"version": "napi version"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
@@ -8,10 +8,10 @@ use lancedb::database::{CreateTableMode, Database};
|
|||||||
use napi::bindgen_prelude::*;
|
use napi::bindgen_prelude::*;
|
||||||
use napi_derive::*;
|
use napi_derive::*;
|
||||||
|
|
||||||
|
use crate::ConnectionOptions;
|
||||||
use crate::error::NapiErrorExt;
|
use crate::error::NapiErrorExt;
|
||||||
use crate::header::JsHeaderProvider;
|
use crate::header::JsHeaderProvider;
|
||||||
use crate::table::Table;
|
use crate::table::Table;
|
||||||
use crate::ConnectionOptions;
|
|
||||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
||||||
|
|
||||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||||
@@ -119,12 +119,12 @@ impl Connection {
|
|||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn table_names(
|
pub async fn table_names(
|
||||||
&self,
|
&self,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
start_after: Option<String>,
|
start_after: Option<String>,
|
||||||
limit: Option<u32>,
|
limit: Option<u32>,
|
||||||
) -> napi::Result<Vec<String>> {
|
) -> napi::Result<Vec<String>> {
|
||||||
let mut op = self.get_inner()?.table_names();
|
let mut op = self.get_inner()?.table_names();
|
||||||
op = op.namespace(namespace);
|
op = op.namespace(namespace_path.unwrap_or_default());
|
||||||
if let Some(start_after) = start_after {
|
if let Some(start_after) = start_after {
|
||||||
op = op.start_after(start_after);
|
op = op.start_after(start_after);
|
||||||
}
|
}
|
||||||
@@ -146,7 +146,7 @@ impl Connection {
|
|||||||
name: String,
|
name: String,
|
||||||
buf: Buffer,
|
buf: Buffer,
|
||||||
mode: String,
|
mode: String,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
) -> napi::Result<Table> {
|
) -> napi::Result<Table> {
|
||||||
let batches = ipc_file_to_batches(buf.to_vec())
|
let batches = ipc_file_to_batches(buf.to_vec())
|
||||||
@@ -154,7 +154,7 @@ impl Connection {
|
|||||||
let mode = Self::parse_create_mode_str(&mode)?;
|
let mode = Self::parse_create_mode_str(&mode)?;
|
||||||
let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
|
let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
|
||||||
|
|
||||||
builder = builder.namespace(namespace);
|
builder = builder.namespace(namespace_path.unwrap_or_default());
|
||||||
|
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
for (key, value) in storage_options {
|
for (key, value) in storage_options {
|
||||||
@@ -171,7 +171,7 @@ impl Connection {
|
|||||||
name: String,
|
name: String,
|
||||||
schema_buf: Buffer,
|
schema_buf: Buffer,
|
||||||
mode: String,
|
mode: String,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
) -> napi::Result<Table> {
|
) -> napi::Result<Table> {
|
||||||
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
|
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
|
||||||
@@ -183,7 +183,7 @@ impl Connection {
|
|||||||
.create_empty_table(&name, schema)
|
.create_empty_table(&name, schema)
|
||||||
.mode(mode);
|
.mode(mode);
|
||||||
|
|
||||||
builder = builder.namespace(namespace);
|
builder = builder.namespace(namespace_path.unwrap_or_default());
|
||||||
|
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
for (key, value) in storage_options {
|
for (key, value) in storage_options {
|
||||||
@@ -198,13 +198,13 @@ impl Connection {
|
|||||||
pub async fn open_table(
|
pub async fn open_table(
|
||||||
&self,
|
&self,
|
||||||
name: String,
|
name: String,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
index_cache_size: Option<u32>,
|
index_cache_size: Option<u32>,
|
||||||
) -> napi::Result<Table> {
|
) -> napi::Result<Table> {
|
||||||
let mut builder = self.get_inner()?.open_table(&name);
|
let mut builder = self.get_inner()?.open_table(&name);
|
||||||
|
|
||||||
builder = builder.namespace(namespace);
|
builder = builder.namespace(namespace_path.unwrap_or_default());
|
||||||
|
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
for (key, value) in storage_options {
|
for (key, value) in storage_options {
|
||||||
@@ -223,7 +223,7 @@ impl Connection {
|
|||||||
&self,
|
&self,
|
||||||
target_table_name: String,
|
target_table_name: String,
|
||||||
source_uri: String,
|
source_uri: String,
|
||||||
target_namespace: Vec<String>,
|
target_namespace_path: Option<Vec<String>>,
|
||||||
source_version: Option<i64>,
|
source_version: Option<i64>,
|
||||||
source_tag: Option<String>,
|
source_tag: Option<String>,
|
||||||
is_shallow: bool,
|
is_shallow: bool,
|
||||||
@@ -232,7 +232,7 @@ impl Connection {
|
|||||||
.get_inner()?
|
.get_inner()?
|
||||||
.clone_table(&target_table_name, &source_uri);
|
.clone_table(&target_table_name, &source_uri);
|
||||||
|
|
||||||
builder = builder.target_namespace(target_namespace);
|
builder = builder.target_namespace(target_namespace_path.unwrap_or_default());
|
||||||
|
|
||||||
if let Some(version) = source_version {
|
if let Some(version) = source_version {
|
||||||
builder = builder.source_version(version as u64);
|
builder = builder.source_version(version as u64);
|
||||||
@@ -250,18 +250,21 @@ impl Connection {
|
|||||||
|
|
||||||
/// Drop table with the name. Or raise an error if the table does not exist.
|
/// Drop table with the name. Or raise an error if the table does not exist.
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
|
pub async fn drop_table(
|
||||||
|
&self,
|
||||||
|
name: String,
|
||||||
|
namespace_path: Option<Vec<String>>,
|
||||||
|
) -> napi::Result<()> {
|
||||||
|
let ns = namespace_path.unwrap_or_default();
|
||||||
self.get_inner()?
|
self.get_inner()?
|
||||||
.drop_table(&name, &namespace)
|
.drop_table(&name, &ns)
|
||||||
.await
|
.await
|
||||||
.default_error()
|
.default_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn drop_all_tables(&self, namespace: Vec<String>) -> napi::Result<()> {
|
pub async fn drop_all_tables(&self, namespace_path: Option<Vec<String>>) -> napi::Result<()> {
|
||||||
self.get_inner()?
|
let ns = namespace_path.unwrap_or_default();
|
||||||
.drop_all_tables(&namespace)
|
self.get_inner()?.drop_all_tables(&ns).await.default_error()
|
||||||
.await
|
|
||||||
.default_error()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,20 +1,19 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use napi::{
|
use napi::{bindgen_prelude::*, threadsafe_function::ThreadsafeFunction};
|
||||||
bindgen_prelude::*,
|
|
||||||
threadsafe_function::{ErrorStrategy, ThreadsafeFunction},
|
|
||||||
};
|
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
type GetHeadersFn = ThreadsafeFunction<(), Promise<HashMap<String, String>>, (), Status, false>;
|
||||||
|
|
||||||
/// JavaScript HeaderProvider implementation that wraps a JavaScript callback.
|
/// JavaScript HeaderProvider implementation that wraps a JavaScript callback.
|
||||||
/// This is the only native header provider - all header provider implementations
|
/// This is the only native header provider - all header provider implementations
|
||||||
/// should provide a JavaScript function that returns headers.
|
/// should provide a JavaScript function that returns headers.
|
||||||
#[napi]
|
#[napi]
|
||||||
pub struct JsHeaderProvider {
|
pub struct JsHeaderProvider {
|
||||||
get_headers_fn: Arc<ThreadsafeFunction<(), ErrorStrategy::CalleeHandled>>,
|
get_headers_fn: Arc<GetHeadersFn>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for JsHeaderProvider {
|
impl Clone for JsHeaderProvider {
|
||||||
@@ -29,9 +28,12 @@ impl Clone for JsHeaderProvider {
|
|||||||
impl JsHeaderProvider {
|
impl JsHeaderProvider {
|
||||||
/// Create a new JsHeaderProvider from a JavaScript callback
|
/// Create a new JsHeaderProvider from a JavaScript callback
|
||||||
#[napi(constructor)]
|
#[napi(constructor)]
|
||||||
pub fn new(get_headers_callback: JsFunction) -> Result<Self> {
|
pub fn new(
|
||||||
|
get_headers_callback: Function<(), Promise<HashMap<String, String>>>,
|
||||||
|
) -> Result<Self> {
|
||||||
let get_headers_fn = get_headers_callback
|
let get_headers_fn = get_headers_callback
|
||||||
.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))
|
.build_threadsafe_function()
|
||||||
|
.build()
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
Error::new(
|
Error::new(
|
||||||
Status::GenericFailure,
|
Status::GenericFailure,
|
||||||
@@ -51,7 +53,7 @@ impl lancedb::remote::HeaderProvider for JsHeaderProvider {
|
|||||||
async fn get_headers(&self) -> lancedb::error::Result<HashMap<String, String>> {
|
async fn get_headers(&self) -> lancedb::error::Result<HashMap<String, String>> {
|
||||||
// Call the JavaScript function asynchronously
|
// Call the JavaScript function asynchronously
|
||||||
let promise: Promise<HashMap<String, String>> =
|
let promise: Promise<HashMap<String, String>> =
|
||||||
self.get_headers_fn.call_async(Ok(())).await.map_err(|e| {
|
self.get_headers_fn.call_async(()).await.map_err(|e| {
|
||||||
lancedb::error::Error::Runtime {
|
lancedb::error::Error::Runtime {
|
||||||
message: format!("Failed to call JavaScript get_headers: {}", e),
|
message: format!("Failed to call JavaScript get_headers: {}", e),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,12 +3,12 @@
|
|||||||
|
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
use lancedb::index::Index as LanceDbIndex;
|
||||||
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
|
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
|
||||||
use lancedb::index::vector::{
|
use lancedb::index::vector::{
|
||||||
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
|
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
|
||||||
IvfRqIndexBuilder,
|
IvfRqIndexBuilder,
|
||||||
};
|
};
|
||||||
use lancedb::index::Index as LanceDbIndex;
|
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
|
|
||||||
use crate::util::parse_distance_type;
|
use crate::util::parse_distance_type;
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ pub struct OpenTableOptions {
|
|||||||
pub storage_options: Option<HashMap<String, String>>,
|
pub storage_options: Option<HashMap<String, String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi::module_init]
|
#[napi_derive::module_init]
|
||||||
fn init() {
|
fn init() {
|
||||||
let env = Env::new()
|
let env = Env::new()
|
||||||
.filter_or("LANCEDB_LOG", "warn")
|
.filter_or("LANCEDB_LOG", "warn")
|
||||||
|
|||||||
@@ -3,6 +3,12 @@
|
|||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow_array::{
|
||||||
|
Array, Float16Array as ArrowFloat16Array, Float32Array as ArrowFloat32Array,
|
||||||
|
Float64Array as ArrowFloat64Array, UInt8Array as ArrowUInt8Array,
|
||||||
|
};
|
||||||
|
use arrow_buffer::ScalarBuffer;
|
||||||
|
use half::f16;
|
||||||
use lancedb::index::scalar::{
|
use lancedb::index::scalar::{
|
||||||
BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur,
|
BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur,
|
||||||
Operator, PhraseQuery,
|
Operator, PhraseQuery,
|
||||||
@@ -17,13 +23,40 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery;
|
|||||||
use napi::bindgen_prelude::*;
|
use napi::bindgen_prelude::*;
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
|
|
||||||
use crate::error::convert_error;
|
|
||||||
use crate::error::NapiErrorExt;
|
use crate::error::NapiErrorExt;
|
||||||
|
use crate::error::convert_error;
|
||||||
use crate::iterator::RecordBatchIterator;
|
use crate::iterator::RecordBatchIterator;
|
||||||
|
use crate::rerankers::RerankHybridCallbackArgs;
|
||||||
use crate::rerankers::Reranker;
|
use crate::rerankers::Reranker;
|
||||||
use crate::rerankers::RerankerCallbacks;
|
|
||||||
use crate::util::{parse_distance_type, schema_to_buffer};
|
use crate::util::{parse_distance_type, schema_to_buffer};
|
||||||
|
|
||||||
|
fn bytes_to_arrow_array(data: Uint8Array, dtype: String) -> napi::Result<Arc<dyn Array>> {
|
||||||
|
let buf = arrow_buffer::Buffer::from(data.to_vec());
|
||||||
|
let num_bytes = buf.len();
|
||||||
|
match dtype.as_str() {
|
||||||
|
"float16" => {
|
||||||
|
let scalar_buf = ScalarBuffer::<f16>::new(buf, 0, num_bytes / 2);
|
||||||
|
Ok(Arc::new(ArrowFloat16Array::new(scalar_buf, None)))
|
||||||
|
}
|
||||||
|
"float32" => {
|
||||||
|
let scalar_buf = ScalarBuffer::<f32>::new(buf, 0, num_bytes / 4);
|
||||||
|
Ok(Arc::new(ArrowFloat32Array::new(scalar_buf, None)))
|
||||||
|
}
|
||||||
|
"float64" => {
|
||||||
|
let scalar_buf = ScalarBuffer::<f64>::new(buf, 0, num_bytes / 8);
|
||||||
|
Ok(Arc::new(ArrowFloat64Array::new(scalar_buf, None)))
|
||||||
|
}
|
||||||
|
"uint8" => {
|
||||||
|
let scalar_buf = ScalarBuffer::<u8>::new(buf, 0, num_bytes);
|
||||||
|
Ok(Arc::new(ArrowUInt8Array::new(scalar_buf, None)))
|
||||||
|
}
|
||||||
|
_ => Err(napi::Error::from_reason(format!(
|
||||||
|
"Unsupported vector dtype: {}. Expected one of: float16, float32, float64, uint8",
|
||||||
|
dtype
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub struct Query {
|
pub struct Query {
|
||||||
inner: LanceDbQuery,
|
inner: LanceDbQuery,
|
||||||
@@ -42,7 +75,7 @@ impl Query {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
|
pub fn full_text_search(&mut self, query: Object) -> napi::Result<()> {
|
||||||
let query = parse_fts_query(query)?;
|
let query = parse_fts_query(query)?;
|
||||||
self.inner = self.inner.clone().full_text_search(query);
|
self.inner = self.inner.clone().full_text_search(query);
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -78,6 +111,13 @@ impl Query {
|
|||||||
Ok(VectorQuery { inner })
|
Ok(VectorQuery { inner })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
pub fn nearest_to_raw(&mut self, data: Uint8Array, dtype: String) -> Result<VectorQuery> {
|
||||||
|
let array = bytes_to_arrow_array(data, dtype)?;
|
||||||
|
let inner = self.inner.clone().nearest_to(array).default_error()?;
|
||||||
|
Ok(VectorQuery { inner })
|
||||||
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn fast_search(&mut self) {
|
pub fn fast_search(&mut self) {
|
||||||
self.inner = self.inner.clone().fast_search();
|
self.inner = self.inner.clone().fast_search();
|
||||||
@@ -163,6 +203,13 @@ impl VectorQuery {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
pub fn add_query_vector_raw(&mut self, data: Uint8Array, dtype: String) -> Result<()> {
|
||||||
|
let array = bytes_to_arrow_array(data, dtype)?;
|
||||||
|
self.inner = self.inner.clone().add_query_vector(array).default_error()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
|
pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
|
||||||
let distance_type = parse_distance_type(distance_type)?;
|
let distance_type = parse_distance_type(distance_type)?;
|
||||||
@@ -235,7 +282,7 @@ impl VectorQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
|
pub fn full_text_search(&mut self, query: Object) -> napi::Result<()> {
|
||||||
let query = parse_fts_query(query)?;
|
let query = parse_fts_query(query)?;
|
||||||
self.inner = self.inner.clone().full_text_search(query);
|
self.inner = self.inner.clone().full_text_search(query);
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -272,11 +319,13 @@ impl VectorQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn rerank(&mut self, callbacks: RerankerCallbacks) {
|
pub fn rerank(
|
||||||
self.inner = self
|
&mut self,
|
||||||
.inner
|
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
|
||||||
.clone()
|
) -> napi::Result<()> {
|
||||||
.rerank(Arc::new(Reranker::new(callbacks)));
|
let reranker = Reranker::new(rerank_hybrid)?;
|
||||||
|
self.inner = self.inner.clone().rerank(Arc::new(reranker));
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
@@ -523,12 +572,12 @@ impl JsFullTextQuery {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_fts_query(query: napi::JsObject) -> napi::Result<FullTextSearchQuery> {
|
fn parse_fts_query(query: Object) -> napi::Result<FullTextSearchQuery> {
|
||||||
if let Ok(Some(query)) = query.get::<_, &JsFullTextQuery>("query") {
|
if let Ok(Some(query)) = query.get::<&JsFullTextQuery>("query") {
|
||||||
Ok(FullTextSearchQuery::new_query(query.inner.clone()))
|
Ok(FullTextSearchQuery::new_query(query.inner.clone()))
|
||||||
} else if let Ok(Some(query_text)) = query.get::<_, String>("query") {
|
} else if let Ok(Some(query_text)) = query.get::<String>("query") {
|
||||||
let mut query_text = query_text;
|
let mut query_text = query_text;
|
||||||
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
|
let columns = query.get::<Option<Vec<String>>>("columns")?.flatten();
|
||||||
|
|
||||||
let is_phrase =
|
let is_phrase =
|
||||||
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
|
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
|
||||||
@@ -549,15 +598,12 @@ fn parse_fts_query(query: napi::JsObject) -> napi::Result<FullTextSearchQuery> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut query = FullTextSearchQuery::new_query(query);
|
let mut query = FullTextSearchQuery::new_query(query);
|
||||||
if let Some(cols) = columns {
|
if let Some(cols) = columns
|
||||||
if !cols.is_empty() {
|
&& !cols.is_empty()
|
||||||
query = query.with_columns(&cols).map_err(|e| {
|
{
|
||||||
napi::Error::from_reason(format!(
|
query = query.with_columns(&cols).map_err(|e| {
|
||||||
"Failed to set full text search columns: {}",
|
napi::Error::from_reason(format!("Failed to set full text search columns: {}", e))
|
||||||
e
|
})?;
|
||||||
))
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(query)
|
Ok(query)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -3,10 +3,7 @@
|
|||||||
|
|
||||||
use arrow_array::RecordBatch;
|
use arrow_array::RecordBatch;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use napi::{
|
use napi::{bindgen_prelude::*, threadsafe_function::ThreadsafeFunction};
|
||||||
bindgen_prelude::*,
|
|
||||||
threadsafe_function::{ErrorStrategy, ThreadsafeFunction},
|
|
||||||
};
|
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
|
|
||||||
use lancedb::ipc::batches_to_ipc_file;
|
use lancedb::ipc::batches_to_ipc_file;
|
||||||
@@ -15,27 +12,28 @@ use lancedb::{error::Error, ipc::ipc_file_to_batches};
|
|||||||
|
|
||||||
use crate::error::NapiErrorExt;
|
use crate::error::NapiErrorExt;
|
||||||
|
|
||||||
|
type RerankHybridFn = ThreadsafeFunction<
|
||||||
|
RerankHybridCallbackArgs,
|
||||||
|
Promise<Buffer>,
|
||||||
|
RerankHybridCallbackArgs,
|
||||||
|
Status,
|
||||||
|
false,
|
||||||
|
>;
|
||||||
|
|
||||||
/// Reranker implementation that "wraps" a NodeJS Reranker implementation.
|
/// Reranker implementation that "wraps" a NodeJS Reranker implementation.
|
||||||
/// This contains references to the callbacks that can be used to invoke the
|
/// This contains references to the callbacks that can be used to invoke the
|
||||||
/// reranking methods on the NodeJS implementation and handles serializing the
|
/// reranking methods on the NodeJS implementation and handles serializing the
|
||||||
/// record batches to Arrow IPC buffers.
|
/// record batches to Arrow IPC buffers.
|
||||||
#[napi]
|
|
||||||
pub struct Reranker {
|
pub struct Reranker {
|
||||||
/// callback to the Javascript which will call the rerankHybrid method of
|
rerank_hybrid: RerankHybridFn,
|
||||||
/// some Reranker implementation
|
|
||||||
rerank_hybrid: ThreadsafeFunction<RerankHybridCallbackArgs, ErrorStrategy::CalleeHandled>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi]
|
|
||||||
impl Reranker {
|
impl Reranker {
|
||||||
#[napi]
|
pub fn new(
|
||||||
pub fn new(callbacks: RerankerCallbacks) -> Self {
|
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
|
||||||
let rerank_hybrid = callbacks
|
) -> napi::Result<Self> {
|
||||||
.rerank_hybrid
|
let rerank_hybrid = rerank_hybrid.build_threadsafe_function().build()?;
|
||||||
.create_threadsafe_function(0, move |ctx| Ok(vec![ctx.value]))
|
Ok(Self { rerank_hybrid })
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
Self { rerank_hybrid }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -49,16 +47,16 @@ impl lancedb::rerankers::Reranker for Reranker {
|
|||||||
) -> lancedb::error::Result<RecordBatch> {
|
) -> lancedb::error::Result<RecordBatch> {
|
||||||
let callback_args = RerankHybridCallbackArgs {
|
let callback_args = RerankHybridCallbackArgs {
|
||||||
query: query.to_string(),
|
query: query.to_string(),
|
||||||
vec_results: batches_to_ipc_file(&[vector_results])?,
|
vec_results: Buffer::from(batches_to_ipc_file(&[vector_results])?.as_ref()),
|
||||||
fts_results: batches_to_ipc_file(&[fts_results])?,
|
fts_results: Buffer::from(batches_to_ipc_file(&[fts_results])?.as_ref()),
|
||||||
};
|
};
|
||||||
let promised_buffer: Promise<Buffer> = self
|
let promised_buffer: Promise<Buffer> = self
|
||||||
.rerank_hybrid
|
.rerank_hybrid
|
||||||
.call_async(Ok(callback_args))
|
.call_async(callback_args)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| Error::Runtime {
|
.map_err(|e| Error::Runtime {
|
||||||
message: format!("napi error status={}, reason={}", e.status, e.reason),
|
message: format!("napi error status={}, reason={}", e.status, e.reason),
|
||||||
})?;
|
})?;
|
||||||
let buffer = promised_buffer.await.map_err(|e| Error::Runtime {
|
let buffer = promised_buffer.await.map_err(|e| Error::Runtime {
|
||||||
message: format!("napi error status={}, reason={}", e.status, e.reason),
|
message: format!("napi error status={}, reason={}", e.status, e.reason),
|
||||||
})?;
|
})?;
|
||||||
@@ -77,16 +75,11 @@ impl std::fmt::Debug for Reranker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[napi(object)]
|
|
||||||
pub struct RerankerCallbacks {
|
|
||||||
pub rerank_hybrid: JsFunction,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[napi(object)]
|
#[napi(object)]
|
||||||
pub struct RerankHybridCallbackArgs {
|
pub struct RerankHybridCallbackArgs {
|
||||||
pub query: String,
|
pub query: String,
|
||||||
pub vec_results: Vec<u8>,
|
pub vec_results: Buffer,
|
||||||
pub fts_results: Vec<u8>,
|
pub fts_results: Buffer,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn buffer_to_record_batch(buffer: Buffer) -> Result<RecordBatch> {
|
fn buffer_to_record_batch(buffer: Buffer) -> Result<RecordBatch> {
|
||||||
|
|||||||
@@ -95,8 +95,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
|
|||||||
napi_val: napi::sys::napi_value,
|
napi_val: napi::sys::napi_value,
|
||||||
) -> napi::Result<Self> {
|
) -> napi::Result<Self> {
|
||||||
let object: napi::bindgen_prelude::ClassInstance<Self> =
|
let object: napi::bindgen_prelude::ClassInstance<Self> =
|
||||||
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
|
unsafe { napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)? };
|
||||||
let copy = object.clone();
|
Ok((*object).clone())
|
||||||
Ok(copy)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use lancedb::ipc::ipc_file_to_batches;
|
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||||
use lancedb::table::{
|
use lancedb::table::{
|
||||||
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
|
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
|
||||||
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
||||||
@@ -71,6 +71,17 @@ impl Table {
|
|||||||
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
|
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
|
||||||
let batches = ipc_file_to_batches(buf.to_vec())
|
let batches = ipc_file_to_batches(buf.to_vec())
|
||||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||||
|
let batches = batches
|
||||||
|
.into_iter()
|
||||||
|
.map(|batch| {
|
||||||
|
batch.map_err(|e| {
|
||||||
|
napi::Error::from_reason(format!(
|
||||||
|
"Failed to read record batch from IPC file: {}",
|
||||||
|
e
|
||||||
|
))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
let mut op = self.inner_ref()?.add(batches);
|
let mut op = self.inner_ref()?.add(batches);
|
||||||
|
|
||||||
op = if mode == "append" {
|
op = if mode == "append" {
|
||||||
@@ -268,6 +279,23 @@ impl Table {
|
|||||||
Ok(res.into())
|
Ok(res.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi(catch_unwind)]
|
||||||
|
pub async fn add_columns_with_schema(
|
||||||
|
&self,
|
||||||
|
schema_buf: Buffer,
|
||||||
|
) -> napi::Result<AddColumnsResult> {
|
||||||
|
let schema = ipc_file_to_schema(schema_buf.to_vec())
|
||||||
|
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
|
||||||
|
|
||||||
|
let transforms = NewColumnTransform::AllNulls(schema);
|
||||||
|
let res = self
|
||||||
|
.inner_ref()?
|
||||||
|
.add_columns(transforms, None)
|
||||||
|
.await
|
||||||
|
.default_error()?;
|
||||||
|
Ok(res.into())
|
||||||
|
}
|
||||||
|
|
||||||
#[napi(catch_unwind)]
|
#[napi(catch_unwind)]
|
||||||
pub async fn alter_columns(
|
pub async fn alter_columns(
|
||||||
&self,
|
&self,
|
||||||
@@ -742,12 +770,14 @@ impl From<lancedb::table::AddResult> for AddResult {
|
|||||||
|
|
||||||
#[napi(object)]
|
#[napi(object)]
|
||||||
pub struct DeleteResult {
|
pub struct DeleteResult {
|
||||||
|
pub num_deleted_rows: i64,
|
||||||
pub version: i64,
|
pub version: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||||
fn from(value: lancedb::table::DeleteResult) -> Self {
|
fn from(value: lancedb::table::DeleteResult) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
num_deleted_rows: value.num_deleted_rows as i64,
|
||||||
version: value.version as i64,
|
version: value.version as i64,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.30.0-beta.1"
|
current_version = "0.31.0-beta.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
2
python/.gitignore
vendored
2
python/.gitignore
vendored
@@ -1,3 +1,5 @@
|
|||||||
# Test data created by some example tests
|
# Test data created by some example tests
|
||||||
data/
|
data/
|
||||||
_lancedb.pyd
|
_lancedb.pyd
|
||||||
|
# macOS debug symbols bundle generated during build
|
||||||
|
*.dSYM/
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.30.0-beta.1"
|
version = "0.31.0-beta.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -16,11 +16,14 @@ crate-type = ["cdylib"]
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
bytes = "1"
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
lance-core.workspace = true
|
lance-core.workspace = true
|
||||||
lance-namespace.workspace = true
|
lance-namespace.workspace = true
|
||||||
|
lance-namespace-impls.workspace = true
|
||||||
lance-io.workspace = true
|
lance-io.workspace = true
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
|
log.workspace = true
|
||||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||||
pyo3-async-runtimes = { version = "0.26", features = [
|
pyo3-async-runtimes = { version = "0.26", features = [
|
||||||
"attributes",
|
"attributes",
|
||||||
@@ -28,6 +31,8 @@ pyo3-async-runtimes = { version = "0.26", features = [
|
|||||||
] }
|
] }
|
||||||
pin-project = "1.1.5"
|
pin-project = "1.1.5"
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
|
serde = "1"
|
||||||
|
serde_json = "1"
|
||||||
snafu.workspace = true
|
snafu.workspace = true
|
||||||
tokio = { version = "1.40", features = ["sync"] }
|
tokio = { version = "1.40", features = ["sync"] }
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# LanceDB
|
# LanceDB Python SDK
|
||||||
|
|
||||||
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
|
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
|
||||||
|
|
||||||
|
|||||||
@@ -3,10 +3,10 @@ name = "lancedb"
|
|||||||
# version in Cargo.toml
|
# version in Cargo.toml
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation>=2.1.0",
|
||||||
"numpy",
|
"numpy>=1.24.0",
|
||||||
"overrides>=0.7; python_version<'3.12'",
|
"overrides>=0.7; python_version<'3.12'",
|
||||||
"packaging",
|
"packaging>=23.0",
|
||||||
"pyarrow>=16",
|
"pyarrow>=16",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
@@ -45,51 +45,51 @@ repository = "https://github.com/lancedb/lancedb"
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
pylance = [
|
pylance = [
|
||||||
"pylance>=1.0.0b14",
|
"pylance>=5.0.0b3",
|
||||||
]
|
]
|
||||||
tests = [
|
tests = [
|
||||||
"aiohttp",
|
"aiohttp>=3.9.0",
|
||||||
"boto3",
|
"boto3>=1.28.57",
|
||||||
"pandas>=1.4",
|
"pandas>=1.4",
|
||||||
"pytest",
|
"pytest>=7.0",
|
||||||
"pytest-mock",
|
"pytest-mock>=3.10",
|
||||||
"pytest-asyncio",
|
"pytest-asyncio>=0.21",
|
||||||
"duckdb",
|
"duckdb>=0.9.0",
|
||||||
"pytz",
|
"pytz>=2023.3",
|
||||||
"polars>=0.19, <=1.3.0",
|
"polars>=0.19, <=1.3.0",
|
||||||
"tantivy",
|
"tantivy>=0.20.0",
|
||||||
"pyarrow-stubs",
|
"pyarrow-stubs>=16.0",
|
||||||
"pylance>=1.0.0b14",
|
"pylance>=5.0.0b3",
|
||||||
"requests",
|
"requests>=2.31.0",
|
||||||
"datafusion",
|
"datafusion>=52,<53",
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"ruff",
|
"ruff>=0.3.0",
|
||||||
"pre-commit",
|
"pre-commit>=3.5.0",
|
||||||
"pyright",
|
"pyright>=1.1.350",
|
||||||
'typing-extensions>=4.0.0; python_version < "3.11"',
|
'typing-extensions>=4.0.0; python_version < "3.11"',
|
||||||
]
|
]
|
||||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
|
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
|
||||||
clip = ["torch", "pillow", "open-clip-torch"]
|
clip = ["torch", "pillow>=12.1.1", "open-clip-torch"]
|
||||||
siglip = ["torch", "pillow", "transformers>=4.41.0","sentencepiece"]
|
siglip = ["torch", "pillow>=12.1.1", "transformers>=4.41.0","sentencepiece"]
|
||||||
embeddings = [
|
embeddings = [
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"openai>=1.6.1",
|
"openai>=1.6.1",
|
||||||
"sentence-transformers",
|
"sentence-transformers>=2.2.0",
|
||||||
"torch",
|
"torch>=2.0.0",
|
||||||
"pillow",
|
"pillow>=12.1.1",
|
||||||
"open-clip-torch",
|
"open-clip-torch>=2.20.0",
|
||||||
"cohere",
|
"cohere>=4.0",
|
||||||
"colpali-engine>=0.3.10",
|
"colpali-engine>=0.3.10",
|
||||||
"huggingface_hub",
|
"huggingface_hub>=0.19.0",
|
||||||
"InstructorEmbedding",
|
"InstructorEmbedding>=1.0.1",
|
||||||
"google.generativeai",
|
"google.generativeai>=0.3.0",
|
||||||
"boto3>=1.28.57",
|
"boto3>=1.28.57",
|
||||||
"awscli>=1.29.57",
|
"awscli>=1.44.38",
|
||||||
"botocore>=1.31.57",
|
"botocore>=1.31.57",
|
||||||
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
|
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
|
||||||
"ollama>=0.3.0",
|
"ollama>=0.3.0",
|
||||||
"sentencepiece"
|
"sentencepiece>=0.1.99"
|
||||||
]
|
]
|
||||||
azure = ["adlfs>=2024.2.0"]
|
azure = ["adlfs>=2024.2.0"]
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import importlib.metadata
|
|||||||
import os
|
import os
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Dict, Optional, Union, Any
|
from typing import Dict, Optional, Union, Any, List
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
__version__ = importlib.metadata.version("lancedb")
|
__version__ = importlib.metadata.version("lancedb")
|
||||||
@@ -15,9 +15,9 @@ from ._lancedb import connect as lancedb_connect
|
|||||||
from .common import URI, sanitize_uri
|
from .common import URI, sanitize_uri
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
||||||
from .io import StorageOptionsProvider
|
|
||||||
from .remote import ClientConfig
|
from .remote import ClientConfig
|
||||||
from .remote.db import RemoteDBConnection
|
from .remote.db import RemoteDBConnection
|
||||||
|
from .expr import Expr, col, lit, func
|
||||||
from .schema import vector
|
from .schema import vector
|
||||||
from .table import AsyncTable, Table
|
from .table import AsyncTable, Table
|
||||||
from ._lancedb import Session
|
from ._lancedb import Session
|
||||||
@@ -63,7 +63,7 @@ def _check_s3_bucket_with_dots(
|
|||||||
|
|
||||||
|
|
||||||
def connect(
|
def connect(
|
||||||
uri: URI,
|
uri: Optional[URI] = None,
|
||||||
*,
|
*,
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
region: str = "us-east-1",
|
region: str = "us-east-1",
|
||||||
@@ -73,14 +73,18 @@ def connect(
|
|||||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
session: Optional[Session] = None,
|
session: Optional[Session] = None,
|
||||||
|
namespace_client_impl: Optional[str] = None,
|
||||||
|
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||||
|
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> DBConnection:
|
) -> DBConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
uri: str or Path
|
uri: str or Path, optional
|
||||||
The uri of the database.
|
The uri of the database. When ``namespace_client_impl`` is provided you may
|
||||||
|
omit ``uri`` and connect through a namespace client instead.
|
||||||
api_key: str, optional
|
api_key: str, optional
|
||||||
If presented, connect to LanceDB cloud.
|
If presented, connect to LanceDB cloud.
|
||||||
Otherwise, connect to a database on file system or cloud storage.
|
Otherwise, connect to a database on file system or cloud storage.
|
||||||
@@ -113,6 +117,18 @@ def connect(
|
|||||||
cache sizes for index and metadata caches, which can significantly
|
cache sizes for index and metadata caches, which can significantly
|
||||||
impact memory use and performance. They can also be re-used across
|
impact memory use and performance. They can also be re-used across
|
||||||
multiple connections to share the same cache state.
|
multiple connections to share the same cache state.
|
||||||
|
namespace_client_impl : str, optional
|
||||||
|
When provided along with ``namespace_client_properties``, ``connect``
|
||||||
|
returns a namespace-backed connection by delegating to
|
||||||
|
:func:`connect_namespace`. The value identifies which namespace
|
||||||
|
implementation to load (e.g., ``"dir"`` or ``"rest"``).
|
||||||
|
namespace_client_properties : dict, optional
|
||||||
|
Configuration to pass to the namespace client implementation. Required
|
||||||
|
when ``namespace_client_impl`` is set.
|
||||||
|
namespace_client_pushdown_operations : list[str], optional
|
||||||
|
Only used when ``namespace_client_properties`` is provided. Forwards to
|
||||||
|
:func:`connect_namespace` to control which operations are executed on the
|
||||||
|
namespace service (e.g., ``["QueryTable", "CreateTable"]``).
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -132,11 +148,42 @@ def connect(
|
|||||||
>>> db = lancedb.connect("db://my_database", api_key="ldb_...",
|
>>> db = lancedb.connect("db://my_database", api_key="ldb_...",
|
||||||
... client_config={"retry_config": {"retries": 5}})
|
... client_config={"retry_config": {"retries": 5}})
|
||||||
|
|
||||||
|
Connect to a namespace-backed database:
|
||||||
|
|
||||||
|
>>> db = lancedb.connect(namespace_client_impl="dir",
|
||||||
|
... namespace_client_properties={"root": "/tmp/ns"})
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
conn : DBConnection
|
conn : DBConnection
|
||||||
A connection to a LanceDB database.
|
A connection to a LanceDB database.
|
||||||
"""
|
"""
|
||||||
|
if namespace_client_impl is not None or namespace_client_properties is not None:
|
||||||
|
if namespace_client_impl is None or namespace_client_properties is None:
|
||||||
|
raise ValueError(
|
||||||
|
"Both namespace_client_impl and "
|
||||||
|
"namespace_client_properties must be provided"
|
||||||
|
)
|
||||||
|
if kwargs:
|
||||||
|
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||||
|
return connect_namespace(
|
||||||
|
namespace_client_impl,
|
||||||
|
namespace_client_properties,
|
||||||
|
read_consistency_interval=read_consistency_interval,
|
||||||
|
storage_options=storage_options,
|
||||||
|
session=session,
|
||||||
|
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||||
|
)
|
||||||
|
|
||||||
|
if namespace_client_pushdown_operations is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"namespace_client_pushdown_operations is only valid when "
|
||||||
|
"connecting through a namespace"
|
||||||
|
)
|
||||||
|
if uri is None:
|
||||||
|
raise ValueError(
|
||||||
|
"uri is required when not connecting through a namespace client"
|
||||||
|
)
|
||||||
if isinstance(uri, str) and uri.startswith("db://"):
|
if isinstance(uri, str) and uri.startswith("db://"):
|
||||||
if api_key is None:
|
if api_key is None:
|
||||||
api_key = os.environ.get("LANCEDB_API_KEY")
|
api_key = os.environ.get("LANCEDB_API_KEY")
|
||||||
@@ -271,6 +318,10 @@ __all__ = [
|
|||||||
"AsyncConnection",
|
"AsyncConnection",
|
||||||
"AsyncLanceNamespaceDBConnection",
|
"AsyncLanceNamespaceDBConnection",
|
||||||
"AsyncTable",
|
"AsyncTable",
|
||||||
|
"col",
|
||||||
|
"Expr",
|
||||||
|
"func",
|
||||||
|
"lit",
|
||||||
"URI",
|
"URI",
|
||||||
"sanitize_uri",
|
"sanitize_uri",
|
||||||
"vector",
|
"vector",
|
||||||
@@ -279,7 +330,6 @@ __all__ = [
|
|||||||
"LanceNamespaceDBConnection",
|
"LanceNamespaceDBConnection",
|
||||||
"RemoteDBConnection",
|
"RemoteDBConnection",
|
||||||
"Session",
|
"Session",
|
||||||
"StorageOptionsProvider",
|
|
||||||
"Table",
|
"Table",
|
||||||
"__version__",
|
"__version__",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ from .index import (
|
|||||||
HnswSq,
|
HnswSq,
|
||||||
FTS,
|
FTS,
|
||||||
)
|
)
|
||||||
from .io import StorageOptionsProvider
|
|
||||||
from lance_namespace import (
|
from lance_namespace import (
|
||||||
ListNamespacesResponse,
|
ListNamespacesResponse,
|
||||||
CreateNamespaceResponse,
|
CreateNamespaceResponse,
|
||||||
@@ -27,6 +26,32 @@ from .remote import ClientConfig
|
|||||||
IvfHnswPq: type[HnswPq] = HnswPq
|
IvfHnswPq: type[HnswPq] = HnswPq
|
||||||
IvfHnswSq: type[HnswSq] = HnswSq
|
IvfHnswSq: type[HnswSq] = HnswSq
|
||||||
|
|
||||||
|
class PyExpr:
|
||||||
|
"""A type-safe DataFusion expression node (Rust-side handle)."""
|
||||||
|
|
||||||
|
def eq(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def ne(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def lt(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def lte(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def gt(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def gte(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def and_(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def or_(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def not_(self) -> "PyExpr": ...
|
||||||
|
def add(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def sub(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def mul(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def div(self, other: "PyExpr") -> "PyExpr": ...
|
||||||
|
def lower(self) -> "PyExpr": ...
|
||||||
|
def upper(self) -> "PyExpr": ...
|
||||||
|
def contains(self, substr: "PyExpr") -> "PyExpr": ...
|
||||||
|
def cast(self, data_type: pa.DataType) -> "PyExpr": ...
|
||||||
|
def to_sql(self) -> str: ...
|
||||||
|
|
||||||
|
def expr_col(name: str) -> PyExpr: ...
|
||||||
|
def expr_lit(value: Union[bool, int, float, str]) -> PyExpr: ...
|
||||||
|
def expr_func(name: str, args: List[PyExpr]) -> PyExpr: ...
|
||||||
|
|
||||||
class Session:
|
class Session:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -46,35 +71,35 @@ class Connection(object):
|
|||||||
async def close(self): ...
|
async def close(self): ...
|
||||||
async def list_namespaces(
|
async def list_namespaces(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListNamespacesResponse: ...
|
) -> ListNamespacesResponse: ...
|
||||||
async def create_namespace(
|
async def create_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
properties: Optional[Dict[str, str]] = None,
|
properties: Optional[Dict[str, str]] = None,
|
||||||
) -> CreateNamespaceResponse: ...
|
) -> CreateNamespaceResponse: ...
|
||||||
async def drop_namespace(
|
async def drop_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
behavior: Optional[str] = None,
|
behavior: Optional[str] = None,
|
||||||
) -> DropNamespaceResponse: ...
|
) -> DropNamespaceResponse: ...
|
||||||
async def describe_namespace(
|
async def describe_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
) -> DescribeNamespaceResponse: ...
|
) -> DescribeNamespaceResponse: ...
|
||||||
async def list_tables(
|
async def list_tables(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListTablesResponse: ...
|
) -> ListTablesResponse: ...
|
||||||
async def table_names(
|
async def table_names(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]],
|
namespace_path: Optional[List[str]],
|
||||||
start_after: Optional[str],
|
start_after: Optional[str],
|
||||||
limit: Optional[int],
|
limit: Optional[int],
|
||||||
) -> list[str]: ... # Deprecated: Use list_tables instead
|
) -> list[str]: ... # Deprecated: Use list_tables instead
|
||||||
@@ -83,9 +108,8 @@ class Connection(object):
|
|||||||
name: str,
|
name: str,
|
||||||
mode: str,
|
mode: str,
|
||||||
data: pa.RecordBatchReader,
|
data: pa.RecordBatchReader,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
) -> Table: ...
|
) -> Table: ...
|
||||||
async def create_empty_table(
|
async def create_empty_table(
|
||||||
@@ -93,17 +117,15 @@ class Connection(object):
|
|||||||
name: str,
|
name: str,
|
||||||
mode: str,
|
mode: str,
|
||||||
schema: pa.Schema,
|
schema: pa.Schema,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
) -> Table: ...
|
) -> Table: ...
|
||||||
async def open_table(
|
async def open_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
) -> Table: ...
|
) -> Table: ...
|
||||||
@@ -111,7 +133,7 @@ class Connection(object):
|
|||||||
self,
|
self,
|
||||||
target_table_name: str,
|
target_table_name: str,
|
||||||
source_uri: str,
|
source_uri: str,
|
||||||
target_namespace: Optional[List[str]] = None,
|
target_namespace_path: Optional[List[str]] = None,
|
||||||
source_version: Optional[int] = None,
|
source_version: Optional[int] = None,
|
||||||
source_tag: Optional[str] = None,
|
source_tag: Optional[str] = None,
|
||||||
is_shallow: bool = True,
|
is_shallow: bool = True,
|
||||||
@@ -120,13 +142,15 @@ class Connection(object):
|
|||||||
self,
|
self,
|
||||||
cur_name: str,
|
cur_name: str,
|
||||||
new_name: str,
|
new_name: str,
|
||||||
cur_namespace: Optional[List[str]] = None,
|
cur_namespace_path: Optional[List[str]] = None,
|
||||||
new_namespace: Optional[List[str]] = None,
|
new_namespace_path: Optional[List[str]] = None,
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
async def drop_table(
|
async def drop_table(
|
||||||
self, name: str, namespace: Optional[List[str]] = None
|
self, name: str, namespace_path: Optional[List[str]] = None
|
||||||
|
) -> None: ...
|
||||||
|
async def drop_all_tables(
|
||||||
|
self, namespace_path: Optional[List[str]] = None
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
|
|
||||||
|
|
||||||
class Table:
|
class Table:
|
||||||
def name(self) -> str: ...
|
def name(self) -> str: ...
|
||||||
@@ -135,7 +159,10 @@ class Table:
|
|||||||
def close(self) -> None: ...
|
def close(self) -> None: ...
|
||||||
async def schema(self) -> pa.Schema: ...
|
async def schema(self) -> pa.Schema: ...
|
||||||
async def add(
|
async def add(
|
||||||
self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
|
self,
|
||||||
|
data: pa.RecordBatchReader,
|
||||||
|
mode: Literal["append", "overwrite"],
|
||||||
|
progress: Optional[Any] = None,
|
||||||
) -> AddResult: ...
|
) -> AddResult: ...
|
||||||
async def update(
|
async def update(
|
||||||
self, updates: Dict[str, str], where: Optional[str]
|
self, updates: Dict[str, str], where: Optional[str]
|
||||||
@@ -166,6 +193,8 @@ class Table:
|
|||||||
async def checkout(self, version: Union[int, str]): ...
|
async def checkout(self, version: Union[int, str]): ...
|
||||||
async def checkout_latest(self): ...
|
async def checkout_latest(self): ...
|
||||||
async def restore(self, version: Optional[Union[int, str]] = None): ...
|
async def restore(self, version: Optional[Union[int, str]] = None): ...
|
||||||
|
async def prewarm_index(self, index_name: str) -> None: ...
|
||||||
|
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
|
||||||
async def list_indices(self) -> list[IndexConfig]: ...
|
async def list_indices(self) -> list[IndexConfig]: ...
|
||||||
async def delete(self, filter: str) -> DeleteResult: ...
|
async def delete(self, filter: str) -> DeleteResult: ...
|
||||||
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
|
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
|
||||||
@@ -220,7 +249,9 @@ class RecordBatchStream:
|
|||||||
|
|
||||||
class Query:
|
class Query:
|
||||||
def where(self, filter: str): ...
|
def where(self, filter: str): ...
|
||||||
def select(self, columns: Tuple[str, str]): ...
|
def where_expr(self, expr: PyExpr): ...
|
||||||
|
def select(self, columns: List[Tuple[str, str]]): ...
|
||||||
|
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||||
def select_columns(self, columns: List[str]): ...
|
def select_columns(self, columns: List[str]): ...
|
||||||
def limit(self, limit: int): ...
|
def limit(self, limit: int): ...
|
||||||
def offset(self, offset: int): ...
|
def offset(self, offset: int): ...
|
||||||
@@ -246,7 +277,9 @@ class TakeQuery:
|
|||||||
|
|
||||||
class FTSQuery:
|
class FTSQuery:
|
||||||
def where(self, filter: str): ...
|
def where(self, filter: str): ...
|
||||||
def select(self, columns: List[str]): ...
|
def where_expr(self, expr: PyExpr): ...
|
||||||
|
def select(self, columns: List[Tuple[str, str]]): ...
|
||||||
|
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||||
def limit(self, limit: int): ...
|
def limit(self, limit: int): ...
|
||||||
def offset(self, offset: int): ...
|
def offset(self, offset: int): ...
|
||||||
def fast_search(self): ...
|
def fast_search(self): ...
|
||||||
@@ -265,7 +298,9 @@ class VectorQuery:
|
|||||||
async def output_schema(self) -> pa.Schema: ...
|
async def output_schema(self) -> pa.Schema: ...
|
||||||
async def execute(self) -> RecordBatchStream: ...
|
async def execute(self) -> RecordBatchStream: ...
|
||||||
def where(self, filter: str): ...
|
def where(self, filter: str): ...
|
||||||
def select(self, columns: List[str]): ...
|
def where_expr(self, expr: PyExpr): ...
|
||||||
|
def select(self, columns: List[Tuple[str, str]]): ...
|
||||||
|
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||||
def select_with_projection(self, columns: Tuple[str, str]): ...
|
def select_with_projection(self, columns: Tuple[str, str]): ...
|
||||||
def limit(self, limit: int): ...
|
def limit(self, limit: int): ...
|
||||||
def offset(self, offset: int): ...
|
def offset(self, offset: int): ...
|
||||||
@@ -282,7 +317,9 @@ class VectorQuery:
|
|||||||
|
|
||||||
class HybridQuery:
|
class HybridQuery:
|
||||||
def where(self, filter: str): ...
|
def where(self, filter: str): ...
|
||||||
def select(self, columns: List[str]): ...
|
def where_expr(self, expr: PyExpr): ...
|
||||||
|
def select(self, columns: List[Tuple[str, str]]): ...
|
||||||
|
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||||
def limit(self, limit: int): ...
|
def limit(self, limit: int): ...
|
||||||
def offset(self, offset: int): ...
|
def offset(self, offset: int): ...
|
||||||
def fast_search(self): ...
|
def fast_search(self): ...
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
from functools import singledispatch
|
||||||
from typing import List, Optional, Tuple, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
from lancedb.pydantic import LanceModel, model_to_dict
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
from ._lancedb import RecordBatchStream
|
from ._lancedb import RecordBatchStream
|
||||||
@@ -80,3 +82,32 @@ def peek_reader(
|
|||||||
yield from reader
|
yield from reader
|
||||||
|
|
||||||
return batch, pa.RecordBatchReader.from_batches(batch.schema, all_batches())
|
return batch, pa.RecordBatchReader.from_batches(batch.schema, all_batches())
|
||||||
|
|
||||||
|
|
||||||
|
@singledispatch
|
||||||
|
def to_arrow(data) -> pa.Table:
|
||||||
|
"""Convert a single data object to a pa.Table."""
|
||||||
|
raise NotImplementedError(f"to_arrow not implemented for type {type(data)}")
|
||||||
|
|
||||||
|
|
||||||
|
@to_arrow.register(pa.RecordBatch)
|
||||||
|
def _arrow_from_batch(data: pa.RecordBatch) -> pa.Table:
|
||||||
|
return pa.Table.from_batches([data])
|
||||||
|
|
||||||
|
|
||||||
|
@to_arrow.register(pa.Table)
|
||||||
|
def _arrow_from_table(data: pa.Table) -> pa.Table:
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
@to_arrow.register(list)
|
||||||
|
def _arrow_from_list(data: list) -> pa.Table:
|
||||||
|
if not data:
|
||||||
|
raise ValueError("Cannot create table from empty list without a schema")
|
||||||
|
|
||||||
|
if isinstance(data[0], LanceModel):
|
||||||
|
schema = data[0].__class__.to_arrow_schema()
|
||||||
|
dicts = [model_to_dict(d) for d in data]
|
||||||
|
return pa.Table.from_pylist(dicts, schema=schema)
|
||||||
|
|
||||||
|
return pa.Table.from_pylist(data)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from abc import abstractmethod
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import sys
|
import sys
|
||||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union
|
||||||
|
|
||||||
if sys.version_info >= (3, 12):
|
if sys.version_info >= (3, 12):
|
||||||
from typing import override
|
from typing import override
|
||||||
@@ -52,7 +52,6 @@ if TYPE_CHECKING:
|
|||||||
from ._lancedb import Connection as LanceDbConnection
|
from ._lancedb import Connection as LanceDbConnection
|
||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .embeddings import EmbeddingFunctionConfig
|
from .embeddings import EmbeddingFunctionConfig
|
||||||
from .io import StorageOptionsProvider
|
|
||||||
from ._lancedb import Session
|
from ._lancedb import Session
|
||||||
|
|
||||||
from .namespace_utils import (
|
from .namespace_utils import (
|
||||||
@@ -67,7 +66,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
def list_namespaces(
|
def list_namespaces(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListNamespacesResponse:
|
) -> ListNamespacesResponse:
|
||||||
@@ -75,7 +74,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The parent namespace to list namespaces in.
|
The parent namespace to list namespaces in.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -89,13 +88,13 @@ class DBConnection(EnforceOverrides):
|
|||||||
ListNamespacesResponse
|
ListNamespacesResponse
|
||||||
Response containing namespace names and optional page_token for pagination.
|
Response containing namespace names and optional page_token for pagination.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return ListNamespacesResponse(namespaces=[], page_token=None)
|
return ListNamespacesResponse(namespaces=[], page_token=None)
|
||||||
|
|
||||||
def create_namespace(
|
def create_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
properties: Optional[Dict[str, str]] = None,
|
properties: Optional[Dict[str, str]] = None,
|
||||||
) -> CreateNamespaceResponse:
|
) -> CreateNamespaceResponse:
|
||||||
@@ -103,7 +102,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to create.
|
The namespace identifier to create.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||||
@@ -122,7 +121,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
def drop_namespace(
|
def drop_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
behavior: Optional[str] = None,
|
behavior: Optional[str] = None,
|
||||||
) -> DropNamespaceResponse:
|
) -> DropNamespaceResponse:
|
||||||
@@ -130,7 +129,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to drop.
|
The namespace identifier to drop.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||||
@@ -147,12 +146,14 @@ class DBConnection(EnforceOverrides):
|
|||||||
"Namespace operations are not supported for this connection type"
|
"Namespace operations are not supported for this connection type"
|
||||||
)
|
)
|
||||||
|
|
||||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
def describe_namespace(
|
||||||
|
self, namespace_path: List[str]
|
||||||
|
) -> DescribeNamespaceResponse:
|
||||||
"""Describe a namespace.
|
"""Describe a namespace.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to describe.
|
The namespace identifier to describe.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
@@ -166,7 +167,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
def list_tables(
|
def list_tables(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListTablesResponse:
|
) -> ListTablesResponse:
|
||||||
@@ -174,7 +175,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -198,13 +199,13 @@ class DBConnection(EnforceOverrides):
|
|||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: int = 10,
|
limit: int = 10,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
"""List all tables in this database, in sorted order
|
"""List all tables in this database, in sorted order
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -231,9 +232,8 @@ class DBConnection(EnforceOverrides):
|
|||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
data_storage_version: Optional[str] = None,
|
data_storage_version: Optional[str] = None,
|
||||||
enable_v2_manifest_paths: Optional[bool] = None,
|
enable_v2_manifest_paths: Optional[bool] = None,
|
||||||
) -> Table:
|
) -> Table:
|
||||||
@@ -243,7 +243,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The namespace to create the table in.
|
The namespace to create the table in.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
data: The data to initialize the table, *optional*
|
data: The data to initialize the table, *optional*
|
||||||
@@ -401,9 +401,8 @@ class DBConnection(EnforceOverrides):
|
|||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
) -> Table:
|
) -> Table:
|
||||||
"""Open a Lance Table in the database.
|
"""Open a Lance Table in the database.
|
||||||
@@ -412,7 +411,7 @@ class DBConnection(EnforceOverrides):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to open the table from.
|
The namespace to open the table from.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
index_cache_size: int, default 256
|
index_cache_size: int, default 256
|
||||||
@@ -440,27 +439,27 @@ class DBConnection(EnforceOverrides):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||||
"""Drop a table from the database.
|
"""Drop a table from the database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The namespace to drop the table from.
|
The namespace to drop the table from.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def rename_table(
|
def rename_table(
|
||||||
self,
|
self,
|
||||||
cur_name: str,
|
cur_name: str,
|
||||||
new_name: str,
|
new_name: str,
|
||||||
cur_namespace: Optional[List[str]] = None,
|
cur_namespace_path: Optional[List[str]] = None,
|
||||||
new_namespace: Optional[List[str]] = None,
|
new_namespace_path: Optional[List[str]] = None,
|
||||||
):
|
):
|
||||||
"""Rename a table in the database.
|
"""Rename a table in the database.
|
||||||
|
|
||||||
@@ -470,17 +469,17 @@ class DBConnection(EnforceOverrides):
|
|||||||
The current name of the table.
|
The current name of the table.
|
||||||
new_name: str
|
new_name: str
|
||||||
The new name of the table.
|
The new name of the table.
|
||||||
cur_namespace: List[str], optional
|
cur_namespace_path: List[str], optional
|
||||||
The namespace of the current table.
|
The namespace of the current table.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
new_namespace: List[str], optional
|
new_namespace_path: List[str], optional
|
||||||
The namespace to move the table to.
|
The namespace to move the table to.
|
||||||
If not specified, defaults to the same as cur_namespace.
|
If not specified, defaults to the same as cur_namespace.
|
||||||
"""
|
"""
|
||||||
if cur_namespace is None:
|
if cur_namespace_path is None:
|
||||||
cur_namespace = []
|
cur_namespace_path = []
|
||||||
if new_namespace is None:
|
if new_namespace_path is None:
|
||||||
new_namespace = []
|
new_namespace_path = []
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def drop_database(self):
|
def drop_database(self):
|
||||||
@@ -490,18 +489,18 @@ class DBConnection(EnforceOverrides):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
|
||||||
"""
|
"""
|
||||||
Drop all tables from the database
|
Drop all tables from the database
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to drop all tables from.
|
The namespace to drop all tables from.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -642,7 +641,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
@override
|
@override
|
||||||
def list_namespaces(
|
def list_namespaces(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListNamespacesResponse:
|
) -> ListNamespacesResponse:
|
||||||
@@ -650,7 +649,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The parent namespace to list namespaces in.
|
The parent namespace to list namespaces in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -664,18 +663,18 @@ class LanceDBConnection(DBConnection):
|
|||||||
ListNamespacesResponse
|
ListNamespacesResponse
|
||||||
Response containing namespace names and optional page_token for pagination.
|
Response containing namespace names and optional page_token for pagination.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.list_namespaces(
|
self._conn.list_namespaces(
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def create_namespace(
|
def create_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
properties: Optional[Dict[str, str]] = None,
|
properties: Optional[Dict[str, str]] = None,
|
||||||
) -> CreateNamespaceResponse:
|
) -> CreateNamespaceResponse:
|
||||||
@@ -683,7 +682,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to create.
|
The namespace identifier to create.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||||
@@ -698,14 +697,14 @@ class LanceDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.create_namespace(
|
self._conn.create_namespace(
|
||||||
namespace=namespace, mode=mode, properties=properties
|
namespace_path=namespace_path, mode=mode, properties=properties
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_namespace(
|
def drop_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
behavior: Optional[str] = None,
|
behavior: Optional[str] = None,
|
||||||
) -> DropNamespaceResponse:
|
) -> DropNamespaceResponse:
|
||||||
@@ -713,7 +712,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to drop.
|
The namespace identifier to drop.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||||
@@ -727,16 +726,20 @@ class LanceDBConnection(DBConnection):
|
|||||||
Response containing properties and transaction_id if applicable.
|
Response containing properties and transaction_id if applicable.
|
||||||
"""
|
"""
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
self._conn.drop_namespace(
|
||||||
|
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
def describe_namespace(
|
||||||
|
self, namespace_path: List[str]
|
||||||
|
) -> DescribeNamespaceResponse:
|
||||||
"""Describe a namespace.
|
"""Describe a namespace.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to describe.
|
The namespace identifier to describe.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
@@ -744,12 +747,12 @@ class LanceDBConnection(DBConnection):
|
|||||||
DescribeNamespaceResponse
|
DescribeNamespaceResponse
|
||||||
Response containing the namespace properties.
|
Response containing the namespace properties.
|
||||||
"""
|
"""
|
||||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def list_tables(
|
def list_tables(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListTablesResponse:
|
) -> ListTablesResponse:
|
||||||
@@ -757,7 +760,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -771,11 +774,11 @@ class LanceDBConnection(DBConnection):
|
|||||||
ListTablesResponse
|
ListTablesResponse
|
||||||
Response containing table names and optional page_token for pagination.
|
Response containing table names and optional page_token for pagination.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.list_tables(
|
self._conn.list_tables(
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -785,7 +788,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: int = 10,
|
limit: int = 10,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
"""Get the names of all tables in the database. The names are sorted.
|
"""Get the names of all tables in the database. The names are sorted.
|
||||||
|
|
||||||
@@ -794,7 +797,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
The token to use for pagination.
|
The token to use for pagination.
|
||||||
@@ -813,11 +816,11 @@ class LanceDBConnection(DBConnection):
|
|||||||
DeprecationWarning,
|
DeprecationWarning,
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.table_names(
|
self._conn.table_names(
|
||||||
namespace=namespace, start_after=page_token, limit=limit
|
namespace_path=namespace_path, start_after=page_token, limit=limit
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -839,9 +842,8 @@ class LanceDBConnection(DBConnection):
|
|||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
data_storage_version: Optional[str] = None,
|
data_storage_version: Optional[str] = None,
|
||||||
enable_v2_manifest_paths: Optional[bool] = None,
|
enable_v2_manifest_paths: Optional[bool] = None,
|
||||||
) -> LanceTable:
|
) -> LanceTable:
|
||||||
@@ -849,15 +851,15 @@ class LanceDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to create the table in.
|
The namespace to create the table in.
|
||||||
|
|
||||||
See
|
See
|
||||||
---
|
---
|
||||||
DBConnection.create_table
|
DBConnection.create_table
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
if mode.lower() not in ["create", "overwrite"]:
|
if mode.lower() not in ["create", "overwrite"]:
|
||||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||||
validate_table_name(name)
|
validate_table_name(name)
|
||||||
@@ -872,9 +874,8 @@ class LanceDBConnection(DBConnection):
|
|||||||
on_bad_vectors=on_bad_vectors,
|
on_bad_vectors=on_bad_vectors,
|
||||||
fill_value=fill_value,
|
fill_value=fill_value,
|
||||||
embedding_functions=embedding_functions,
|
embedding_functions=embedding_functions,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
)
|
)
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
@@ -883,9 +884,8 @@ class LanceDBConnection(DBConnection):
|
|||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
) -> LanceTable:
|
) -> LanceTable:
|
||||||
"""Open a table in the database.
|
"""Open a table in the database.
|
||||||
@@ -894,15 +894,15 @@ class LanceDBConnection(DBConnection):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to open the table from.
|
The namespace to open the table from.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
A LanceTable object representing the table.
|
A LanceTable object representing the table.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
if index_cache_size is not None:
|
if index_cache_size is not None:
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
@@ -917,9 +917,8 @@ class LanceDBConnection(DBConnection):
|
|||||||
return LanceTable.open(
|
return LanceTable.open(
|
||||||
self,
|
self,
|
||||||
name,
|
name,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -928,7 +927,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
target_table_name: str,
|
target_table_name: str,
|
||||||
source_uri: str,
|
source_uri: str,
|
||||||
*,
|
*,
|
||||||
target_namespace: Optional[List[str]] = None,
|
target_namespace_path: Optional[List[str]] = None,
|
||||||
source_version: Optional[int] = None,
|
source_version: Optional[int] = None,
|
||||||
source_tag: Optional[str] = None,
|
source_tag: Optional[str] = None,
|
||||||
is_shallow: bool = True,
|
is_shallow: bool = True,
|
||||||
@@ -946,7 +945,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
The name of the target table to create.
|
The name of the target table to create.
|
||||||
source_uri: str
|
source_uri: str
|
||||||
The URI of the source table to clone from.
|
The URI of the source table to clone from.
|
||||||
target_namespace: List[str], optional
|
target_namespace_path: List[str], optional
|
||||||
The namespace for the target table.
|
The namespace for the target table.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
source_version: int, optional
|
source_version: int, optional
|
||||||
@@ -961,13 +960,13 @@ class LanceDBConnection(DBConnection):
|
|||||||
-------
|
-------
|
||||||
A LanceTable object representing the cloned table.
|
A LanceTable object representing the cloned table.
|
||||||
"""
|
"""
|
||||||
if target_namespace is None:
|
if target_namespace_path is None:
|
||||||
target_namespace = []
|
target_namespace_path = []
|
||||||
LOOP.run(
|
LOOP.run(
|
||||||
self._conn.clone_table(
|
self._conn.clone_table(
|
||||||
target_table_name,
|
target_table_name,
|
||||||
source_uri,
|
source_uri,
|
||||||
target_namespace=target_namespace,
|
target_namespace_path=target_namespace_path,
|
||||||
source_version=source_version,
|
source_version=source_version,
|
||||||
source_tag=source_tag,
|
source_tag=source_tag,
|
||||||
is_shallow=is_shallow,
|
is_shallow=is_shallow,
|
||||||
@@ -976,14 +975,14 @@ class LanceDBConnection(DBConnection):
|
|||||||
return LanceTable.open(
|
return LanceTable.open(
|
||||||
self,
|
self,
|
||||||
target_table_name,
|
target_table_name,
|
||||||
namespace=target_namespace,
|
namespace_path=target_namespace_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_table(
|
def drop_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
ignore_missing: bool = False,
|
ignore_missing: bool = False,
|
||||||
):
|
):
|
||||||
"""Drop a table from the database.
|
"""Drop a table from the database.
|
||||||
@@ -992,32 +991,32 @@ class LanceDBConnection(DBConnection):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to drop the table from.
|
The namespace to drop the table from.
|
||||||
ignore_missing: bool, default False
|
ignore_missing: bool, default False
|
||||||
If True, ignore if the table does not exist.
|
If True, ignore if the table does not exist.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
LOOP.run(
|
LOOP.run(
|
||||||
self._conn.drop_table(
|
self._conn.drop_table(
|
||||||
name, namespace=namespace, ignore_missing=ignore_missing
|
name, namespace_path=namespace_path, ignore_missing=ignore_missing
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
LOOP.run(self._conn.drop_all_tables(namespace=namespace))
|
LOOP.run(self._conn.drop_all_tables(namespace_path=namespace_path))
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def rename_table(
|
def rename_table(
|
||||||
self,
|
self,
|
||||||
cur_name: str,
|
cur_name: str,
|
||||||
new_name: str,
|
new_name: str,
|
||||||
cur_namespace: Optional[List[str]] = None,
|
cur_namespace_path: Optional[List[str]] = None,
|
||||||
new_namespace: Optional[List[str]] = None,
|
new_namespace_path: Optional[List[str]] = None,
|
||||||
):
|
):
|
||||||
"""Rename a table in the database.
|
"""Rename a table in the database.
|
||||||
|
|
||||||
@@ -1027,21 +1026,21 @@ class LanceDBConnection(DBConnection):
|
|||||||
The current name of the table.
|
The current name of the table.
|
||||||
new_name: str
|
new_name: str
|
||||||
The new name of the table.
|
The new name of the table.
|
||||||
cur_namespace: List[str], optional
|
cur_namespace_path: List[str], optional
|
||||||
The namespace of the current table.
|
The namespace of the current table.
|
||||||
new_namespace: List[str], optional
|
new_namespace_path: List[str], optional
|
||||||
The namespace to move the table to.
|
The namespace to move the table to.
|
||||||
"""
|
"""
|
||||||
if cur_namespace is None:
|
if cur_namespace_path is None:
|
||||||
cur_namespace = []
|
cur_namespace_path = []
|
||||||
if new_namespace is None:
|
if new_namespace_path is None:
|
||||||
new_namespace = []
|
new_namespace_path = []
|
||||||
LOOP.run(
|
LOOP.run(
|
||||||
self._conn.rename_table(
|
self._conn.rename_table(
|
||||||
cur_name,
|
cur_name,
|
||||||
new_name,
|
new_name,
|
||||||
cur_namespace=cur_namespace,
|
cur_namespace_path=cur_namespace_path,
|
||||||
new_namespace=new_namespace,
|
new_namespace_path=new_namespace_path,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1125,7 +1124,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
async def list_namespaces(
|
async def list_namespaces(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListNamespacesResponse:
|
) -> ListNamespacesResponse:
|
||||||
@@ -1133,7 +1132,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The parent namespace to list namespaces in.
|
The parent namespace to list namespaces in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -1146,16 +1145,16 @@ class AsyncConnection(object):
|
|||||||
ListNamespacesResponse
|
ListNamespacesResponse
|
||||||
Response containing namespace names and optional pagination token
|
Response containing namespace names and optional pagination token
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
result = await self._inner.list_namespaces(
|
result = await self._inner.list_namespaces(
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
return ListNamespacesResponse(**result)
|
return ListNamespacesResponse(**result)
|
||||||
|
|
||||||
async def create_namespace(
|
async def create_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
properties: Optional[Dict[str, str]] = None,
|
properties: Optional[Dict[str, str]] = None,
|
||||||
) -> CreateNamespaceResponse:
|
) -> CreateNamespaceResponse:
|
||||||
@@ -1163,7 +1162,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to create.
|
The namespace identifier to create.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Creation mode - "create", "exist_ok", or "overwrite". Case insensitive.
|
Creation mode - "create", "exist_ok", or "overwrite". Case insensitive.
|
||||||
@@ -1176,7 +1175,7 @@ class AsyncConnection(object):
|
|||||||
Response containing namespace properties
|
Response containing namespace properties
|
||||||
"""
|
"""
|
||||||
result = await self._inner.create_namespace(
|
result = await self._inner.create_namespace(
|
||||||
namespace,
|
namespace_path,
|
||||||
mode=_normalize_create_namespace_mode(mode),
|
mode=_normalize_create_namespace_mode(mode),
|
||||||
properties=properties,
|
properties=properties,
|
||||||
)
|
)
|
||||||
@@ -1184,7 +1183,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
async def drop_namespace(
|
async def drop_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
behavior: Optional[str] = None,
|
behavior: Optional[str] = None,
|
||||||
) -> DropNamespaceResponse:
|
) -> DropNamespaceResponse:
|
||||||
@@ -1192,7 +1191,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to drop.
|
The namespace identifier to drop.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||||
@@ -1206,20 +1205,20 @@ class AsyncConnection(object):
|
|||||||
Response containing properties and transaction_id if applicable.
|
Response containing properties and transaction_id if applicable.
|
||||||
"""
|
"""
|
||||||
result = await self._inner.drop_namespace(
|
result = await self._inner.drop_namespace(
|
||||||
namespace,
|
namespace_path,
|
||||||
mode=_normalize_drop_namespace_mode(mode),
|
mode=_normalize_drop_namespace_mode(mode),
|
||||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||||
)
|
)
|
||||||
return DropNamespaceResponse(**result)
|
return DropNamespaceResponse(**result)
|
||||||
|
|
||||||
async def describe_namespace(
|
async def describe_namespace(
|
||||||
self, namespace: List[str]
|
self, namespace_path: List[str]
|
||||||
) -> DescribeNamespaceResponse:
|
) -> DescribeNamespaceResponse:
|
||||||
"""Describe a namespace.
|
"""Describe a namespace.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to describe.
|
The namespace identifier to describe.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
@@ -1227,12 +1226,12 @@ class AsyncConnection(object):
|
|||||||
DescribeNamespaceResponse
|
DescribeNamespaceResponse
|
||||||
Response containing the namespace properties.
|
Response containing the namespace properties.
|
||||||
"""
|
"""
|
||||||
result = await self._inner.describe_namespace(namespace)
|
result = await self._inner.describe_namespace(namespace_path)
|
||||||
return DescribeNamespaceResponse(**result)
|
return DescribeNamespaceResponse(**result)
|
||||||
|
|
||||||
async def list_tables(
|
async def list_tables(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListTablesResponse:
|
) -> ListTablesResponse:
|
||||||
@@ -1240,7 +1239,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -1254,17 +1253,17 @@ class AsyncConnection(object):
|
|||||||
ListTablesResponse
|
ListTablesResponse
|
||||||
Response containing table names and optional page_token for pagination.
|
Response containing table names and optional page_token for pagination.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
result = await self._inner.list_tables(
|
result = await self._inner.list_tables(
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
return ListTablesResponse(**result)
|
return ListTablesResponse(**result)
|
||||||
|
|
||||||
async def table_names(
|
async def table_names(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
start_after: Optional[str] = None,
|
start_after: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
@@ -1275,7 +1274,7 @@ class AsyncConnection(object):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
start_after: str, optional
|
start_after: str, optional
|
||||||
@@ -1298,10 +1297,10 @@ class AsyncConnection(object):
|
|||||||
DeprecationWarning,
|
DeprecationWarning,
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return await self._inner.table_names(
|
return await self._inner.table_names(
|
||||||
namespace=namespace, start_after=start_after, limit=limit
|
namespace_path=namespace_path, start_after=start_after, limit=limit
|
||||||
)
|
)
|
||||||
|
|
||||||
async def create_table(
|
async def create_table(
|
||||||
@@ -1314,9 +1313,8 @@ class AsyncConnection(object):
|
|||||||
on_bad_vectors: Optional[str] = None,
|
on_bad_vectors: Optional[str] = None,
|
||||||
fill_value: Optional[float] = None,
|
fill_value: Optional[float] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
) -> AsyncTable:
|
) -> AsyncTable:
|
||||||
@@ -1326,7 +1324,7 @@ class AsyncConnection(object):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The namespace to create the table in.
|
The namespace to create the table in.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
data: The data to initialize the table, *optional*
|
data: The data to initialize the table, *optional*
|
||||||
@@ -1477,8 +1475,8 @@ class AsyncConnection(object):
|
|||||||
... await db.create_table("table4", make_batches(), schema=schema)
|
... await db.create_table("table4", make_batches(), schema=schema)
|
||||||
>>> asyncio.run(iterable_example())
|
>>> asyncio.run(iterable_example())
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
metadata = None
|
metadata = None
|
||||||
|
|
||||||
if embedding_functions is not None:
|
if embedding_functions is not None:
|
||||||
@@ -1513,9 +1511,8 @@ class AsyncConnection(object):
|
|||||||
name,
|
name,
|
||||||
mode,
|
mode,
|
||||||
schema,
|
schema,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
location=location,
|
location=location,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -1524,9 +1521,8 @@ class AsyncConnection(object):
|
|||||||
name,
|
name,
|
||||||
mode,
|
mode,
|
||||||
data,
|
data,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
location=location,
|
location=location,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1536,11 +1532,12 @@ class AsyncConnection(object):
|
|||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
) -> AsyncTable:
|
) -> AsyncTable:
|
||||||
"""Open a Lance Table in the database.
|
"""Open a Lance Table in the database.
|
||||||
|
|
||||||
@@ -1548,7 +1545,7 @@ class AsyncConnection(object):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to open the table from.
|
The namespace to open the table from.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
storage_options: dict, optional
|
storage_options: dict, optional
|
||||||
@@ -1573,20 +1570,24 @@ class AsyncConnection(object):
|
|||||||
The explicit location (URI) of the table. If provided, the table will be
|
The explicit location (URI) of the table. If provided, the table will be
|
||||||
opened from this location instead of deriving it from the database URI
|
opened from this location instead of deriving it from the database URI
|
||||||
and table name.
|
and table name.
|
||||||
|
managed_versioning: bool, optional
|
||||||
|
Whether managed versioning is enabled for this table. If provided,
|
||||||
|
avoids a redundant describe_table call when namespace_client is set.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
A LanceTable object representing the table.
|
A LanceTable object representing the table.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
table = await self._inner.open_table(
|
table = await self._inner.open_table(
|
||||||
name,
|
name,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
return AsyncTable(table)
|
return AsyncTable(table)
|
||||||
|
|
||||||
@@ -1595,7 +1596,7 @@ class AsyncConnection(object):
|
|||||||
target_table_name: str,
|
target_table_name: str,
|
||||||
source_uri: str,
|
source_uri: str,
|
||||||
*,
|
*,
|
||||||
target_namespace: Optional[List[str]] = None,
|
target_namespace_path: Optional[List[str]] = None,
|
||||||
source_version: Optional[int] = None,
|
source_version: Optional[int] = None,
|
||||||
source_tag: Optional[str] = None,
|
source_tag: Optional[str] = None,
|
||||||
is_shallow: bool = True,
|
is_shallow: bool = True,
|
||||||
@@ -1613,7 +1614,7 @@ class AsyncConnection(object):
|
|||||||
The name of the target table to create.
|
The name of the target table to create.
|
||||||
source_uri: str
|
source_uri: str
|
||||||
The URI of the source table to clone from.
|
The URI of the source table to clone from.
|
||||||
target_namespace: List[str], optional
|
target_namespace_path: List[str], optional
|
||||||
The namespace for the target table.
|
The namespace for the target table.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
source_version: int, optional
|
source_version: int, optional
|
||||||
@@ -1628,12 +1629,12 @@ class AsyncConnection(object):
|
|||||||
-------
|
-------
|
||||||
An AsyncTable object representing the cloned table.
|
An AsyncTable object representing the cloned table.
|
||||||
"""
|
"""
|
||||||
if target_namespace is None:
|
if target_namespace_path is None:
|
||||||
target_namespace = []
|
target_namespace_path = []
|
||||||
table = await self._inner.clone_table(
|
table = await self._inner.clone_table(
|
||||||
target_table_name,
|
target_table_name,
|
||||||
source_uri,
|
source_uri,
|
||||||
target_namespace=target_namespace,
|
target_namespace_path=target_namespace_path,
|
||||||
source_version=source_version,
|
source_version=source_version,
|
||||||
source_tag=source_tag,
|
source_tag=source_tag,
|
||||||
is_shallow=is_shallow,
|
is_shallow=is_shallow,
|
||||||
@@ -1644,8 +1645,8 @@ class AsyncConnection(object):
|
|||||||
self,
|
self,
|
||||||
cur_name: str,
|
cur_name: str,
|
||||||
new_name: str,
|
new_name: str,
|
||||||
cur_namespace: Optional[List[str]] = None,
|
cur_namespace_path: Optional[List[str]] = None,
|
||||||
new_namespace: Optional[List[str]] = None,
|
new_namespace_path: Optional[List[str]] = None,
|
||||||
):
|
):
|
||||||
"""Rename a table in the database.
|
"""Rename a table in the database.
|
||||||
|
|
||||||
@@ -1655,26 +1656,29 @@ class AsyncConnection(object):
|
|||||||
The current name of the table.
|
The current name of the table.
|
||||||
new_name: str
|
new_name: str
|
||||||
The new name of the table.
|
The new name of the table.
|
||||||
cur_namespace: List[str], optional
|
cur_namespace_path: List[str], optional
|
||||||
The namespace of the current table.
|
The namespace of the current table.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
new_namespace: List[str], optional
|
new_namespace_path: List[str], optional
|
||||||
The namespace to move the table to.
|
The namespace to move the table to.
|
||||||
If not specified, defaults to the same as cur_namespace.
|
If not specified, defaults to the same as cur_namespace.
|
||||||
"""
|
"""
|
||||||
if cur_namespace is None:
|
if cur_namespace_path is None:
|
||||||
cur_namespace = []
|
cur_namespace_path = []
|
||||||
if new_namespace is None:
|
if new_namespace_path is None:
|
||||||
new_namespace = []
|
new_namespace_path = []
|
||||||
await self._inner.rename_table(
|
await self._inner.rename_table(
|
||||||
cur_name, new_name, cur_namespace=cur_namespace, new_namespace=new_namespace
|
cur_name,
|
||||||
|
new_name,
|
||||||
|
cur_namespace_path=cur_namespace_path,
|
||||||
|
new_namespace_path=new_namespace_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def drop_table(
|
async def drop_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
ignore_missing: bool = False,
|
ignore_missing: bool = False,
|
||||||
):
|
):
|
||||||
"""Drop a table from the database.
|
"""Drop a table from the database.
|
||||||
@@ -1683,34 +1687,34 @@ class AsyncConnection(object):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The namespace to drop the table from.
|
The namespace to drop the table from.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
ignore_missing: bool, default False
|
ignore_missing: bool, default False
|
||||||
If True, ignore if the table does not exist.
|
If True, ignore if the table does not exist.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
try:
|
try:
|
||||||
await self._inner.drop_table(name, namespace=namespace)
|
await self._inner.drop_table(name, namespace_path=namespace_path)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
if not ignore_missing:
|
if not ignore_missing:
|
||||||
raise e
|
raise e
|
||||||
if f"Table '{name}' was not found" not in str(e):
|
if f"Table '{name}' was not found" not in str(e):
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
async def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
|
||||||
"""Drop all tables from the database.
|
"""Drop all tables from the database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to drop all tables from.
|
The namespace to drop all tables from.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
await self._inner.drop_all_tables(namespace=namespace)
|
await self._inner.drop_all_tables(namespace_path=namespace_path)
|
||||||
|
|
||||||
@deprecation.deprecated(
|
@deprecation.deprecated(
|
||||||
deprecated_in="0.15.1",
|
deprecated_in="0.15.1",
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import sys
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
import weakref
|
import weakref
|
||||||
import logging
|
import logging
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|||||||
298
python/python/lancedb/expr.py
Normal file
298
python/python/lancedb/expr.py
Normal file
@@ -0,0 +1,298 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
"""Type-safe expression builder for filters and projections.
|
||||||
|
|
||||||
|
Instead of writing raw SQL strings you can build expressions with Python
|
||||||
|
operators::
|
||||||
|
|
||||||
|
from lancedb.expr import col, lit
|
||||||
|
|
||||||
|
# filter: age > 18 AND status = 'active'
|
||||||
|
filt = (col("age") > lit(18)) & (col("status") == lit("active"))
|
||||||
|
|
||||||
|
# projection: compute a derived column
|
||||||
|
proj = {"score": col("raw_score") * lit(1.5)}
|
||||||
|
|
||||||
|
table.search().where(filt).select(proj).to_list()
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
from lancedb._lancedb import PyExpr, expr_col, expr_lit, expr_func
|
||||||
|
|
||||||
|
__all__ = ["Expr", "col", "lit", "func"]
|
||||||
|
|
||||||
|
_STR_TO_PA_TYPE: dict = {
|
||||||
|
"bool": pa.bool_(),
|
||||||
|
"boolean": pa.bool_(),
|
||||||
|
"int8": pa.int8(),
|
||||||
|
"int16": pa.int16(),
|
||||||
|
"int32": pa.int32(),
|
||||||
|
"int64": pa.int64(),
|
||||||
|
"uint8": pa.uint8(),
|
||||||
|
"uint16": pa.uint16(),
|
||||||
|
"uint32": pa.uint32(),
|
||||||
|
"uint64": pa.uint64(),
|
||||||
|
"float16": pa.float16(),
|
||||||
|
"float32": pa.float32(),
|
||||||
|
"float": pa.float32(),
|
||||||
|
"float64": pa.float64(),
|
||||||
|
"double": pa.float64(),
|
||||||
|
"string": pa.string(),
|
||||||
|
"utf8": pa.string(),
|
||||||
|
"str": pa.string(),
|
||||||
|
"large_string": pa.large_utf8(),
|
||||||
|
"large_utf8": pa.large_utf8(),
|
||||||
|
"date32": pa.date32(),
|
||||||
|
"date": pa.date32(),
|
||||||
|
"date64": pa.date64(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce(value: "ExprLike") -> "Expr":
|
||||||
|
"""Return *value* as an :class:`Expr`, wrapping plain Python values via
|
||||||
|
:func:`lit` if needed."""
|
||||||
|
if isinstance(value, Expr):
|
||||||
|
return value
|
||||||
|
return lit(value)
|
||||||
|
|
||||||
|
|
||||||
|
# Type alias used in annotations.
|
||||||
|
ExprLike = Union["Expr", bool, int, float, str]
|
||||||
|
|
||||||
|
|
||||||
|
class Expr:
|
||||||
|
"""A type-safe expression node.
|
||||||
|
|
||||||
|
Construct instances with :func:`col` and :func:`lit`, then combine them
|
||||||
|
using Python operators or the named methods below.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> from lancedb.expr import col, lit
|
||||||
|
>>> filt = (col("age") > lit(18)) & (col("name").lower() == lit("alice"))
|
||||||
|
>>> proj = {"double": col("x") * lit(2)}
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Make Expr unhashable so that == returns an Expr rather than being used
|
||||||
|
# for dict keys / set membership.
|
||||||
|
__hash__ = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
def __init__(self, inner: PyExpr) -> None:
|
||||||
|
self._inner = inner
|
||||||
|
|
||||||
|
# ── comparisons ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def __eq__(self, other: ExprLike) -> "Expr": # type: ignore[override]
|
||||||
|
"""Equal to (``col("x") == 1``)."""
|
||||||
|
return Expr(self._inner.eq(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __ne__(self, other: ExprLike) -> "Expr": # type: ignore[override]
|
||||||
|
"""Not equal to (``col("x") != 1``)."""
|
||||||
|
return Expr(self._inner.ne(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __lt__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Less than (``col("x") < 1``)."""
|
||||||
|
return Expr(self._inner.lt(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __le__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Less than or equal to (``col("x") <= 1``)."""
|
||||||
|
return Expr(self._inner.lte(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __gt__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Greater than (``col("x") > 1``)."""
|
||||||
|
return Expr(self._inner.gt(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __ge__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Greater than or equal to (``col("x") >= 1``)."""
|
||||||
|
return Expr(self._inner.gte(_coerce(other)._inner))
|
||||||
|
|
||||||
|
# ── logical ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def __and__(self, other: "Expr") -> "Expr":
|
||||||
|
"""Logical AND (``expr_a & expr_b``)."""
|
||||||
|
return Expr(self._inner.and_(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __or__(self, other: "Expr") -> "Expr":
|
||||||
|
"""Logical OR (``expr_a | expr_b``)."""
|
||||||
|
return Expr(self._inner.or_(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __invert__(self) -> "Expr":
|
||||||
|
"""Logical NOT (``~expr``)."""
|
||||||
|
return Expr(self._inner.not_())
|
||||||
|
|
||||||
|
# ── arithmetic ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def __add__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Add (``col("x") + 1``)."""
|
||||||
|
return Expr(self._inner.add(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __radd__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Right-hand add (``1 + col("x")``)."""
|
||||||
|
return Expr(_coerce(other)._inner.add(self._inner))
|
||||||
|
|
||||||
|
def __sub__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Subtract (``col("x") - 1``)."""
|
||||||
|
return Expr(self._inner.sub(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __rsub__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Right-hand subtract (``1 - col("x")``)."""
|
||||||
|
return Expr(_coerce(other)._inner.sub(self._inner))
|
||||||
|
|
||||||
|
def __mul__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Multiply (``col("x") * 2``)."""
|
||||||
|
return Expr(self._inner.mul(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __rmul__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Right-hand multiply (``2 * col("x")``)."""
|
||||||
|
return Expr(_coerce(other)._inner.mul(self._inner))
|
||||||
|
|
||||||
|
def __truediv__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Divide (``col("x") / 2``)."""
|
||||||
|
return Expr(self._inner.div(_coerce(other)._inner))
|
||||||
|
|
||||||
|
def __rtruediv__(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Right-hand divide (``1 / col("x")``)."""
|
||||||
|
return Expr(_coerce(other)._inner.div(self._inner))
|
||||||
|
|
||||||
|
# ── string methods ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def lower(self) -> "Expr":
|
||||||
|
"""Convert string column values to lowercase."""
|
||||||
|
return Expr(self._inner.lower())
|
||||||
|
|
||||||
|
def upper(self) -> "Expr":
|
||||||
|
"""Convert string column values to uppercase."""
|
||||||
|
return Expr(self._inner.upper())
|
||||||
|
|
||||||
|
def contains(self, substr: "ExprLike") -> "Expr":
|
||||||
|
"""Return True where the string contains *substr*."""
|
||||||
|
return Expr(self._inner.contains(_coerce(substr)._inner))
|
||||||
|
|
||||||
|
# ── type cast ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def cast(self, data_type: Union[str, "pa.DataType"]) -> "Expr":
|
||||||
|
"""Cast values to *data_type*.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
data_type:
|
||||||
|
A PyArrow ``DataType`` (e.g. ``pa.int32()``) or one of the type
|
||||||
|
name strings: ``"bool"``, ``"int8"``, ``"int16"``, ``"int32"``,
|
||||||
|
``"int64"``, ``"uint8"``–``"uint64"``, ``"float32"``,
|
||||||
|
``"float64"``, ``"string"``, ``"date32"``, ``"date64"``.
|
||||||
|
"""
|
||||||
|
if isinstance(data_type, str):
|
||||||
|
try:
|
||||||
|
data_type = _STR_TO_PA_TYPE[data_type]
|
||||||
|
except KeyError:
|
||||||
|
raise ValueError(
|
||||||
|
f"unsupported data type: '{data_type}'. Supported: "
|
||||||
|
f"{', '.join(_STR_TO_PA_TYPE)}"
|
||||||
|
)
|
||||||
|
return Expr(self._inner.cast(data_type))
|
||||||
|
|
||||||
|
# ── named comparison helpers (alternative to operators) ──────────────────
|
||||||
|
|
||||||
|
def eq(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Equal to."""
|
||||||
|
return self.__eq__(other)
|
||||||
|
|
||||||
|
def ne(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Not equal to."""
|
||||||
|
return self.__ne__(other)
|
||||||
|
|
||||||
|
def lt(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Less than."""
|
||||||
|
return self.__lt__(other)
|
||||||
|
|
||||||
|
def lte(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Less than or equal to."""
|
||||||
|
return self.__le__(other)
|
||||||
|
|
||||||
|
def gt(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Greater than."""
|
||||||
|
return self.__gt__(other)
|
||||||
|
|
||||||
|
def gte(self, other: ExprLike) -> "Expr":
|
||||||
|
"""Greater than or equal to."""
|
||||||
|
return self.__ge__(other)
|
||||||
|
|
||||||
|
def and_(self, other: "Expr") -> "Expr":
|
||||||
|
"""Logical AND."""
|
||||||
|
return self.__and__(other)
|
||||||
|
|
||||||
|
def or_(self, other: "Expr") -> "Expr":
|
||||||
|
"""Logical OR."""
|
||||||
|
return self.__or__(other)
|
||||||
|
|
||||||
|
# ── utilities ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def to_sql(self) -> str:
|
||||||
|
"""Render the expression as a SQL string (useful for debugging)."""
|
||||||
|
return self._inner.to_sql()
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"Expr({self._inner.to_sql()})"
|
||||||
|
|
||||||
|
|
||||||
|
# ── free functions ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def col(name: str) -> Expr:
|
||||||
|
"""Reference a table column by name.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name:
|
||||||
|
The column name.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> from lancedb.expr import col, lit
|
||||||
|
>>> col("age") > lit(18)
|
||||||
|
Expr((age > 18))
|
||||||
|
"""
|
||||||
|
return Expr(expr_col(name))
|
||||||
|
|
||||||
|
|
||||||
|
def lit(value: Union[bool, int, float, str]) -> Expr:
|
||||||
|
"""Create a literal (constant) value expression.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
value:
|
||||||
|
A Python ``bool``, ``int``, ``float``, or ``str``.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> from lancedb.expr import col, lit
|
||||||
|
>>> col("price") * lit(1.1)
|
||||||
|
Expr((price * 1.1))
|
||||||
|
"""
|
||||||
|
return Expr(expr_lit(value))
|
||||||
|
|
||||||
|
|
||||||
|
def func(name: str, *args: ExprLike) -> Expr:
|
||||||
|
"""Call an arbitrary SQL function by name.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name:
|
||||||
|
The SQL function name (e.g. ``"lower"``, ``"upper"``).
|
||||||
|
*args:
|
||||||
|
The function arguments as :class:`Expr` or plain Python literals.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> from lancedb.expr import col, func
|
||||||
|
>>> func("lower", col("name"))
|
||||||
|
Expr(lower(name))
|
||||||
|
"""
|
||||||
|
inner_args = [_coerce(a)._inner for a in args]
|
||||||
|
return Expr(expr_func(name, inner_args))
|
||||||
@@ -2,70 +2,3 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
"""I/O utilities and interfaces for LanceDB."""
|
"""I/O utilities and interfaces for LanceDB."""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
|
|
||||||
class StorageOptionsProvider(ABC):
|
|
||||||
"""Abstract base class for providing storage options to LanceDB tables.
|
|
||||||
|
|
||||||
Storage options providers enable automatic credential refresh for cloud
|
|
||||||
storage backends (e.g., AWS S3, Azure Blob Storage, GCS). When credentials
|
|
||||||
have an expiration time, the provider's fetch_storage_options() method will
|
|
||||||
be called periodically to get fresh credentials before they expire.
|
|
||||||
|
|
||||||
Example
|
|
||||||
-------
|
|
||||||
>>> class MyProvider(StorageOptionsProvider):
|
|
||||||
... def fetch_storage_options(self) -> Dict[str, str]:
|
|
||||||
... # Fetch fresh credentials from your credential manager
|
|
||||||
... return {
|
|
||||||
... "aws_access_key_id": "...",
|
|
||||||
... "aws_secret_access_key": "...",
|
|
||||||
... "expires_at_millis": "1234567890000" # Optional
|
|
||||||
... }
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def fetch_storage_options(self) -> Dict[str, str]:
|
|
||||||
"""Fetch fresh storage credentials.
|
|
||||||
|
|
||||||
This method is called by LanceDB when credentials need to be refreshed.
|
|
||||||
If the returned dictionary contains an "expires_at_millis" key with a
|
|
||||||
Unix timestamp in milliseconds, LanceDB will automatically refresh the
|
|
||||||
credentials before that time. If the key is not present, credentials
|
|
||||||
are assumed to not expire.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Dict[str, str]
|
|
||||||
Dictionary containing cloud storage credentials and optionally an
|
|
||||||
expiration time:
|
|
||||||
- "expires_at_millis" (optional): Unix timestamp in milliseconds when
|
|
||||||
credentials expire
|
|
||||||
- Provider-specific credential keys (e.g., aws_access_key_id,
|
|
||||||
aws_secret_access_key, etc.)
|
|
||||||
|
|
||||||
Raises
|
|
||||||
------
|
|
||||||
RuntimeError
|
|
||||||
If credentials cannot be fetched or are invalid
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def provider_id(self) -> str:
|
|
||||||
"""Return a human-readable unique identifier for this provider instance.
|
|
||||||
|
|
||||||
This identifier is used for caching and equality comparison. Two providers
|
|
||||||
with the same ID will share the same cached object store connection.
|
|
||||||
|
|
||||||
The default implementation uses the class name and string representation.
|
|
||||||
Override this method if you need custom identification logic.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
str
|
|
||||||
A unique identifier for this provider instance
|
|
||||||
"""
|
|
||||||
return f"{self.__class__.__name__} {{ repr: {str(self)!r} }}"
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -38,6 +38,7 @@ from .rerankers.base import Reranker
|
|||||||
from .rerankers.rrf import RRFReranker
|
from .rerankers.rrf import RRFReranker
|
||||||
from .rerankers.util import check_reranker_result
|
from .rerankers.util import check_reranker_result
|
||||||
from .util import flatten_columns
|
from .util import flatten_columns
|
||||||
|
from .expr import Expr
|
||||||
from lancedb._lancedb import fts_query_to_json
|
from lancedb._lancedb import fts_query_to_json
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
@@ -70,7 +71,7 @@ def ensure_vector_query(
|
|||||||
) -> Union[List[float], List[List[float]], pa.Array, List[pa.Array]]:
|
) -> Union[List[float], List[List[float]], pa.Array, List[pa.Array]]:
|
||||||
if isinstance(val, list):
|
if isinstance(val, list):
|
||||||
if len(val) == 0:
|
if len(val) == 0:
|
||||||
return ValueError("Vector query must be a non-empty list")
|
raise ValueError("Vector query must be a non-empty list")
|
||||||
sample = val[0]
|
sample = val[0]
|
||||||
else:
|
else:
|
||||||
if isinstance(val, float):
|
if isinstance(val, float):
|
||||||
@@ -83,7 +84,7 @@ def ensure_vector_query(
|
|||||||
return val
|
return val
|
||||||
if isinstance(sample, list):
|
if isinstance(sample, list):
|
||||||
if len(sample) == 0:
|
if len(sample) == 0:
|
||||||
return ValueError("Vector query must be a non-empty list")
|
raise ValueError("Vector query must be a non-empty list")
|
||||||
if isinstance(sample[0], float):
|
if isinstance(sample[0], float):
|
||||||
# val is list of list of floats
|
# val is list of list of floats
|
||||||
return val
|
return val
|
||||||
@@ -449,8 +450,8 @@ class Query(pydantic.BaseModel):
|
|||||||
ensure_vector_query,
|
ensure_vector_query,
|
||||||
] = None
|
] = None
|
||||||
|
|
||||||
# sql filter to refine the query with
|
# sql filter or type-safe Expr to refine the query with
|
||||||
filter: Optional[str] = None
|
filter: Optional[Union[str, Expr]] = None
|
||||||
|
|
||||||
# if True then apply the filter after vector search
|
# if True then apply the filter after vector search
|
||||||
postfilter: Optional[bool] = None
|
postfilter: Optional[bool] = None
|
||||||
@@ -464,8 +465,8 @@ class Query(pydantic.BaseModel):
|
|||||||
# distance type to use for vector search
|
# distance type to use for vector search
|
||||||
distance_type: Optional[str] = None
|
distance_type: Optional[str] = None
|
||||||
|
|
||||||
# which columns to return in the results
|
# which columns to return in the results (dict values may be str or Expr)
|
||||||
columns: Optional[Union[List[str], Dict[str, str]]] = None
|
columns: Optional[Union[List[str], Dict[str, Union[str, Expr]]]] = None
|
||||||
|
|
||||||
# minimum number of IVF partitions to search
|
# minimum number of IVF partitions to search
|
||||||
#
|
#
|
||||||
@@ -606,6 +607,7 @@ class LanceQueryBuilder(ABC):
|
|||||||
query,
|
query,
|
||||||
ordering_field_name=ordering_field_name,
|
ordering_field_name=ordering_field_name,
|
||||||
fts_columns=fts_columns,
|
fts_columns=fts_columns,
|
||||||
|
fast_search=fast_search,
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(query, list):
|
if isinstance(query, list):
|
||||||
@@ -855,14 +857,15 @@ class LanceQueryBuilder(ABC):
|
|||||||
self._offset = offset
|
self._offset = offset
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def select(self, columns: Union[list[str], dict[str, str]]) -> Self:
|
def select(self, columns: Union[list[str], dict[str, Union[str, Expr]]]) -> Self:
|
||||||
"""Set the columns to return.
|
"""Set the columns to return.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
columns: list of str, or dict of str to str default None
|
columns: list of str, or dict of str to str or Expr
|
||||||
List of column names to be fetched.
|
List of column names to be fetched.
|
||||||
Or a dictionary of column names to SQL expressions.
|
Or a dictionary of column names to SQL expressions or
|
||||||
|
:class:`~lancedb.expr.Expr` objects.
|
||||||
All columns are fetched if None or unspecified.
|
All columns are fetched if None or unspecified.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
@@ -876,15 +879,15 @@ class LanceQueryBuilder(ABC):
|
|||||||
raise ValueError("columns must be a list or a dictionary")
|
raise ValueError("columns must be a list or a dictionary")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def where(self, where: str, prefilter: bool = True) -> Self:
|
def where(self, where: Union[str, Expr], prefilter: bool = True) -> Self:
|
||||||
"""Set the where clause.
|
"""Set the where clause.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
where: str
|
where: str or :class:`~lancedb.expr.Expr`
|
||||||
The where clause which is a valid SQL where clause. See
|
The filter condition. Can be a SQL string or a type-safe
|
||||||
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
|
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
|
||||||
for valid SQL expressions.
|
and :func:`~lancedb.expr.lit`.
|
||||||
prefilter: bool, default True
|
prefilter: bool, default True
|
||||||
If True, apply the filter before vector search, otherwise the
|
If True, apply the filter before vector search, otherwise the
|
||||||
filter is applied on the result of vector search.
|
filter is applied on the result of vector search.
|
||||||
@@ -1354,15 +1357,17 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
|
|
||||||
return result_set
|
return result_set
|
||||||
|
|
||||||
def where(self, where: str, prefilter: bool = None) -> LanceVectorQueryBuilder:
|
def where(
|
||||||
|
self, where: Union[str, Expr], prefilter: bool = None
|
||||||
|
) -> LanceVectorQueryBuilder:
|
||||||
"""Set the where clause.
|
"""Set the where clause.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
where: str
|
where: str or :class:`~lancedb.expr.Expr`
|
||||||
The where clause which is a valid SQL where clause. See
|
The filter condition. Can be a SQL string or a type-safe
|
||||||
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
|
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
|
||||||
for valid SQL expressions.
|
and :func:`~lancedb.expr.lit`.
|
||||||
prefilter: bool, default True
|
prefilter: bool, default True
|
||||||
If True, apply the filter before vector search, otherwise the
|
If True, apply the filter before vector search, otherwise the
|
||||||
filter is applied on the result of vector search.
|
filter is applied on the result of vector search.
|
||||||
@@ -1456,12 +1461,14 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
query: str | FullTextQuery,
|
query: str | FullTextQuery,
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||||
|
fast_search: bool = None,
|
||||||
):
|
):
|
||||||
super().__init__(table)
|
super().__init__(table)
|
||||||
self._query = query
|
self._query = query
|
||||||
self._phrase_query = False
|
self._phrase_query = False
|
||||||
self.ordering_field_name = ordering_field_name
|
self.ordering_field_name = ordering_field_name
|
||||||
self._reranker = None
|
self._reranker = None
|
||||||
|
self._fast_search = fast_search
|
||||||
if isinstance(fts_columns, str):
|
if isinstance(fts_columns, str):
|
||||||
fts_columns = [fts_columns]
|
fts_columns = [fts_columns]
|
||||||
self._fts_columns = fts_columns
|
self._fts_columns = fts_columns
|
||||||
@@ -1483,6 +1490,19 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
self._phrase_query = phrase_query
|
self._phrase_query = phrase_query
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def fast_search(self) -> LanceFtsQueryBuilder:
|
||||||
|
"""
|
||||||
|
Skip a flat search of unindexed data. This will improve
|
||||||
|
search performance but search results will not include unindexed data.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
LanceFtsQueryBuilder
|
||||||
|
The LanceFtsQueryBuilder object.
|
||||||
|
"""
|
||||||
|
self._fast_search = True
|
||||||
|
return self
|
||||||
|
|
||||||
def to_query_object(self) -> Query:
|
def to_query_object(self) -> Query:
|
||||||
return Query(
|
return Query(
|
||||||
columns=self._columns,
|
columns=self._columns,
|
||||||
@@ -1494,6 +1514,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
query=self._query, columns=self._fts_columns
|
query=self._query, columns=self._fts_columns
|
||||||
),
|
),
|
||||||
offset=self._offset,
|
offset=self._offset,
|
||||||
|
fast_search=self._fast_search,
|
||||||
)
|
)
|
||||||
|
|
||||||
def output_schema(self) -> pa.Schema:
|
def output_schema(self) -> pa.Schema:
|
||||||
@@ -2188,8 +2209,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
self._vector_query.select(self._columns)
|
self._vector_query.select(self._columns)
|
||||||
self._fts_query.select(self._columns)
|
self._fts_query.select(self._columns)
|
||||||
if self._where:
|
if self._where:
|
||||||
self._vector_query.where(self._where, self._postfilter)
|
self._vector_query.where(self._where, not self._postfilter)
|
||||||
self._fts_query.where(self._where, self._postfilter)
|
self._fts_query.where(self._where, not self._postfilter)
|
||||||
if self._with_row_id:
|
if self._with_row_id:
|
||||||
self._vector_query.with_row_id(True)
|
self._vector_query.with_row_id(True)
|
||||||
self._fts_query.with_row_id(True)
|
self._fts_query.with_row_id(True)
|
||||||
@@ -2269,10 +2290,20 @@ class AsyncQueryBase(object):
|
|||||||
"""
|
"""
|
||||||
if isinstance(columns, list) and all(isinstance(c, str) for c in columns):
|
if isinstance(columns, list) and all(isinstance(c, str) for c in columns):
|
||||||
self._inner.select_columns(columns)
|
self._inner.select_columns(columns)
|
||||||
elif isinstance(columns, dict) and all(
|
elif isinstance(columns, dict) and all(isinstance(k, str) for k in columns):
|
||||||
isinstance(k, str) and isinstance(v, str) for k, v in columns.items()
|
if any(isinstance(v, Expr) for v in columns.values()):
|
||||||
):
|
# At least one value is an Expr — use the type-safe path.
|
||||||
self._inner.select(list(columns.items()))
|
from .expr import _coerce
|
||||||
|
|
||||||
|
pairs = [(k, _coerce(v)._inner) for k, v in columns.items()]
|
||||||
|
self._inner.select_expr(pairs)
|
||||||
|
elif all(isinstance(v, str) for v in columns.values()):
|
||||||
|
self._inner.select(list(columns.items()))
|
||||||
|
else:
|
||||||
|
raise TypeError(
|
||||||
|
"dict values must be str or Expr, got "
|
||||||
|
+ str({k: type(v) for k, v in columns.items()})
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise TypeError("columns must be a list of column names or a dict")
|
raise TypeError("columns must be a list of column names or a dict")
|
||||||
return self
|
return self
|
||||||
@@ -2512,11 +2543,13 @@ class AsyncStandardQuery(AsyncQueryBase):
|
|||||||
"""
|
"""
|
||||||
super().__init__(inner)
|
super().__init__(inner)
|
||||||
|
|
||||||
def where(self, predicate: str) -> Self:
|
def where(self, predicate: Union[str, Expr]) -> Self:
|
||||||
"""
|
"""
|
||||||
Only return rows matching the given predicate
|
Only return rows matching the given predicate
|
||||||
|
|
||||||
The predicate should be supplied as an SQL query string.
|
The predicate can be a SQL string or a type-safe
|
||||||
|
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
|
||||||
|
and :func:`~lancedb.expr.lit`.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -2528,7 +2561,10 @@ class AsyncStandardQuery(AsyncQueryBase):
|
|||||||
Filtering performance can often be improved by creating a scalar index
|
Filtering performance can often be improved by creating a scalar index
|
||||||
on the filter column(s).
|
on the filter column(s).
|
||||||
"""
|
"""
|
||||||
self._inner.where(predicate)
|
if isinstance(predicate, Expr):
|
||||||
|
self._inner.where_expr(predicate._inner)
|
||||||
|
else:
|
||||||
|
self._inner.where(predicate)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def limit(self, limit: int) -> Self:
|
def limit(self, limit: int) -> Self:
|
||||||
|
|||||||
@@ -111,7 +111,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
@override
|
@override
|
||||||
def list_namespaces(
|
def list_namespaces(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListNamespacesResponse:
|
) -> ListNamespacesResponse:
|
||||||
@@ -119,7 +119,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The parent namespace to list namespaces in.
|
The parent namespace to list namespaces in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -133,18 +133,18 @@ class RemoteDBConnection(DBConnection):
|
|||||||
ListNamespacesResponse
|
ListNamespacesResponse
|
||||||
Response containing namespace names and optional page_token for pagination.
|
Response containing namespace names and optional page_token for pagination.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.list_namespaces(
|
self._conn.list_namespaces(
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def create_namespace(
|
def create_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
properties: Optional[Dict[str, str]] = None,
|
properties: Optional[Dict[str, str]] = None,
|
||||||
) -> CreateNamespaceResponse:
|
) -> CreateNamespaceResponse:
|
||||||
@@ -152,7 +152,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to create.
|
The namespace identifier to create.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||||
@@ -167,14 +167,14 @@ class RemoteDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.create_namespace(
|
self._conn.create_namespace(
|
||||||
namespace=namespace, mode=mode, properties=properties
|
namespace_path=namespace_path, mode=mode, properties=properties
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_namespace(
|
def drop_namespace(
|
||||||
self,
|
self,
|
||||||
namespace: List[str],
|
namespace_path: List[str],
|
||||||
mode: Optional[str] = None,
|
mode: Optional[str] = None,
|
||||||
behavior: Optional[str] = None,
|
behavior: Optional[str] = None,
|
||||||
) -> DropNamespaceResponse:
|
) -> DropNamespaceResponse:
|
||||||
@@ -182,7 +182,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to drop.
|
The namespace identifier to drop.
|
||||||
mode: str, optional
|
mode: str, optional
|
||||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||||
@@ -196,16 +196,20 @@ class RemoteDBConnection(DBConnection):
|
|||||||
Response containing properties and transaction_id if applicable.
|
Response containing properties and transaction_id if applicable.
|
||||||
"""
|
"""
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
self._conn.drop_namespace(
|
||||||
|
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
def describe_namespace(
|
||||||
|
self, namespace_path: List[str]
|
||||||
|
) -> DescribeNamespaceResponse:
|
||||||
"""Describe a namespace.
|
"""Describe a namespace.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str]
|
namespace_path: List[str]
|
||||||
The namespace identifier to describe.
|
The namespace identifier to describe.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
@@ -213,12 +217,12 @@ class RemoteDBConnection(DBConnection):
|
|||||||
DescribeNamespaceResponse
|
DescribeNamespaceResponse
|
||||||
Response containing the namespace properties.
|
Response containing the namespace properties.
|
||||||
"""
|
"""
|
||||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def list_tables(
|
def list_tables(
|
||||||
self,
|
self,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> ListTablesResponse:
|
) -> ListTablesResponse:
|
||||||
@@ -226,7 +230,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
page_token: str, optional
|
page_token: str, optional
|
||||||
@@ -240,11 +244,11 @@ class RemoteDBConnection(DBConnection):
|
|||||||
ListTablesResponse
|
ListTablesResponse
|
||||||
Response containing table names and optional page_token for pagination.
|
Response containing table names and optional page_token for pagination.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.list_tables(
|
self._conn.list_tables(
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -254,7 +258,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
limit: int = 10,
|
limit: int = 10,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
"""List the names of all tables in the database.
|
"""List the names of all tables in the database.
|
||||||
|
|
||||||
@@ -263,7 +267,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], default []
|
namespace_path: List[str], default []
|
||||||
The namespace to list tables in.
|
The namespace to list tables in.
|
||||||
Empty list represents root namespace.
|
Empty list represents root namespace.
|
||||||
page_token: str
|
page_token: str
|
||||||
@@ -282,11 +286,11 @@ class RemoteDBConnection(DBConnection):
|
|||||||
DeprecationWarning,
|
DeprecationWarning,
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
return LOOP.run(
|
return LOOP.run(
|
||||||
self._conn.table_names(
|
self._conn.table_names(
|
||||||
namespace=namespace, start_after=page_token, limit=limit
|
namespace_path=namespace_path, start_after=page_token, limit=limit
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -295,7 +299,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
) -> Table:
|
) -> Table:
|
||||||
@@ -305,7 +309,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to open the table from.
|
The namespace to open the table from.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
|
|
||||||
@@ -315,15 +319,15 @@ class RemoteDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
from .table import RemoteTable
|
from .table import RemoteTable
|
||||||
|
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
if index_cache_size is not None:
|
if index_cache_size is not None:
|
||||||
logging.info(
|
logging.info(
|
||||||
"index_cache_size is ignored in LanceDb Cloud"
|
"index_cache_size is ignored in LanceDb Cloud"
|
||||||
" (there is no local cache to configure)"
|
" (there is no local cache to configure)"
|
||||||
)
|
)
|
||||||
|
|
||||||
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
table = LOOP.run(self._conn.open_table(name, namespace_path=namespace_path))
|
||||||
return RemoteTable(table, self.db_name)
|
return RemoteTable(table, self.db_name)
|
||||||
|
|
||||||
def clone_table(
|
def clone_table(
|
||||||
@@ -331,7 +335,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
target_table_name: str,
|
target_table_name: str,
|
||||||
source_uri: str,
|
source_uri: str,
|
||||||
*,
|
*,
|
||||||
target_namespace: Optional[List[str]] = None,
|
target_namespace_path: Optional[List[str]] = None,
|
||||||
source_version: Optional[int] = None,
|
source_version: Optional[int] = None,
|
||||||
source_tag: Optional[str] = None,
|
source_tag: Optional[str] = None,
|
||||||
is_shallow: bool = True,
|
is_shallow: bool = True,
|
||||||
@@ -344,7 +348,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
The name of the target table to create.
|
The name of the target table to create.
|
||||||
source_uri: str
|
source_uri: str
|
||||||
The URI of the source table to clone from.
|
The URI of the source table to clone from.
|
||||||
target_namespace: List[str], optional
|
target_namespace_path: List[str], optional
|
||||||
The namespace for the target table.
|
The namespace for the target table.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
source_version: int, optional
|
source_version: int, optional
|
||||||
@@ -361,13 +365,13 @@ class RemoteDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
from .table import RemoteTable
|
from .table import RemoteTable
|
||||||
|
|
||||||
if target_namespace is None:
|
if target_namespace_path is None:
|
||||||
target_namespace = []
|
target_namespace_path = []
|
||||||
table = LOOP.run(
|
table = LOOP.run(
|
||||||
self._conn.clone_table(
|
self._conn.clone_table(
|
||||||
target_table_name,
|
target_table_name,
|
||||||
source_uri,
|
source_uri,
|
||||||
target_namespace=target_namespace,
|
target_namespace_path=target_namespace_path,
|
||||||
source_version=source_version,
|
source_version=source_version,
|
||||||
source_tag=source_tag,
|
source_tag=source_tag,
|
||||||
is_shallow=is_shallow,
|
is_shallow=is_shallow,
|
||||||
@@ -387,7 +391,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
exist_ok: bool = False,
|
exist_ok: bool = False,
|
||||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
) -> Table:
|
) -> Table:
|
||||||
"""Create a [Table][lancedb.table.Table] in the database.
|
"""Create a [Table][lancedb.table.Table] in the database.
|
||||||
|
|
||||||
@@ -395,7 +399,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to create the table in.
|
The namespace to create the table in.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
data: The data to initialize the table, *optional*
|
data: The data to initialize the table, *optional*
|
||||||
@@ -495,8 +499,8 @@ class RemoteDBConnection(DBConnection):
|
|||||||
mode = "exist_ok"
|
mode = "exist_ok"
|
||||||
elif not mode:
|
elif not mode:
|
||||||
mode = "exist_ok"
|
mode = "exist_ok"
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
validate_table_name(name)
|
validate_table_name(name)
|
||||||
if embedding_functions is not None:
|
if embedding_functions is not None:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
@@ -511,7 +515,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
self._conn.create_table(
|
self._conn.create_table(
|
||||||
name,
|
name,
|
||||||
data,
|
data,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
mode=mode,
|
mode=mode,
|
||||||
schema=schema,
|
schema=schema,
|
||||||
on_bad_vectors=on_bad_vectors,
|
on_bad_vectors=on_bad_vectors,
|
||||||
@@ -521,28 +525,28 @@ class RemoteDBConnection(DBConnection):
|
|||||||
return RemoteTable(table, self.db_name)
|
return RemoteTable(table, self.db_name)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||||
"""Drop a table from the database.
|
"""Drop a table from the database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
namespace_path: List[str], optional
|
||||||
The namespace to drop the table from.
|
The namespace to drop the table from.
|
||||||
None or empty list represents root namespace.
|
None or empty list represents root namespace.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
LOOP.run(self._conn.drop_table(name, namespace=namespace))
|
LOOP.run(self._conn.drop_table(name, namespace_path=namespace_path))
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def rename_table(
|
def rename_table(
|
||||||
self,
|
self,
|
||||||
cur_name: str,
|
cur_name: str,
|
||||||
new_name: str,
|
new_name: str,
|
||||||
cur_namespace: Optional[List[str]] = None,
|
cur_namespace_path: Optional[List[str]] = None,
|
||||||
new_namespace: Optional[List[str]] = None,
|
new_namespace_path: Optional[List[str]] = None,
|
||||||
):
|
):
|
||||||
"""Rename a table in the database.
|
"""Rename a table in the database.
|
||||||
|
|
||||||
@@ -553,19 +557,19 @@ class RemoteDBConnection(DBConnection):
|
|||||||
new_name: str
|
new_name: str
|
||||||
The new name of the table.
|
The new name of the table.
|
||||||
"""
|
"""
|
||||||
if cur_namespace is None:
|
if cur_namespace_path is None:
|
||||||
cur_namespace = []
|
cur_namespace_path = []
|
||||||
if new_namespace is None:
|
if new_namespace_path is None:
|
||||||
new_namespace = []
|
new_namespace_path = []
|
||||||
LOOP.run(
|
LOOP.run(
|
||||||
self._conn.rename_table(
|
self._conn.rename_table(
|
||||||
cur_name,
|
cur_name,
|
||||||
new_name,
|
new_name,
|
||||||
cur_namespace=cur_namespace,
|
cur_namespace_path=cur_namespace_path,
|
||||||
new_namespace=new_namespace,
|
new_namespace_path=new_namespace_path,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Close the connection to the database."""
|
"""Close the connection to the database."""
|
||||||
self._client.close()
|
self._conn.close()
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
import logging
|
import logging
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import Dict, Iterable, List, Optional, Union, Literal
|
from typing import Any, Callable, Dict, Iterable, List, Optional, Union, Literal
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from lancedb._lancedb import (
|
from lancedb._lancedb import (
|
||||||
@@ -35,6 +35,7 @@ import pyarrow as pa
|
|||||||
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||||
from lancedb.merge import LanceMergeInsertBuilder
|
from lancedb.merge import LanceMergeInsertBuilder
|
||||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||||
|
from lancedb.table import _normalize_progress
|
||||||
|
|
||||||
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
|
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
|
||||||
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
|
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
|
||||||
@@ -218,8 +219,6 @@ class RemoteTable(Table):
|
|||||||
train: bool = True,
|
train: bool = True,
|
||||||
):
|
):
|
||||||
"""Create an index on the table.
|
"""Create an index on the table.
|
||||||
Currently, the only parameters that matter are
|
|
||||||
the metric and the vector column name.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -250,11 +249,6 @@ class RemoteTable(Table):
|
|||||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if num_sub_vectors is not None:
|
|
||||||
logging.warning(
|
|
||||||
"num_sub_vectors is not supported on LanceDB cloud."
|
|
||||||
"This parameter will be tuned automatically."
|
|
||||||
)
|
|
||||||
if accelerator is not None:
|
if accelerator is not None:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"GPU accelerator is not yet supported on LanceDB cloud."
|
"GPU accelerator is not yet supported on LanceDB cloud."
|
||||||
@@ -315,6 +309,7 @@ class RemoteTable(Table):
|
|||||||
mode: str = "append",
|
mode: str = "append",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
|
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||||
) -> AddResult:
|
) -> AddResult:
|
||||||
"""Add more data to the [Table](Table). It has the same API signature as
|
"""Add more data to the [Table](Table). It has the same API signature as
|
||||||
the OSS version.
|
the OSS version.
|
||||||
@@ -337,17 +332,29 @@ class RemoteTable(Table):
|
|||||||
One of "error", "drop", "fill".
|
One of "error", "drop", "fill".
|
||||||
fill_value: float, default 0.
|
fill_value: float, default 0.
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
progress: bool, callable, or tqdm-like, optional
|
||||||
|
A callback or tqdm-compatible progress bar. See
|
||||||
|
:meth:`Table.add` for details.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
AddResult
|
AddResult
|
||||||
An object containing the new version number of the table after adding data.
|
An object containing the new version number of the table after adding data.
|
||||||
"""
|
"""
|
||||||
return LOOP.run(
|
progress, owns = _normalize_progress(progress)
|
||||||
self._table.add(
|
try:
|
||||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
return LOOP.run(
|
||||||
|
self._table.add(
|
||||||
|
data,
|
||||||
|
mode=mode,
|
||||||
|
on_bad_vectors=on_bad_vectors,
|
||||||
|
fill_value=fill_value,
|
||||||
|
progress=progress,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
finally:
|
||||||
|
if owns:
|
||||||
|
progress.close()
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
@@ -647,6 +654,45 @@ class RemoteTable(Table):
|
|||||||
def drop_index(self, index_name: str):
|
def drop_index(self, index_name: str):
|
||||||
return LOOP.run(self._table.drop_index(index_name))
|
return LOOP.run(self._table.drop_index(index_name))
|
||||||
|
|
||||||
|
def prewarm_index(self, name: str) -> None:
|
||||||
|
"""Prewarm an index in the table.
|
||||||
|
|
||||||
|
This is a hint to the database that the index will be accessed in the
|
||||||
|
future and should be loaded into memory if possible. This can reduce
|
||||||
|
cold-start latency for subsequent queries.
|
||||||
|
|
||||||
|
This call initiates prewarming and returns once the request is accepted.
|
||||||
|
It is idempotent and safe to call from multiple clients concurrently.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name: str
|
||||||
|
The name of the index to prewarm
|
||||||
|
"""
|
||||||
|
return LOOP.run(self._table.prewarm_index(name))
|
||||||
|
|
||||||
|
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||||
|
"""Prewarm data for the table.
|
||||||
|
|
||||||
|
This is a hint to the database that the given columns will be accessed
|
||||||
|
in the future and the database should prefetch the data if possible.
|
||||||
|
Currently only supported on remote tables.
|
||||||
|
|
||||||
|
This call initiates prewarming and returns once the request is accepted.
|
||||||
|
It is idempotent and safe to call from multiple clients concurrently.
|
||||||
|
|
||||||
|
This operation has a large upfront cost but can speed up future queries
|
||||||
|
that need to fetch the given columns. Large columns such as embeddings
|
||||||
|
or binary data may not be practical to prewarm. This feature is intended
|
||||||
|
for workloads that issue many queries against the same columns.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
columns: list of str, optional
|
||||||
|
The columns to prewarm. If None, all columns are prewarmed.
|
||||||
|
"""
|
||||||
|
return LOOP.run(self._table.prewarm_data(columns))
|
||||||
|
|
||||||
def wait_for_index(
|
def wait_for_index(
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||||
):
|
):
|
||||||
|
|||||||
214
python/python/lancedb/scannable.py
Normal file
214
python/python/lancedb/scannable.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from functools import singledispatch
|
||||||
|
import sys
|
||||||
|
from typing import Callable, Iterator, Optional
|
||||||
|
from lancedb.arrow import to_arrow
|
||||||
|
import pyarrow as pa
|
||||||
|
import pyarrow.dataset as ds
|
||||||
|
|
||||||
|
from .pydantic import LanceModel
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Scannable:
|
||||||
|
schema: pa.Schema
|
||||||
|
num_rows: Optional[int]
|
||||||
|
# Factory function to create a new reader each time (supports re-scanning)
|
||||||
|
reader: Callable[[], pa.RecordBatchReader]
|
||||||
|
# Whether reader can be called more than once. For example, an iterator can
|
||||||
|
# only be consumed once, while a DataFrame can be converted to a new reader
|
||||||
|
# each time.
|
||||||
|
rescannable: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
@singledispatch
|
||||||
|
def to_scannable(data) -> Scannable:
|
||||||
|
# Fallback: try iterable protocol
|
||||||
|
if hasattr(data, "__iter__"):
|
||||||
|
return _from_iterable(iter(data))
|
||||||
|
raise NotImplementedError(f"to_scannable not implemented for type {type(data)}")
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(pa.RecordBatchReader)
|
||||||
|
def _from_reader(data: pa.RecordBatchReader) -> Scannable:
|
||||||
|
# RecordBatchReader can only be consumed once - not rescannable
|
||||||
|
return Scannable(
|
||||||
|
schema=data.schema, num_rows=None, reader=lambda: data, rescannable=False
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(pa.RecordBatch)
|
||||||
|
def _from_batch(data: pa.RecordBatch) -> Scannable:
|
||||||
|
return Scannable(
|
||||||
|
schema=data.schema,
|
||||||
|
num_rows=data.num_rows,
|
||||||
|
reader=lambda: pa.RecordBatchReader.from_batches(data.schema, [data]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(pa.Table)
|
||||||
|
def _from_table(data: pa.Table) -> Scannable:
|
||||||
|
return Scannable(schema=data.schema, num_rows=data.num_rows, reader=data.to_reader)
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(ds.Dataset)
|
||||||
|
def _from_dataset(data: ds.Dataset) -> Scannable:
|
||||||
|
return Scannable(
|
||||||
|
schema=data.schema,
|
||||||
|
num_rows=data.count_rows(),
|
||||||
|
reader=lambda: data.scanner().to_reader(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(ds.Scanner)
|
||||||
|
def _from_scanner(data: ds.Scanner) -> Scannable:
|
||||||
|
# Scanner can only be consumed once - not rescannable
|
||||||
|
return Scannable(
|
||||||
|
schema=data.projected_schema,
|
||||||
|
num_rows=None,
|
||||||
|
reader=data.to_reader,
|
||||||
|
rescannable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(list)
|
||||||
|
def _from_list(data: list) -> Scannable:
|
||||||
|
if not data:
|
||||||
|
raise ValueError("Cannot create table from empty list without a schema")
|
||||||
|
table = to_arrow(data)
|
||||||
|
return Scannable(
|
||||||
|
schema=table.schema, num_rows=table.num_rows, reader=table.to_reader
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(dict)
|
||||||
|
def _from_dict(data: dict) -> Scannable:
|
||||||
|
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
|
||||||
|
|
||||||
|
|
||||||
|
@to_scannable.register(LanceModel)
|
||||||
|
def _from_lance_model(data: LanceModel) -> Scannable:
|
||||||
|
raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
|
||||||
|
|
||||||
|
|
||||||
|
def _from_iterable(data: Iterator) -> Scannable:
|
||||||
|
first_item = next(data, None)
|
||||||
|
if first_item is None:
|
||||||
|
raise ValueError("Cannot create table from empty iterator")
|
||||||
|
first = to_arrow(first_item)
|
||||||
|
schema = first.schema
|
||||||
|
|
||||||
|
def iter():
|
||||||
|
yield from first.to_batches()
|
||||||
|
for item in data:
|
||||||
|
batch = to_arrow(item)
|
||||||
|
if batch.schema != schema:
|
||||||
|
try:
|
||||||
|
batch = batch.cast(schema)
|
||||||
|
except pa.lib.ArrowInvalid:
|
||||||
|
raise ValueError(
|
||||||
|
f"Input iterator yielded a batch with schema that "
|
||||||
|
f"does not match the schema of other batches.\n"
|
||||||
|
f"Expected:\n{schema}\nGot:\n{batch.schema}"
|
||||||
|
)
|
||||||
|
yield from batch.to_batches()
|
||||||
|
|
||||||
|
reader = pa.RecordBatchReader.from_batches(schema, iter())
|
||||||
|
return to_scannable(reader)
|
||||||
|
|
||||||
|
|
||||||
|
_registered_modules: set[str] = set()
|
||||||
|
|
||||||
|
|
||||||
|
def _register_optional_converters():
|
||||||
|
"""Register converters for optional dependencies that are already imported."""
|
||||||
|
|
||||||
|
if "pandas" in sys.modules and "pandas" not in _registered_modules:
|
||||||
|
_registered_modules.add("pandas")
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
@to_arrow.register(pd.DataFrame)
|
||||||
|
def _arrow_from_pandas(data: pd.DataFrame) -> pa.Table:
|
||||||
|
table = pa.Table.from_pandas(data, preserve_index=False)
|
||||||
|
return table.replace_schema_metadata(None)
|
||||||
|
|
||||||
|
@to_scannable.register(pd.DataFrame)
|
||||||
|
def _from_pandas(data: pd.DataFrame) -> Scannable:
|
||||||
|
return to_scannable(_arrow_from_pandas(data))
|
||||||
|
|
||||||
|
if "polars" in sys.modules and "polars" not in _registered_modules:
|
||||||
|
_registered_modules.add("polars")
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
@to_arrow.register(pl.DataFrame)
|
||||||
|
def _arrow_from_polars(data: pl.DataFrame) -> pa.Table:
|
||||||
|
return data.to_arrow()
|
||||||
|
|
||||||
|
@to_scannable.register(pl.DataFrame)
|
||||||
|
def _from_polars(data: pl.DataFrame) -> Scannable:
|
||||||
|
arrow = data.to_arrow()
|
||||||
|
return Scannable(
|
||||||
|
schema=arrow.schema, num_rows=len(data), reader=arrow.to_reader
|
||||||
|
)
|
||||||
|
|
||||||
|
@to_scannable.register(pl.LazyFrame)
|
||||||
|
def _from_polars_lazy(data: pl.LazyFrame) -> Scannable:
|
||||||
|
arrow = data.collect().to_arrow()
|
||||||
|
return Scannable(
|
||||||
|
schema=arrow.schema, num_rows=arrow.num_rows, reader=arrow.to_reader
|
||||||
|
)
|
||||||
|
|
||||||
|
if "datasets" in sys.modules and "datasets" not in _registered_modules:
|
||||||
|
_registered_modules.add("datasets")
|
||||||
|
from datasets import Dataset as HFDataset
|
||||||
|
from datasets import DatasetDict as HFDatasetDict
|
||||||
|
|
||||||
|
@to_scannable.register(HFDataset)
|
||||||
|
def _from_hf_dataset(data: HFDataset) -> Scannable:
|
||||||
|
table = data.data.table # Access underlying Arrow table
|
||||||
|
return Scannable(
|
||||||
|
schema=table.schema, num_rows=len(data), reader=table.to_reader
|
||||||
|
)
|
||||||
|
|
||||||
|
@to_scannable.register(HFDatasetDict)
|
||||||
|
def _from_hf_dataset_dict(data: HFDatasetDict) -> Scannable:
|
||||||
|
# HuggingFace DatasetDict: combine all splits with a 'split' column
|
||||||
|
schema = data[list(data.keys())[0]].features.arrow_schema
|
||||||
|
if "split" not in schema.names:
|
||||||
|
schema = schema.append(pa.field("split", pa.string()))
|
||||||
|
|
||||||
|
def gen():
|
||||||
|
for split_name, dataset in data.items():
|
||||||
|
for batch in dataset.data.to_batches():
|
||||||
|
split_arr = pa.array(
|
||||||
|
[split_name] * len(batch), type=pa.string()
|
||||||
|
)
|
||||||
|
yield pa.RecordBatch.from_arrays(
|
||||||
|
list(batch.columns) + [split_arr], schema=schema
|
||||||
|
)
|
||||||
|
|
||||||
|
total_rows = sum(len(dataset) for dataset in data.values())
|
||||||
|
return Scannable(
|
||||||
|
schema=schema,
|
||||||
|
num_rows=total_rows,
|
||||||
|
reader=lambda: pa.RecordBatchReader.from_batches(schema, gen()),
|
||||||
|
)
|
||||||
|
|
||||||
|
if "lance" in sys.modules and "lance" not in _registered_modules:
|
||||||
|
_registered_modules.add("lance")
|
||||||
|
import lance
|
||||||
|
|
||||||
|
@to_scannable.register(lance.LanceDataset)
|
||||||
|
def _from_lance(data: lance.LanceDataset) -> Scannable:
|
||||||
|
return Scannable(
|
||||||
|
schema=data.schema,
|
||||||
|
num_rows=data.count_rows(),
|
||||||
|
reader=lambda: data.scanner().to_reader(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Register on module load
|
||||||
|
_register_optional_converters()
|
||||||
@@ -14,6 +14,7 @@ from functools import cached_property
|
|||||||
from typing import (
|
from typing import (
|
||||||
TYPE_CHECKING,
|
TYPE_CHECKING,
|
||||||
Any,
|
Any,
|
||||||
|
Callable,
|
||||||
Dict,
|
Dict,
|
||||||
Iterable,
|
Iterable,
|
||||||
List,
|
List,
|
||||||
@@ -25,6 +26,8 @@ from typing import (
|
|||||||
)
|
)
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from lancedb.scannable import _register_optional_converters, to_scannable
|
||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
from lancedb.arrow import peek_reader
|
from lancedb.arrow import peek_reader
|
||||||
from lancedb.background_loop import LOOP
|
from lancedb.background_loop import LOOP
|
||||||
@@ -86,7 +89,6 @@ from .index import lang_mapping
|
|||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from .db import LanceDBConnection
|
from .db import LanceDBConnection
|
||||||
from .io import StorageOptionsProvider
|
|
||||||
from ._lancedb import (
|
from ._lancedb import (
|
||||||
Table as LanceDBTable,
|
Table as LanceDBTable,
|
||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
@@ -275,7 +277,7 @@ def _sanitize_data(
|
|||||||
|
|
||||||
if metadata:
|
if metadata:
|
||||||
new_metadata = target_schema.metadata or {}
|
new_metadata = target_schema.metadata or {}
|
||||||
new_metadata = new_metadata.update(metadata)
|
new_metadata.update(metadata)
|
||||||
target_schema = target_schema.with_metadata(new_metadata)
|
target_schema = target_schema.with_metadata(new_metadata)
|
||||||
|
|
||||||
_validate_schema(target_schema)
|
_validate_schema(target_schema)
|
||||||
@@ -554,6 +556,21 @@ def _table_uri(base: str, table_name: str) -> str:
|
|||||||
return join_uri(base, f"{table_name}.lance")
|
return join_uri(base, f"{table_name}.lance")
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_progress(progress):
|
||||||
|
"""Normalize a ``progress`` parameter for :meth:`Table.add`.
|
||||||
|
|
||||||
|
Returns ``(progress_obj, owns)`` where *owns* is True when we created a
|
||||||
|
tqdm bar that the caller must close.
|
||||||
|
"""
|
||||||
|
if progress is True:
|
||||||
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
|
return tqdm(unit=" rows"), True
|
||||||
|
if progress is False or progress is None:
|
||||||
|
return None, False
|
||||||
|
return progress, False
|
||||||
|
|
||||||
|
|
||||||
class Table(ABC):
|
class Table(ABC):
|
||||||
"""
|
"""
|
||||||
A Table is a collection of Records in a LanceDB Database.
|
A Table is a collection of Records in a LanceDB Database.
|
||||||
@@ -972,6 +989,7 @@ class Table(ABC):
|
|||||||
mode: AddMode = "append",
|
mode: AddMode = "append",
|
||||||
on_bad_vectors: OnBadVectorsType = "error",
|
on_bad_vectors: OnBadVectorsType = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
|
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||||
) -> AddResult:
|
) -> AddResult:
|
||||||
"""Add more data to the [Table](Table).
|
"""Add more data to the [Table](Table).
|
||||||
|
|
||||||
@@ -993,6 +1011,29 @@ class Table(ABC):
|
|||||||
One of "error", "drop", "fill".
|
One of "error", "drop", "fill".
|
||||||
fill_value: float, default 0.
|
fill_value: float, default 0.
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
progress: bool, callable, or tqdm-like, optional
|
||||||
|
Progress reporting during the add operation. Can be:
|
||||||
|
|
||||||
|
- ``True`` to automatically create and display a tqdm progress
|
||||||
|
bar (requires ``tqdm`` to be installed)::
|
||||||
|
|
||||||
|
table.add(data, progress=True)
|
||||||
|
|
||||||
|
- A **callable** that receives a dict with keys ``output_rows``,
|
||||||
|
``output_bytes``, ``total_rows``, ``elapsed_seconds``,
|
||||||
|
``active_tasks``, ``total_tasks``, and ``done``::
|
||||||
|
|
||||||
|
def on_progress(p):
|
||||||
|
print(f"{p['output_rows']}/{p['total_rows']} rows, "
|
||||||
|
f"{p['active_tasks']}/{p['total_tasks']} workers")
|
||||||
|
table.add(data, progress=on_progress)
|
||||||
|
|
||||||
|
- A **tqdm-compatible** progress bar whose ``total`` and
|
||||||
|
``update()`` will be called automatically. The postfix shows
|
||||||
|
write throughput (MB/s) and active worker count::
|
||||||
|
|
||||||
|
with tqdm() as pbar:
|
||||||
|
table.add(data, progress=pbar)
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@@ -1329,7 +1370,7 @@ class Table(ABC):
|
|||||||
1 2 [3.0, 4.0]
|
1 2 [3.0, 4.0]
|
||||||
2 3 [5.0, 6.0]
|
2 3 [5.0, 6.0]
|
||||||
>>> table.delete("x = 2")
|
>>> table.delete("x = 2")
|
||||||
DeleteResult(version=2)
|
DeleteResult(num_deleted_rows=1, version=2)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 1 [1.0, 2.0]
|
0 1 [1.0, 2.0]
|
||||||
@@ -1343,7 +1384,7 @@ class Table(ABC):
|
|||||||
>>> to_remove
|
>>> to_remove
|
||||||
'1, 5'
|
'1, 5'
|
||||||
>>> table.delete(f"x IN ({to_remove})")
|
>>> table.delete(f"x IN ({to_remove})")
|
||||||
DeleteResult(version=3)
|
DeleteResult(num_deleted_rows=1, version=3)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 3 [5.0, 6.0]
|
0 3 [5.0, 6.0]
|
||||||
@@ -1504,22 +1545,17 @@ class Table(ABC):
|
|||||||
in-progress operation (e.g. appending new data) and these files will not
|
in-progress operation (e.g. appending new data) and these files will not
|
||||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||||
then these files will be deleted regardless of their age.
|
then these files will be deleted regardless of their age.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This should only be set to True if you can guarantee that no other
|
||||||
|
process is currently working on this dataset. Otherwise the dataset
|
||||||
|
could be put into a corrupted state.
|
||||||
|
|
||||||
retrain: bool, default False
|
retrain: bool, default False
|
||||||
This parameter is no longer used and is deprecated.
|
This parameter is no longer used and is deprecated.
|
||||||
|
|
||||||
Experimental API
|
The frequency an application should call optimize is based on the frequency of
|
||||||
----------------
|
|
||||||
|
|
||||||
The optimization process is undergoing active development and may change.
|
|
||||||
Our goal with these changes is to improve the performance of optimization and
|
|
||||||
reduce the complexity.
|
|
||||||
|
|
||||||
That being said, it is essential today to run optimize if you want the best
|
|
||||||
performance. It should be stable and safe to use in production, but it our
|
|
||||||
hope that the API may be simplified (or not even need to be called) in the
|
|
||||||
future.
|
|
||||||
|
|
||||||
The frequency an application shoudl call optimize is based on the frequency of
|
|
||||||
data modifications. If data is frequently added, deleted, or updated then
|
data modifications. If data is frequently added, deleted, or updated then
|
||||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||||
you have added or modified 100,000 or more records or run more than 20 data
|
you have added or modified 100,000 or more records or run more than 20 data
|
||||||
@@ -1739,29 +1775,34 @@ class LanceTable(Table):
|
|||||||
connection: "LanceDBConnection",
|
connection: "LanceDBConnection",
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
|
pushdown_operations: Optional[set] = None,
|
||||||
_async: AsyncTable = None,
|
_async: AsyncTable = None,
|
||||||
):
|
):
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
self._conn = connection
|
self._conn = connection
|
||||||
self._namespace = namespace
|
self._namespace_path = namespace_path
|
||||||
self._location = location # Store location for use in _dataset_path
|
self._location = location # Store location for use in _dataset_path
|
||||||
|
self._namespace_client = namespace_client
|
||||||
|
self._pushdown_operations = pushdown_operations or set()
|
||||||
if _async is not None:
|
if _async is not None:
|
||||||
self._table = _async
|
self._table = _async
|
||||||
else:
|
else:
|
||||||
self._table = LOOP.run(
|
self._table = LOOP.run(
|
||||||
connection._conn.open_table(
|
connection._conn.open_table(
|
||||||
name,
|
name,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1772,13 +1813,13 @@ class LanceTable(Table):
|
|||||||
@property
|
@property
|
||||||
def namespace(self) -> List[str]:
|
def namespace(self) -> List[str]:
|
||||||
"""Return the namespace path of the table."""
|
"""Return the namespace path of the table."""
|
||||||
return self._namespace
|
return self._namespace_path
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def id(self) -> str:
|
def id(self) -> str:
|
||||||
"""Return the full identifier of the table (namespace$name)."""
|
"""Return the full identifier of the table (namespace$name)."""
|
||||||
if self._namespace:
|
if self._namespace_path:
|
||||||
return "$".join(self._namespace + [self.name])
|
return "$".join(self._namespace_path + [self.name])
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -1799,22 +1840,26 @@ class LanceTable(Table):
|
|||||||
db,
|
db,
|
||||||
name,
|
name,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
|
pushdown_operations: Optional[set] = None,
|
||||||
):
|
):
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
tbl = cls(
|
tbl = cls(
|
||||||
db,
|
db,
|
||||||
name,
|
name,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
|
pushdown_operations=pushdown_operations,
|
||||||
)
|
)
|
||||||
|
|
||||||
# check the dataset exists
|
# check the dataset exists
|
||||||
@@ -1846,6 +1891,16 @@ class LanceTable(Table):
|
|||||||
"Please install with `pip install pylance`."
|
"Please install with `pip install pylance`."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self._namespace_client is not None:
|
||||||
|
table_id = self._namespace_path + [self.name]
|
||||||
|
return lance.dataset(
|
||||||
|
version=self.version,
|
||||||
|
storage_options=self._conn.storage_options,
|
||||||
|
namespace_client=self._namespace_client,
|
||||||
|
table_id=table_id,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
return lance.dataset(
|
return lance.dataset(
|
||||||
self._dataset_path,
|
self._dataset_path,
|
||||||
version=self.version,
|
version=self.version,
|
||||||
@@ -2198,12 +2253,18 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
def prewarm_index(self, name: str) -> None:
|
def prewarm_index(self, name: str) -> None:
|
||||||
"""
|
"""
|
||||||
Prewarms an index in the table
|
Prewarm an index in the table.
|
||||||
|
|
||||||
This loads the entire index into memory
|
This is a hint to the database that the index will be accessed in the
|
||||||
|
future and should be loaded into memory if possible. This can reduce
|
||||||
|
cold-start latency for subsequent queries.
|
||||||
|
|
||||||
If the index does not fit into the available cache this call
|
This call initiates prewarming and returns once the request is accepted.
|
||||||
may be wasteful
|
It is idempotent and safe to call from multiple clients concurrently.
|
||||||
|
|
||||||
|
It is generally wasteful to call this if the index does not fit into the
|
||||||
|
available cache. Not all index types support prewarming; unsupported
|
||||||
|
indices will silently ignore the request.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -2212,6 +2273,29 @@ class LanceTable(Table):
|
|||||||
"""
|
"""
|
||||||
return LOOP.run(self._table.prewarm_index(name))
|
return LOOP.run(self._table.prewarm_index(name))
|
||||||
|
|
||||||
|
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||||
|
"""
|
||||||
|
Prewarm data for the table.
|
||||||
|
|
||||||
|
This is a hint to the database that the given columns will be accessed
|
||||||
|
in the future and the database should prefetch the data if possible.
|
||||||
|
Currently only supported on remote tables.
|
||||||
|
|
||||||
|
This call initiates prewarming and returns once the request is accepted.
|
||||||
|
It is idempotent and safe to call from multiple clients concurrently.
|
||||||
|
|
||||||
|
This operation has a large upfront cost but can speed up future queries
|
||||||
|
that need to fetch the given columns. Large columns such as embeddings
|
||||||
|
or binary data may not be practical to prewarm. This feature is intended
|
||||||
|
for workloads that issue many queries against the same columns.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
columns: list of str, optional
|
||||||
|
The columns to prewarm. If None, all columns are prewarmed.
|
||||||
|
"""
|
||||||
|
return LOOP.run(self._table.prewarm_data(columns))
|
||||||
|
|
||||||
def wait_for_index(
|
def wait_for_index(
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -2447,6 +2531,7 @@ class LanceTable(Table):
|
|||||||
mode: AddMode = "append",
|
mode: AddMode = "append",
|
||||||
on_bad_vectors: OnBadVectorsType = "error",
|
on_bad_vectors: OnBadVectorsType = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
|
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||||
) -> AddResult:
|
) -> AddResult:
|
||||||
"""Add data to the table.
|
"""Add data to the table.
|
||||||
If vector columns are missing and the table
|
If vector columns are missing and the table
|
||||||
@@ -2465,17 +2550,29 @@ class LanceTable(Table):
|
|||||||
One of "error", "drop", "fill", "null".
|
One of "error", "drop", "fill", "null".
|
||||||
fill_value: float, default 0.
|
fill_value: float, default 0.
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
progress: bool, callable, or tqdm-like, optional
|
||||||
|
A callback or tqdm-compatible progress bar. See
|
||||||
|
:meth:`Table.add` for details.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
int
|
int
|
||||||
The number of vectors in the table.
|
The number of vectors in the table.
|
||||||
"""
|
"""
|
||||||
return LOOP.run(
|
progress, owns = _normalize_progress(progress)
|
||||||
self._table.add(
|
try:
|
||||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
return LOOP.run(
|
||||||
|
self._table.add(
|
||||||
|
data,
|
||||||
|
mode=mode,
|
||||||
|
on_bad_vectors=on_bad_vectors,
|
||||||
|
fill_value=fill_value,
|
||||||
|
progress=progress,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
finally:
|
||||||
|
if owns:
|
||||||
|
progress.close()
|
||||||
|
|
||||||
def merge(
|
def merge(
|
||||||
self,
|
self,
|
||||||
@@ -2705,12 +2802,13 @@ class LanceTable(Table):
|
|||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
*,
|
*,
|
||||||
namespace: Optional[List[str]] = None,
|
namespace_path: Optional[List[str]] = None,
|
||||||
storage_options: Optional[Dict[str, str | bool]] = None,
|
storage_options: Optional[Dict[str, str | bool]] = None,
|
||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
|
||||||
data_storage_version: Optional[str] = None,
|
data_storage_version: Optional[str] = None,
|
||||||
enable_v2_manifest_paths: Optional[bool] = None,
|
enable_v2_manifest_paths: Optional[bool] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
pushdown_operations: Optional[set] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a new table.
|
Create a new table.
|
||||||
@@ -2765,12 +2863,14 @@ class LanceTable(Table):
|
|||||||
Deprecated. Set `storage_options` when connecting to the database and set
|
Deprecated. Set `storage_options` when connecting to the database and set
|
||||||
`new_table_enable_v2_manifest_paths` in the options.
|
`new_table_enable_v2_manifest_paths` in the options.
|
||||||
"""
|
"""
|
||||||
if namespace is None:
|
if namespace_path is None:
|
||||||
namespace = []
|
namespace_path = []
|
||||||
self = cls.__new__(cls)
|
self = cls.__new__(cls)
|
||||||
self._conn = db
|
self._conn = db
|
||||||
self._namespace = namespace
|
self._namespace_path = namespace_path
|
||||||
self._location = location
|
self._location = location
|
||||||
|
self._namespace_client = namespace_client
|
||||||
|
self._pushdown_operations = pushdown_operations or set()
|
||||||
|
|
||||||
if data_storage_version is not None:
|
if data_storage_version is not None:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
@@ -2803,9 +2903,8 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors=on_bad_vectors,
|
on_bad_vectors=on_bad_vectors,
|
||||||
fill_value=fill_value,
|
fill_value=fill_value,
|
||||||
embedding_functions=embedding_functions,
|
embedding_functions=embedding_functions,
|
||||||
namespace=namespace,
|
namespace_path=namespace_path,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
|
||||||
location=location,
|
location=location,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -2874,6 +2973,15 @@ class LanceTable(Table):
|
|||||||
batch_size: Optional[int] = None,
|
batch_size: Optional[int] = None,
|
||||||
timeout: Optional[timedelta] = None,
|
timeout: Optional[timedelta] = None,
|
||||||
) -> pa.RecordBatchReader:
|
) -> pa.RecordBatchReader:
|
||||||
|
if (
|
||||||
|
"QueryTable" in self._pushdown_operations
|
||||||
|
and self._namespace_client is not None
|
||||||
|
):
|
||||||
|
from lancedb.namespace import _execute_server_side_query
|
||||||
|
|
||||||
|
table_id = self._namespace_path + [self.name]
|
||||||
|
return _execute_server_side_query(self._namespace_client, table_id, query)
|
||||||
|
|
||||||
async_iter = LOOP.run(
|
async_iter = LOOP.run(
|
||||||
self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
|
self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
|
||||||
)
|
)
|
||||||
@@ -2995,22 +3103,17 @@ class LanceTable(Table):
|
|||||||
in-progress operation (e.g. appending new data) and these files will not
|
in-progress operation (e.g. appending new data) and these files will not
|
||||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||||
then these files will be deleted regardless of their age.
|
then these files will be deleted regardless of their age.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This should only be set to True if you can guarantee that no other
|
||||||
|
process is currently working on this dataset. Otherwise the dataset
|
||||||
|
could be put into a corrupted state.
|
||||||
|
|
||||||
retrain: bool, default False
|
retrain: bool, default False
|
||||||
This parameter is no longer used and is deprecated.
|
This parameter is no longer used and is deprecated.
|
||||||
|
|
||||||
Experimental API
|
The frequency an application should call optimize is based on the frequency of
|
||||||
----------------
|
|
||||||
|
|
||||||
The optimization process is undergoing active development and may change.
|
|
||||||
Our goal with these changes is to improve the performance of optimization and
|
|
||||||
reduce the complexity.
|
|
||||||
|
|
||||||
That being said, it is essential today to run optimize if you want the best
|
|
||||||
performance. It should be stable and safe to use in production, but it our
|
|
||||||
hope that the API may be simplified (or not even need to be called) in the
|
|
||||||
future.
|
|
||||||
|
|
||||||
The frequency an application shoudl call optimize is based on the frequency of
|
|
||||||
data modifications. If data is frequently added, deleted, or updated then
|
data modifications. If data is frequently added, deleted, or updated then
|
||||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||||
you have added or modified 100,000 or more records or run more than 20 data
|
you have added or modified 100,000 or more records or run more than 20 data
|
||||||
@@ -3611,19 +3714,47 @@ class AsyncTable:
|
|||||||
"""
|
"""
|
||||||
Prewarm an index in the table.
|
Prewarm an index in the table.
|
||||||
|
|
||||||
|
This is a hint to the database that the index will be accessed in the
|
||||||
|
future and should be loaded into memory if possible. This can reduce
|
||||||
|
cold-start latency for subsequent queries.
|
||||||
|
|
||||||
|
This call initiates prewarming and returns once the request is accepted.
|
||||||
|
It is idempotent and safe to call from multiple clients concurrently.
|
||||||
|
|
||||||
|
It is generally wasteful to call this if the index does not fit into the
|
||||||
|
available cache. Not all index types support prewarming; unsupported
|
||||||
|
indices will silently ignore the request.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the index to prewarm
|
The name of the index to prewarm
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
This will load the index into memory. This may reduce the cold-start time for
|
|
||||||
future queries. If the index does not fit in the cache then this call may be
|
|
||||||
wasteful.
|
|
||||||
"""
|
"""
|
||||||
await self._inner.prewarm_index(name)
|
await self._inner.prewarm_index(name)
|
||||||
|
|
||||||
|
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||||
|
"""
|
||||||
|
Prewarm data for the table.
|
||||||
|
|
||||||
|
This is a hint to the database that the given columns will be accessed
|
||||||
|
in the future and the database should prefetch the data if possible.
|
||||||
|
Currently only supported on remote tables.
|
||||||
|
|
||||||
|
This call initiates prewarming and returns once the request is accepted.
|
||||||
|
It is idempotent and safe to call from multiple clients concurrently.
|
||||||
|
|
||||||
|
This operation has a large upfront cost but can speed up future queries
|
||||||
|
that need to fetch the given columns. Large columns such as embeddings
|
||||||
|
or binary data may not be practical to prewarm. This feature is intended
|
||||||
|
for workloads that issue many queries against the same columns.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
columns: list of str, optional
|
||||||
|
The columns to prewarm. If None, all columns are prewarmed.
|
||||||
|
"""
|
||||||
|
await self._inner.prewarm_data(columns)
|
||||||
|
|
||||||
async def wait_for_index(
|
async def wait_for_index(
|
||||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -3699,6 +3830,7 @@ class AsyncTable:
|
|||||||
mode: Optional[Literal["append", "overwrite"]] = "append",
|
mode: Optional[Literal["append", "overwrite"]] = "append",
|
||||||
on_bad_vectors: Optional[OnBadVectorsType] = None,
|
on_bad_vectors: Optional[OnBadVectorsType] = None,
|
||||||
fill_value: Optional[float] = None,
|
fill_value: Optional[float] = None,
|
||||||
|
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||||
) -> AddResult:
|
) -> AddResult:
|
||||||
"""Add more data to the [Table](Table).
|
"""Add more data to the [Table](Table).
|
||||||
|
|
||||||
@@ -3720,6 +3852,9 @@ class AsyncTable:
|
|||||||
One of "error", "drop", "fill", "null".
|
One of "error", "drop", "fill", "null".
|
||||||
fill_value: float, default 0.
|
fill_value: float, default 0.
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
progress: callable or tqdm-like, optional
|
||||||
|
A callback or tqdm-compatible progress bar. See
|
||||||
|
:meth:`Table.add` for details.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
schema = await self.schema()
|
schema = await self.schema()
|
||||||
@@ -3727,18 +3862,41 @@ class AsyncTable:
|
|||||||
on_bad_vectors = "error"
|
on_bad_vectors = "error"
|
||||||
if fill_value is None:
|
if fill_value is None:
|
||||||
fill_value = 0.0
|
fill_value = 0.0
|
||||||
data = _sanitize_data(
|
|
||||||
data,
|
|
||||||
schema,
|
|
||||||
metadata=schema.metadata,
|
|
||||||
on_bad_vectors=on_bad_vectors,
|
|
||||||
fill_value=fill_value,
|
|
||||||
allow_subschema=True,
|
|
||||||
)
|
|
||||||
if isinstance(data, pa.Table):
|
|
||||||
data = data.to_reader()
|
|
||||||
|
|
||||||
return await self._inner.add(data, mode or "append")
|
# _santitize_data is an old code path, but we will use it until the
|
||||||
|
# new code path is ready.
|
||||||
|
if mode == "overwrite":
|
||||||
|
# For overwrite, apply the same preprocessing as create_table
|
||||||
|
# so vector columns are inferred as FixedSizeList.
|
||||||
|
data, _ = sanitize_create_table(
|
||||||
|
data, None, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
|
)
|
||||||
|
elif on_bad_vectors != "error" or (
|
||||||
|
schema.metadata is not None and b"embedding_functions" in schema.metadata
|
||||||
|
):
|
||||||
|
data = _sanitize_data(
|
||||||
|
data,
|
||||||
|
schema,
|
||||||
|
metadata=schema.metadata,
|
||||||
|
on_bad_vectors=on_bad_vectors,
|
||||||
|
fill_value=fill_value,
|
||||||
|
allow_subschema=True,
|
||||||
|
)
|
||||||
|
_register_optional_converters()
|
||||||
|
data = to_scannable(data)
|
||||||
|
progress, owns = _normalize_progress(progress)
|
||||||
|
try:
|
||||||
|
return await self._inner.add(data, mode or "append", progress=progress)
|
||||||
|
except RuntimeError as e:
|
||||||
|
if "Cast error" in str(e):
|
||||||
|
raise ValueError(e)
|
||||||
|
elif "Vector column contains NaN" in str(e):
|
||||||
|
raise ValueError(e)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
if owns:
|
||||||
|
progress.close()
|
||||||
|
|
||||||
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
|
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
|
||||||
"""
|
"""
|
||||||
@@ -4061,7 +4219,7 @@ class AsyncTable:
|
|||||||
async_query = async_query.offset(query.offset)
|
async_query = async_query.offset(query.offset)
|
||||||
if query.columns:
|
if query.columns:
|
||||||
async_query = async_query.select(query.columns)
|
async_query = async_query.select(query.columns)
|
||||||
if query.filter:
|
if query.filter is not None:
|
||||||
async_query = async_query.where(query.filter)
|
async_query = async_query.where(query.filter)
|
||||||
if query.fast_search:
|
if query.fast_search:
|
||||||
async_query = async_query.fast_search()
|
async_query = async_query.fast_search()
|
||||||
@@ -4200,7 +4358,7 @@ class AsyncTable:
|
|||||||
1 2 [3.0, 4.0]
|
1 2 [3.0, 4.0]
|
||||||
2 3 [5.0, 6.0]
|
2 3 [5.0, 6.0]
|
||||||
>>> table.delete("x = 2")
|
>>> table.delete("x = 2")
|
||||||
DeleteResult(version=2)
|
DeleteResult(num_deleted_rows=1, version=2)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 1 [1.0, 2.0]
|
0 1 [1.0, 2.0]
|
||||||
@@ -4214,7 +4372,7 @@ class AsyncTable:
|
|||||||
>>> to_remove
|
>>> to_remove
|
||||||
'1, 5'
|
'1, 5'
|
||||||
>>> table.delete(f"x IN ({to_remove})")
|
>>> table.delete(f"x IN ({to_remove})")
|
||||||
DeleteResult(version=3)
|
DeleteResult(num_deleted_rows=1, version=3)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 3 [5.0, 6.0]
|
0 3 [5.0, 6.0]
|
||||||
@@ -4537,22 +4695,17 @@ class AsyncTable:
|
|||||||
in-progress operation (e.g. appending new data) and these files will not
|
in-progress operation (e.g. appending new data) and these files will not
|
||||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||||
then these files will be deleted regardless of their age.
|
then these files will be deleted regardless of their age.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
This should only be set to True if you can guarantee that no other
|
||||||
|
process is currently working on this dataset. Otherwise the dataset
|
||||||
|
could be put into a corrupted state.
|
||||||
|
|
||||||
retrain: bool, default False
|
retrain: bool, default False
|
||||||
This parameter is no longer used and is deprecated.
|
This parameter is no longer used and is deprecated.
|
||||||
|
|
||||||
Experimental API
|
The frequency an application should call optimize is based on the frequency of
|
||||||
----------------
|
|
||||||
|
|
||||||
The optimization process is undergoing active development and may change.
|
|
||||||
Our goal with these changes is to improve the performance of optimization and
|
|
||||||
reduce the complexity.
|
|
||||||
|
|
||||||
That being said, it is essential today to run optimize if you want the best
|
|
||||||
performance. It should be stable and safe to use in production, but it our
|
|
||||||
hope that the API may be simplified (or not even need to be called) in the
|
|
||||||
future.
|
|
||||||
|
|
||||||
The frequency an application shoudl call optimize is based on the frequency of
|
|
||||||
data modifications. If data is frequently added, deleted, or updated then
|
data modifications. If data is frequently added, deleted, or updated then
|
||||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||||
you have added or modified 100,000 or more records or run more than 20 data
|
you have added or modified 100,000 or more records or run more than 20 data
|
||||||
@@ -4673,7 +4826,16 @@ class IndexStatistics:
|
|||||||
num_indexed_rows: int
|
num_indexed_rows: int
|
||||||
num_unindexed_rows: int
|
num_unindexed_rows: int
|
||||||
index_type: Literal[
|
index_type: Literal[
|
||||||
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
|
"IVF_FLAT",
|
||||||
|
"IVF_SQ",
|
||||||
|
"IVF_PQ",
|
||||||
|
"IVF_RQ",
|
||||||
|
"IVF_HNSW_SQ",
|
||||||
|
"IVF_HNSW_PQ",
|
||||||
|
"FTS",
|
||||||
|
"BTREE",
|
||||||
|
"BITMAP",
|
||||||
|
"LABEL_LIST",
|
||||||
]
|
]
|
||||||
distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
|
distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
|
||||||
num_indices: Optional[int] = None
|
num_indices: Optional[int] = None
|
||||||
|
|||||||
@@ -324,6 +324,16 @@ def _(value: list):
|
|||||||
return "[" + ", ".join(map(value_to_sql, value)) + "]"
|
return "[" + ", ".join(map(value_to_sql, value)) + "]"
|
||||||
|
|
||||||
|
|
||||||
|
@value_to_sql.register(dict)
|
||||||
|
def _(value: dict):
|
||||||
|
# https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
|
||||||
|
return (
|
||||||
|
"named_struct("
|
||||||
|
+ ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items())
|
||||||
|
+ ")"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@value_to_sql.register(np.ndarray)
|
@value_to_sql.register(np.ndarray)
|
||||||
def _(value: np.ndarray):
|
def _(value: np.ndarray):
|
||||||
return value_to_sql(value.tolist())
|
return value_to_sql(value.tolist())
|
||||||
|
|||||||
@@ -183,8 +183,8 @@ def test_table_names(tmp_db: lancedb.DBConnection):
|
|||||||
result = list(tmp_db.table_names("test2", limit=2))
|
result = list(tmp_db.table_names("test2", limit=2))
|
||||||
assert result == ["test3"], f"Expected ['test3'], got {result}"
|
assert result == ["test3"], f"Expected ['test3'], got {result}"
|
||||||
|
|
||||||
# Test that namespace parameter can be passed as keyword
|
# Test that namespace_path parameter can be passed as keyword
|
||||||
result = list(tmp_db.table_names(namespace=[]))
|
result = list(tmp_db.table_names(namespace_path=[]))
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
|
|
||||||
|
|
||||||
@@ -909,7 +909,7 @@ def test_local_namespace_operations(tmp_path):
|
|||||||
NotImplementedError,
|
NotImplementedError,
|
||||||
match="Namespace operations are not supported for listing database",
|
match="Namespace operations are not supported for listing database",
|
||||||
):
|
):
|
||||||
db.list_namespaces(namespace=["test"])
|
db.list_namespaces(namespace_path=["test"])
|
||||||
|
|
||||||
|
|
||||||
def test_local_create_namespace_not_supported(tmp_path):
|
def test_local_create_namespace_not_supported(tmp_path):
|
||||||
|
|||||||
@@ -546,3 +546,24 @@ def test_openai_no_retry_on_401(mock_sleep):
|
|||||||
assert mock_func.call_count == 1
|
assert mock_func.call_count == 1
|
||||||
# Verify that sleep was never called (no retries)
|
# Verify that sleep was never called (no retries)
|
||||||
assert mock_sleep.call_count == 0
|
assert mock_sleep.call_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_url_retrieve_downloads_image():
|
||||||
|
"""
|
||||||
|
Embedding functions like open-clip, siglip, and jinaai use url_retrieve()
|
||||||
|
to download images from HTTP URLs. For example, open_clip._to_pil() calls:
|
||||||
|
|
||||||
|
PIL_Image.open(io.BytesIO(url_retrieve(image)))
|
||||||
|
|
||||||
|
Verify that url_retrieve() can download an image and open it as PIL Image,
|
||||||
|
matching the real usage pattern in embedding functions.
|
||||||
|
"""
|
||||||
|
import io
|
||||||
|
|
||||||
|
Image = pytest.importorskip("PIL.Image")
|
||||||
|
from lancedb.embeddings.utils import url_retrieve
|
||||||
|
|
||||||
|
image_url = "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg"
|
||||||
|
image_bytes = url_retrieve(image_url)
|
||||||
|
img = Image.open(io.BytesIO(image_bytes))
|
||||||
|
assert img.size[0] > 0 and img.size[1] > 0
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ from lancedb.query import (
|
|||||||
PhraseQuery,
|
PhraseQuery,
|
||||||
BooleanQuery,
|
BooleanQuery,
|
||||||
Occur,
|
Occur,
|
||||||
|
LanceFtsQueryBuilder,
|
||||||
)
|
)
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -882,3 +883,109 @@ def test_fts_query_to_json():
|
|||||||
'"must_not":[]}}'
|
'"must_not":[]}}'
|
||||||
)
|
)
|
||||||
assert json_str == expected
|
assert json_str == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_fts_fast_search(table):
|
||||||
|
table.create_fts_index("text", use_tantivy=False)
|
||||||
|
|
||||||
|
# Insert some unindexed data
|
||||||
|
table.add(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"text": "xyz",
|
||||||
|
"vector": [0 for _ in range(128)],
|
||||||
|
"id": 101,
|
||||||
|
"text2": "xyz",
|
||||||
|
"nested": {"text": "xyz"},
|
||||||
|
"count": 10,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Without fast_search, the query object should not have fast_search set
|
||||||
|
builder = table.search("xyz", query_type="fts").limit(10)
|
||||||
|
query = builder.to_query_object()
|
||||||
|
assert query.fast_search is None
|
||||||
|
|
||||||
|
# With fast_search, the query object should have fast_search=True
|
||||||
|
builder = table.search("xyz", query_type="fts").fast_search().limit(10)
|
||||||
|
query = builder.to_query_object()
|
||||||
|
assert query.fast_search is True
|
||||||
|
|
||||||
|
# fast_search should be chainable with other methods
|
||||||
|
builder = (
|
||||||
|
table.search("xyz", query_type="fts").fast_search().select(["text"]).limit(5)
|
||||||
|
)
|
||||||
|
query = builder.to_query_object()
|
||||||
|
assert query.fast_search is True
|
||||||
|
assert query.limit == 5
|
||||||
|
assert query.columns == ["text"]
|
||||||
|
|
||||||
|
# fast_search should be enabled by keyword argument too
|
||||||
|
query = LanceFtsQueryBuilder(table, "xyz", fast_search=True).to_query_object()
|
||||||
|
assert query.fast_search is True
|
||||||
|
|
||||||
|
# Verify it executes without error and skips unindexed data
|
||||||
|
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||||
|
assert len(results) == 0
|
||||||
|
|
||||||
|
# Update index and verify it returns results
|
||||||
|
table.optimize()
|
||||||
|
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||||
|
assert len(results) > 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_fts_fast_search_async(async_table):
|
||||||
|
await async_table.create_index("text", config=FTS())
|
||||||
|
|
||||||
|
# Insert some unindexed data
|
||||||
|
await async_table.add(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"text": "xyz",
|
||||||
|
"vector": [0 for _ in range(128)],
|
||||||
|
"id": 101,
|
||||||
|
"text2": "xyz",
|
||||||
|
"nested": {"text": "xyz"},
|
||||||
|
"count": 10,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Without fast_search, should return results
|
||||||
|
results = await async_table.query().nearest_to_text("xyz").limit(5).to_list()
|
||||||
|
assert len(results) > 0
|
||||||
|
|
||||||
|
# With fast_search, should return no results data unindexed
|
||||||
|
fast_results = (
|
||||||
|
await async_table.query()
|
||||||
|
.nearest_to_text("xyz")
|
||||||
|
.fast_search()
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(fast_results) == 0
|
||||||
|
|
||||||
|
# Update index and verify it returns results
|
||||||
|
await async_table.optimize()
|
||||||
|
|
||||||
|
fast_results = (
|
||||||
|
await async_table.query()
|
||||||
|
.nearest_to_text("xyz")
|
||||||
|
.fast_search()
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(fast_results) > 0
|
||||||
|
|
||||||
|
# fast_search should be chainable with other methods
|
||||||
|
results = (
|
||||||
|
await async_table.query()
|
||||||
|
.nearest_to_text("xyz")
|
||||||
|
.fast_search()
|
||||||
|
.select(["text"])
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(results) > 0
|
||||||
|
|||||||
@@ -177,6 +177,60 @@ async def test_analyze_plan(table: AsyncTable):
|
|||||||
assert "metrics=" in res
|
assert "metrics=" in res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def table_with_id(tmpdir_factory) -> Table:
|
||||||
|
tmp_path = str(tmpdir_factory.mktemp("data"))
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
data = pa.table(
|
||||||
|
{
|
||||||
|
"id": pa.array([1, 2, 3, 4], type=pa.int64()),
|
||||||
|
"text": pa.array(["a", "b", "cat", "dog"]),
|
||||||
|
"vector": pa.array(
|
||||||
|
[[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
|
||||||
|
type=pa.list_(pa.float32(), list_size=2),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
table = db.create_table("test_with_id", data)
|
||||||
|
table.create_fts_index("text", with_position=False, use_tantivy=False)
|
||||||
|
return table
|
||||||
|
|
||||||
|
|
||||||
|
def test_hybrid_prefilter_explain_plan(table_with_id: Table):
|
||||||
|
"""
|
||||||
|
Verify that the prefilter logic is not inverted in LanceHybridQueryBuilder.
|
||||||
|
"""
|
||||||
|
plan_prefilter = (
|
||||||
|
table_with_id.search(query_type="hybrid")
|
||||||
|
.vector([0.0, 0.0])
|
||||||
|
.text("dog")
|
||||||
|
.where("id = 1", prefilter=True)
|
||||||
|
.limit(2)
|
||||||
|
.explain_plan(verbose=True)
|
||||||
|
)
|
||||||
|
|
||||||
|
plan_postfilter = (
|
||||||
|
table_with_id.search(query_type="hybrid")
|
||||||
|
.vector([0.0, 0.0])
|
||||||
|
.text("dog")
|
||||||
|
.where("id = 1", prefilter=False)
|
||||||
|
.limit(2)
|
||||||
|
.explain_plan(verbose=True)
|
||||||
|
)
|
||||||
|
|
||||||
|
# prefilter=True: filter is pushed into the LanceRead scan.
|
||||||
|
# The FTS sub-plan exposes this as "full_filter=id = Int64(1)" inside LanceRead.
|
||||||
|
assert "full_filter=id = Int64(1)" in plan_prefilter, (
|
||||||
|
f"Should push the filter into the scan.\nPlan:\n{plan_prefilter}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# prefilter=False: filter is applied as a separate FilterExec after the search.
|
||||||
|
# The filter must NOT be embedded in the scan.
|
||||||
|
assert "full_filter=id = Int64(1)" not in plan_postfilter, (
|
||||||
|
f"Should NOT push the filter into the scan.\nPlan:\n{plan_postfilter}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_scores():
|
def test_normalize_scores():
|
||||||
cases = [
|
cases = [
|
||||||
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
|
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
import random
|
import random
|
||||||
|
from typing import get_args, get_type_hints
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
@@ -22,6 +23,7 @@ from lancedb.index import (
|
|||||||
HnswSq,
|
HnswSq,
|
||||||
FTS,
|
FTS,
|
||||||
)
|
)
|
||||||
|
from lancedb.table import IndexStatistics
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
@@ -283,3 +285,23 @@ async def test_create_index_with_binary_vectors(binary_table: AsyncTable):
|
|||||||
for v in range(256):
|
for v in range(256):
|
||||||
res = await binary_table.query().nearest_to([v] * 128).to_arrow()
|
res = await binary_table.query().nearest_to([v] * 128).to_arrow()
|
||||||
assert res["id"][0].as_py() == v
|
assert res["id"][0].as_py() == v
|
||||||
|
|
||||||
|
|
||||||
|
def test_index_statistics_index_type_lists_all_supported_values():
|
||||||
|
expected_index_types = {
|
||||||
|
"IVF_FLAT",
|
||||||
|
"IVF_SQ",
|
||||||
|
"IVF_PQ",
|
||||||
|
"IVF_RQ",
|
||||||
|
"IVF_HNSW_SQ",
|
||||||
|
"IVF_HNSW_PQ",
|
||||||
|
"FTS",
|
||||||
|
"BTREE",
|
||||||
|
"BITMAP",
|
||||||
|
"LABEL_LIST",
|
||||||
|
}
|
||||||
|
|
||||||
|
assert (
|
||||||
|
set(get_args(get_type_hints(IndexStatistics)["index_type"]))
|
||||||
|
== expected_index_types
|
||||||
|
)
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import shutil
|
|||||||
import pytest
|
import pytest
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import lancedb
|
import lancedb
|
||||||
|
from lance_namespace.errors import NamespaceNotEmptyError, TableNotFoundError
|
||||||
|
|
||||||
|
|
||||||
class TestNamespaceConnection:
|
class TestNamespaceConnection:
|
||||||
@@ -32,6 +33,16 @@ class TestNamespaceConnection:
|
|||||||
# Initially no tables in root
|
# Initially no tables in root
|
||||||
assert len(list(db.table_names())) == 0
|
assert len(list(db.table_names())) == 0
|
||||||
|
|
||||||
|
def test_connect_via_connect_helper(self):
|
||||||
|
"""Connecting via lancedb.connect should delegate to namespace connection."""
|
||||||
|
db = lancedb.connect(
|
||||||
|
namespace_client_impl="dir",
|
||||||
|
namespace_client_properties={"root": self.temp_dir},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
|
||||||
|
assert len(list(db.table_names())) == 0
|
||||||
|
|
||||||
def test_create_table_through_namespace(self):
|
def test_create_table_through_namespace(self):
|
||||||
"""Test creating a table through namespace."""
|
"""Test creating a table through namespace."""
|
||||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||||
@@ -49,14 +60,14 @@ class TestNamespaceConnection:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create empty table in child namespace
|
# Create empty table in child namespace
|
||||||
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||||
assert table is not None
|
assert table is not None
|
||||||
assert table.name == "test_table"
|
assert table.name == "test_table"
|
||||||
assert table.namespace == ["test_ns"]
|
assert table.namespace == ["test_ns"]
|
||||||
assert table.id == "test_ns$test_table"
|
assert table.id == "test_ns$test_table"
|
||||||
|
|
||||||
# Table should appear in child namespace
|
# Table should appear in child namespace
|
||||||
table_names = list(db.table_names(namespace=["test_ns"]))
|
table_names = list(db.table_names(namespace_path=["test_ns"]))
|
||||||
assert "test_table" in table_names
|
assert "test_table" in table_names
|
||||||
assert len(table_names) == 1
|
assert len(table_names) == 1
|
||||||
|
|
||||||
@@ -79,10 +90,10 @@ class TestNamespaceConnection:
|
|||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Open the table
|
# Open the table
|
||||||
table = db.open_table("test_table", namespace=["test_ns"])
|
table = db.open_table("test_table", namespace_path=["test_ns"])
|
||||||
assert table is not None
|
assert table is not None
|
||||||
assert table.name == "test_table"
|
assert table.name == "test_table"
|
||||||
assert table.namespace == ["test_ns"]
|
assert table.namespace == ["test_ns"]
|
||||||
@@ -107,31 +118,31 @@ class TestNamespaceConnection:
|
|||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
db.create_table("table1", schema=schema, namespace=["test_ns"])
|
db.create_table("table1", schema=schema, namespace_path=["test_ns"])
|
||||||
db.create_table("table2", schema=schema, namespace=["test_ns"])
|
db.create_table("table2", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify both tables exist in child namespace
|
# Verify both tables exist in child namespace
|
||||||
table_names = list(db.table_names(namespace=["test_ns"]))
|
table_names = list(db.table_names(namespace_path=["test_ns"]))
|
||||||
assert "table1" in table_names
|
assert "table1" in table_names
|
||||||
assert "table2" in table_names
|
assert "table2" in table_names
|
||||||
assert len(table_names) == 2
|
assert len(table_names) == 2
|
||||||
|
|
||||||
# Drop one table
|
# Drop one table
|
||||||
db.drop_table("table1", namespace=["test_ns"])
|
db.drop_table("table1", namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify only table2 remains
|
# Verify only table2 remains
|
||||||
table_names = list(db.table_names(namespace=["test_ns"]))
|
table_names = list(db.table_names(namespace_path=["test_ns"]))
|
||||||
assert "table1" not in table_names
|
assert "table1" not in table_names
|
||||||
assert "table2" in table_names
|
assert "table2" in table_names
|
||||||
assert len(table_names) == 1
|
assert len(table_names) == 1
|
||||||
|
|
||||||
# Drop the second table
|
# Drop the second table
|
||||||
db.drop_table("table2", namespace=["test_ns"])
|
db.drop_table("table2", namespace_path=["test_ns"])
|
||||||
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
|
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0
|
||||||
|
|
||||||
# Should not be able to open dropped table
|
# Should not be able to open dropped table
|
||||||
with pytest.raises(RuntimeError):
|
with pytest.raises(TableNotFoundError):
|
||||||
db.open_table("table1", namespace=["test_ns"])
|
db.open_table("table1", namespace_path=["test_ns"])
|
||||||
|
|
||||||
def test_create_table_with_schema(self):
|
def test_create_table_with_schema(self):
|
||||||
"""Test creating a table with explicit schema through namespace."""
|
"""Test creating a table with explicit schema through namespace."""
|
||||||
@@ -150,7 +161,7 @@ class TestNamespaceConnection:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create table with schema in child namespace
|
# Create table with schema in child namespace
|
||||||
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||||
assert table is not None
|
assert table is not None
|
||||||
assert table.namespace == ["test_ns"]
|
assert table.namespace == ["test_ns"]
|
||||||
|
|
||||||
@@ -174,7 +185,7 @@ class TestNamespaceConnection:
|
|||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
db.create_table("old_name", schema=schema, namespace=["test_ns"])
|
db.create_table("old_name", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Rename should raise NotImplementedError
|
# Rename should raise NotImplementedError
|
||||||
with pytest.raises(NotImplementedError, match="rename_table is not supported"):
|
with pytest.raises(NotImplementedError, match="rename_table is not supported"):
|
||||||
@@ -195,20 +206,20 @@ class TestNamespaceConnection:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
|
db.create_table(f"table{i}", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify tables exist in child namespace
|
# Verify tables exist in child namespace
|
||||||
assert len(list(db.table_names(namespace=["test_ns"]))) == 3
|
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 3
|
||||||
|
|
||||||
# Drop all tables in child namespace
|
# Drop all tables in child namespace
|
||||||
db.drop_all_tables(namespace=["test_ns"])
|
db.drop_all_tables(namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify all tables are gone from child namespace
|
# Verify all tables are gone from child namespace
|
||||||
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
|
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0
|
||||||
|
|
||||||
# Test that table_names works with keyword-only namespace parameter
|
# Test that table_names works with keyword-only namespace parameter
|
||||||
db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||||
result = list(db.table_names(namespace=["test_ns"]))
|
result = list(db.table_names(namespace_path=["test_ns"]))
|
||||||
assert "test_table" in result
|
assert "test_table" in result
|
||||||
|
|
||||||
def test_table_operations(self):
|
def test_table_operations(self):
|
||||||
@@ -226,7 +237,7 @@ class TestNamespaceConnection:
|
|||||||
pa.field("text", pa.string()),
|
pa.field("text", pa.string()),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify empty table was created
|
# Verify empty table was created
|
||||||
result = table.to_pandas()
|
result = table.to_pandas()
|
||||||
@@ -297,25 +308,25 @@ class TestNamespaceConnection:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
table = db.create_table(
|
table = db.create_table(
|
||||||
"test_table", schema=schema, namespace=["test_namespace"]
|
"test_table", schema=schema, namespace_path=["test_namespace"]
|
||||||
)
|
)
|
||||||
assert table is not None
|
assert table is not None
|
||||||
|
|
||||||
# Verify table exists in namespace
|
# Verify table exists in namespace
|
||||||
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
|
tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
|
||||||
assert "test_table" in tables_in_namespace
|
assert "test_table" in tables_in_namespace
|
||||||
assert len(tables_in_namespace) == 1
|
assert len(tables_in_namespace) == 1
|
||||||
|
|
||||||
# Open table from namespace
|
# Open table from namespace
|
||||||
table = db.open_table("test_table", namespace=["test_namespace"])
|
table = db.open_table("test_table", namespace_path=["test_namespace"])
|
||||||
assert table is not None
|
assert table is not None
|
||||||
assert table.name == "test_table"
|
assert table.name == "test_table"
|
||||||
|
|
||||||
# Drop table from namespace
|
# Drop table from namespace
|
||||||
db.drop_table("test_table", namespace=["test_namespace"])
|
db.drop_table("test_table", namespace_path=["test_namespace"])
|
||||||
|
|
||||||
# Verify table no longer exists in namespace
|
# Verify table no longer exists in namespace
|
||||||
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
|
tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
|
||||||
assert len(tables_in_namespace) == 0
|
assert len(tables_in_namespace) == 0
|
||||||
|
|
||||||
# Drop namespace
|
# Drop namespace
|
||||||
@@ -337,14 +348,14 @@ class TestNamespaceConnection:
|
|||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
db.create_table("test_table", schema=schema, namespace=["test_namespace"])
|
db.create_table("test_table", schema=schema, namespace_path=["test_namespace"])
|
||||||
|
|
||||||
# Try to drop namespace with tables - should fail
|
# Try to drop namespace with tables - should fail
|
||||||
with pytest.raises(RuntimeError, match="is not empty"):
|
with pytest.raises(NamespaceNotEmptyError):
|
||||||
db.drop_namespace(["test_namespace"])
|
db.drop_namespace(["test_namespace"])
|
||||||
|
|
||||||
# Drop table first
|
# Drop table first
|
||||||
db.drop_table("test_table", namespace=["test_namespace"])
|
db.drop_table("test_table", namespace_path=["test_namespace"])
|
||||||
|
|
||||||
# Now dropping namespace should work
|
# Now dropping namespace should work
|
||||||
db.drop_namespace(["test_namespace"])
|
db.drop_namespace(["test_namespace"])
|
||||||
@@ -367,10 +378,10 @@ class TestNamespaceConnection:
|
|||||||
|
|
||||||
# Create table with same name in both namespaces
|
# Create table with same name in both namespaces
|
||||||
table_a = db.create_table(
|
table_a = db.create_table(
|
||||||
"same_name_table", schema=schema, namespace=["namespace_a"]
|
"same_name_table", schema=schema, namespace_path=["namespace_a"]
|
||||||
)
|
)
|
||||||
table_b = db.create_table(
|
table_b = db.create_table(
|
||||||
"same_name_table", schema=schema, namespace=["namespace_b"]
|
"same_name_table", schema=schema, namespace_path=["namespace_b"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add different data to each table
|
# Add different data to each table
|
||||||
@@ -388,7 +399,9 @@ class TestNamespaceConnection:
|
|||||||
table_b.add(data_b)
|
table_b.add(data_b)
|
||||||
|
|
||||||
# Verify data in namespace_a table
|
# Verify data in namespace_a table
|
||||||
opened_table_a = db.open_table("same_name_table", namespace=["namespace_a"])
|
opened_table_a = db.open_table(
|
||||||
|
"same_name_table", namespace_path=["namespace_a"]
|
||||||
|
)
|
||||||
result_a = opened_table_a.to_pandas().sort_values("id").reset_index(drop=True)
|
result_a = opened_table_a.to_pandas().sort_values("id").reset_index(drop=True)
|
||||||
assert len(result_a) == 2
|
assert len(result_a) == 2
|
||||||
assert result_a["id"].tolist() == [1, 2]
|
assert result_a["id"].tolist() == [1, 2]
|
||||||
@@ -399,7 +412,9 @@ class TestNamespaceConnection:
|
|||||||
assert [v.tolist() for v in result_a["vector"]] == [[1.0, 2.0], [3.0, 4.0]]
|
assert [v.tolist() for v in result_a["vector"]] == [[1.0, 2.0], [3.0, 4.0]]
|
||||||
|
|
||||||
# Verify data in namespace_b table
|
# Verify data in namespace_b table
|
||||||
opened_table_b = db.open_table("same_name_table", namespace=["namespace_b"])
|
opened_table_b = db.open_table(
|
||||||
|
"same_name_table", namespace_path=["namespace_b"]
|
||||||
|
)
|
||||||
result_b = opened_table_b.to_pandas().sort_values("id").reset_index(drop=True)
|
result_b = opened_table_b.to_pandas().sort_values("id").reset_index(drop=True)
|
||||||
assert len(result_b) == 3
|
assert len(result_b) == 3
|
||||||
assert result_b["id"].tolist() == [10, 20, 30]
|
assert result_b["id"].tolist() == [10, 20, 30]
|
||||||
@@ -419,8 +434,8 @@ class TestNamespaceConnection:
|
|||||||
assert "same_name_table" not in root_tables
|
assert "same_name_table" not in root_tables
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
db.drop_table("same_name_table", namespace=["namespace_a"])
|
db.drop_table("same_name_table", namespace_path=["namespace_a"])
|
||||||
db.drop_table("same_name_table", namespace=["namespace_b"])
|
db.drop_table("same_name_table", namespace_path=["namespace_b"])
|
||||||
db.drop_namespace(["namespace_a"])
|
db.drop_namespace(["namespace_a"])
|
||||||
db.drop_namespace(["namespace_b"])
|
db.drop_namespace(["namespace_b"])
|
||||||
|
|
||||||
@@ -448,6 +463,8 @@ class TestAsyncNamespaceConnection:
|
|||||||
table_names = await db.table_names()
|
table_names = await db.table_names()
|
||||||
assert len(list(table_names)) == 0
|
assert len(list(table_names)) == 0
|
||||||
|
|
||||||
|
# Async connect via namespace helper is not enabled yet.
|
||||||
|
|
||||||
async def test_create_table_async(self):
|
async def test_create_table_async(self):
|
||||||
"""Test creating a table asynchronously through namespace."""
|
"""Test creating a table asynchronously through namespace."""
|
||||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||||
@@ -466,13 +483,13 @@ class TestAsyncNamespaceConnection:
|
|||||||
|
|
||||||
# Create empty table in child namespace
|
# Create empty table in child namespace
|
||||||
table = await db.create_table(
|
table = await db.create_table(
|
||||||
"test_table", schema=schema, namespace=["test_ns"]
|
"test_table", schema=schema, namespace_path=["test_ns"]
|
||||||
)
|
)
|
||||||
assert table is not None
|
assert table is not None
|
||||||
assert isinstance(table, lancedb.AsyncTable)
|
assert isinstance(table, lancedb.AsyncTable)
|
||||||
|
|
||||||
# Table should appear in child namespace
|
# Table should appear in child namespace
|
||||||
table_names = await db.table_names(namespace=["test_ns"])
|
table_names = await db.table_names(namespace_path=["test_ns"])
|
||||||
assert "test_table" in list(table_names)
|
assert "test_table" in list(table_names)
|
||||||
|
|
||||||
async def test_open_table_async(self):
|
async def test_open_table_async(self):
|
||||||
@@ -489,10 +506,10 @@ class TestAsyncNamespaceConnection:
|
|||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
await db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
await db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Open the table
|
# Open the table
|
||||||
table = await db.open_table("test_table", namespace=["test_ns"])
|
table = await db.open_table("test_table", namespace_path=["test_ns"])
|
||||||
assert table is not None
|
assert table is not None
|
||||||
assert isinstance(table, lancedb.AsyncTable)
|
assert isinstance(table, lancedb.AsyncTable)
|
||||||
|
|
||||||
@@ -546,20 +563,20 @@ class TestAsyncNamespaceConnection:
|
|||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
await db.create_table("table1", schema=schema, namespace=["test_ns"])
|
await db.create_table("table1", schema=schema, namespace_path=["test_ns"])
|
||||||
await db.create_table("table2", schema=schema, namespace=["test_ns"])
|
await db.create_table("table2", schema=schema, namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify both tables exist in child namespace
|
# Verify both tables exist in child namespace
|
||||||
table_names = list(await db.table_names(namespace=["test_ns"]))
|
table_names = list(await db.table_names(namespace_path=["test_ns"]))
|
||||||
assert "table1" in table_names
|
assert "table1" in table_names
|
||||||
assert "table2" in table_names
|
assert "table2" in table_names
|
||||||
assert len(table_names) == 2
|
assert len(table_names) == 2
|
||||||
|
|
||||||
# Drop one table
|
# Drop one table
|
||||||
await db.drop_table("table1", namespace=["test_ns"])
|
await db.drop_table("table1", namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify only table2 remains
|
# Verify only table2 remains
|
||||||
table_names = list(await db.table_names(namespace=["test_ns"]))
|
table_names = list(await db.table_names(namespace_path=["test_ns"]))
|
||||||
assert "table1" not in table_names
|
assert "table1" not in table_names
|
||||||
assert "table2" in table_names
|
assert "table2" in table_names
|
||||||
assert len(table_names) == 1
|
assert len(table_names) == 1
|
||||||
@@ -588,20 +605,24 @@ class TestAsyncNamespaceConnection:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
table = await db.create_table(
|
table = await db.create_table(
|
||||||
"test_table", schema=schema, namespace=["test_namespace"]
|
"test_table", schema=schema, namespace_path=["test_namespace"]
|
||||||
)
|
)
|
||||||
assert table is not None
|
assert table is not None
|
||||||
|
|
||||||
# Verify table exists in namespace
|
# Verify table exists in namespace
|
||||||
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
|
tables_in_namespace = list(
|
||||||
|
await db.table_names(namespace_path=["test_namespace"])
|
||||||
|
)
|
||||||
assert "test_table" in tables_in_namespace
|
assert "test_table" in tables_in_namespace
|
||||||
assert len(tables_in_namespace) == 1
|
assert len(tables_in_namespace) == 1
|
||||||
|
|
||||||
# Drop table from namespace
|
# Drop table from namespace
|
||||||
await db.drop_table("test_table", namespace=["test_namespace"])
|
await db.drop_table("test_table", namespace_path=["test_namespace"])
|
||||||
|
|
||||||
# Verify table no longer exists in namespace
|
# Verify table no longer exists in namespace
|
||||||
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
|
tables_in_namespace = list(
|
||||||
|
await db.table_names(namespace_path=["test_namespace"])
|
||||||
|
)
|
||||||
assert len(tables_in_namespace) == 0
|
assert len(tables_in_namespace) == 0
|
||||||
|
|
||||||
# Drop namespace
|
# Drop namespace
|
||||||
@@ -626,15 +647,98 @@ class TestAsyncNamespaceConnection:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
await db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
|
await db.create_table(
|
||||||
|
f"table{i}", schema=schema, namespace_path=["test_ns"]
|
||||||
|
)
|
||||||
|
|
||||||
# Verify tables exist in child namespace
|
# Verify tables exist in child namespace
|
||||||
table_names = await db.table_names(namespace=["test_ns"])
|
table_names = await db.table_names(namespace_path=["test_ns"])
|
||||||
assert len(list(table_names)) == 3
|
assert len(list(table_names)) == 3
|
||||||
|
|
||||||
# Drop all tables in child namespace
|
# Drop all tables in child namespace
|
||||||
await db.drop_all_tables(namespace=["test_ns"])
|
await db.drop_all_tables(namespace_path=["test_ns"])
|
||||||
|
|
||||||
# Verify all tables are gone from child namespace
|
# Verify all tables are gone from child namespace
|
||||||
table_names = await db.table_names(namespace=["test_ns"])
|
table_names = await db.table_names(namespace_path=["test_ns"])
|
||||||
assert len(list(table_names)) == 0
|
assert len(list(table_names)) == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestPushdownOperations:
|
||||||
|
"""Test pushdown operations on namespace connections."""
|
||||||
|
|
||||||
|
def setup_method(self):
|
||||||
|
"""Set up test fixtures."""
|
||||||
|
self.temp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
|
def teardown_method(self):
|
||||||
|
"""Clean up test fixtures."""
|
||||||
|
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
def test_query_table_pushdown_stored(self):
|
||||||
|
"""Test that QueryTable pushdown is stored on sync connection."""
|
||||||
|
db = lancedb.connect_namespace(
|
||||||
|
"dir",
|
||||||
|
{"root": self.temp_dir},
|
||||||
|
namespace_client_pushdown_operations=["QueryTable"],
|
||||||
|
)
|
||||||
|
assert "QueryTable" in db._pushdown_operations
|
||||||
|
|
||||||
|
def test_create_table_pushdown_stored(self):
|
||||||
|
"""Test that CreateTable pushdown is stored on sync connection."""
|
||||||
|
db = lancedb.connect_namespace(
|
||||||
|
"dir",
|
||||||
|
{"root": self.temp_dir},
|
||||||
|
namespace_client_pushdown_operations=["CreateTable"],
|
||||||
|
)
|
||||||
|
assert "CreateTable" in db._pushdown_operations
|
||||||
|
|
||||||
|
def test_both_pushdowns_stored(self):
|
||||||
|
"""Test that both pushdown operations can be set together."""
|
||||||
|
db = lancedb.connect_namespace(
|
||||||
|
"dir",
|
||||||
|
{"root": self.temp_dir},
|
||||||
|
namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
|
||||||
|
)
|
||||||
|
assert "QueryTable" in db._pushdown_operations
|
||||||
|
assert "CreateTable" in db._pushdown_operations
|
||||||
|
|
||||||
|
def test_pushdown_defaults_to_empty(self):
|
||||||
|
"""Test that pushdown operations default to empty."""
|
||||||
|
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||||
|
assert len(db._pushdown_operations) == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
class TestAsyncPushdownOperations:
|
||||||
|
"""Test pushdown operations on async namespace connections."""
|
||||||
|
|
||||||
|
def setup_method(self):
|
||||||
|
"""Set up test fixtures."""
|
||||||
|
self.temp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
|
def teardown_method(self):
|
||||||
|
"""Clean up test fixtures."""
|
||||||
|
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
async def test_async_query_table_pushdown_stored(self):
|
||||||
|
"""Test that QueryTable pushdown is stored on async connection."""
|
||||||
|
db = lancedb.connect_namespace_async(
|
||||||
|
"dir",
|
||||||
|
{"root": self.temp_dir},
|
||||||
|
namespace_client_pushdown_operations=["QueryTable"],
|
||||||
|
)
|
||||||
|
assert "QueryTable" in db._pushdown_operations
|
||||||
|
|
||||||
|
async def test_async_create_table_pushdown_stored(self):
|
||||||
|
"""Test that CreateTable pushdown is stored on async connection."""
|
||||||
|
db = lancedb.connect_namespace_async(
|
||||||
|
"dir",
|
||||||
|
{"root": self.temp_dir},
|
||||||
|
namespace_client_pushdown_operations=["CreateTable"],
|
||||||
|
)
|
||||||
|
assert "CreateTable" in db._pushdown_operations
|
||||||
|
|
||||||
|
async def test_async_pushdown_defaults_to_empty(self):
|
||||||
|
"""Test that pushdown operations default to empty on async connection."""
|
||||||
|
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||||
|
assert len(db._pushdown_operations) == 0
|
||||||
|
|||||||
@@ -4,9 +4,11 @@
|
|||||||
"""
|
"""
|
||||||
Integration tests for LanceDB Namespace with S3 and credential refresh.
|
Integration tests for LanceDB Namespace with S3 and credential refresh.
|
||||||
|
|
||||||
This test simulates a namespace server that returns incrementing credentials
|
This test uses DirectoryNamespace with native ops_metrics and vend_input_storage_options
|
||||||
and verifies that the credential refresh mechanism works correctly for both
|
features to track API calls and test credential refresh mechanisms.
|
||||||
create_table and open_table operations.
|
|
||||||
|
Tests are parameterized to run with both DirectoryNamespace and a CustomNamespace
|
||||||
|
wrapper to verify Python-Rust binding works correctly for custom implementations.
|
||||||
|
|
||||||
Tests verify:
|
Tests verify:
|
||||||
- Storage options provider is auto-created and used
|
- Storage options provider is auto-created and used
|
||||||
@@ -18,22 +20,136 @@ Tests verify:
|
|||||||
import copy
|
import copy
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
from threading import Lock
|
from typing import Dict, Optional
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from lance_namespace import (
|
from lance.namespace import (
|
||||||
CreateEmptyTableRequest,
|
|
||||||
CreateEmptyTableResponse,
|
|
||||||
DeclareTableRequest,
|
DeclareTableRequest,
|
||||||
DeclareTableResponse,
|
DeclareTableResponse,
|
||||||
DescribeTableRequest,
|
DescribeTableRequest,
|
||||||
DescribeTableResponse,
|
DescribeTableResponse,
|
||||||
|
DirectoryNamespace,
|
||||||
LanceNamespace,
|
LanceNamespace,
|
||||||
)
|
)
|
||||||
|
from lance_namespace import (
|
||||||
|
CreateNamespaceRequest,
|
||||||
|
CreateNamespaceResponse,
|
||||||
|
CreateTableRequest,
|
||||||
|
CreateTableResponse,
|
||||||
|
CreateTableVersionRequest,
|
||||||
|
CreateTableVersionResponse,
|
||||||
|
DeregisterTableRequest,
|
||||||
|
DeregisterTableResponse,
|
||||||
|
DescribeNamespaceRequest,
|
||||||
|
DescribeNamespaceResponse,
|
||||||
|
DescribeTableVersionRequest,
|
||||||
|
DescribeTableVersionResponse,
|
||||||
|
DropNamespaceRequest,
|
||||||
|
DropNamespaceResponse,
|
||||||
|
DropTableRequest,
|
||||||
|
DropTableResponse,
|
||||||
|
ListNamespacesRequest,
|
||||||
|
ListNamespacesResponse,
|
||||||
|
ListTablesRequest,
|
||||||
|
ListTablesResponse,
|
||||||
|
ListTableVersionsRequest,
|
||||||
|
ListTableVersionsResponse,
|
||||||
|
NamespaceExistsRequest,
|
||||||
|
RegisterTableRequest,
|
||||||
|
RegisterTableResponse,
|
||||||
|
TableExistsRequest,
|
||||||
|
)
|
||||||
from lancedb.namespace import LanceNamespaceDBConnection
|
from lancedb.namespace import LanceNamespaceDBConnection
|
||||||
|
|
||||||
|
|
||||||
|
class CustomNamespace(LanceNamespace):
|
||||||
|
"""A custom namespace wrapper that delegates to DirectoryNamespace.
|
||||||
|
|
||||||
|
This class verifies that the Python-Rust binding works correctly for
|
||||||
|
custom namespace implementations that wrap the native DirectoryNamespace.
|
||||||
|
All methods simply delegate to the underlying DirectoryNamespace instance.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, inner: DirectoryNamespace):
|
||||||
|
self._inner = inner
|
||||||
|
|
||||||
|
def namespace_id(self) -> str:
|
||||||
|
return f"CustomNamespace[{self._inner.namespace_id()}]"
|
||||||
|
|
||||||
|
def create_namespace(
|
||||||
|
self, request: CreateNamespaceRequest
|
||||||
|
) -> CreateNamespaceResponse:
|
||||||
|
return self._inner.create_namespace(request)
|
||||||
|
|
||||||
|
def describe_namespace(
|
||||||
|
self, request: DescribeNamespaceRequest
|
||||||
|
) -> DescribeNamespaceResponse:
|
||||||
|
return self._inner.describe_namespace(request)
|
||||||
|
|
||||||
|
def namespace_exists(self, request: NamespaceExistsRequest) -> None:
|
||||||
|
return self._inner.namespace_exists(request)
|
||||||
|
|
||||||
|
def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
|
||||||
|
return self._inner.drop_namespace(request)
|
||||||
|
|
||||||
|
def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
|
||||||
|
return self._inner.list_namespaces(request)
|
||||||
|
|
||||||
|
def create_table(
|
||||||
|
self, request: CreateTableRequest, data: bytes
|
||||||
|
) -> CreateTableResponse:
|
||||||
|
return self._inner.create_table(request, data)
|
||||||
|
|
||||||
|
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
|
||||||
|
return self._inner.declare_table(request)
|
||||||
|
|
||||||
|
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
|
||||||
|
return self._inner.describe_table(request)
|
||||||
|
|
||||||
|
def table_exists(self, request: TableExistsRequest) -> None:
|
||||||
|
return self._inner.table_exists(request)
|
||||||
|
|
||||||
|
def drop_table(self, request: DropTableRequest) -> DropTableResponse:
|
||||||
|
return self._inner.drop_table(request)
|
||||||
|
|
||||||
|
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
|
||||||
|
return self._inner.list_tables(request)
|
||||||
|
|
||||||
|
def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse:
|
||||||
|
return self._inner.register_table(request)
|
||||||
|
|
||||||
|
def deregister_table(
|
||||||
|
self, request: DeregisterTableRequest
|
||||||
|
) -> DeregisterTableResponse:
|
||||||
|
return self._inner.deregister_table(request)
|
||||||
|
|
||||||
|
def list_table_versions(
|
||||||
|
self, request: ListTableVersionsRequest
|
||||||
|
) -> ListTableVersionsResponse:
|
||||||
|
return self._inner.list_table_versions(request)
|
||||||
|
|
||||||
|
def describe_table_version(
|
||||||
|
self, request: DescribeTableVersionRequest
|
||||||
|
) -> DescribeTableVersionResponse:
|
||||||
|
return self._inner.describe_table_version(request)
|
||||||
|
|
||||||
|
def create_table_version(
|
||||||
|
self, request: CreateTableVersionRequest
|
||||||
|
) -> CreateTableVersionResponse:
|
||||||
|
return self._inner.create_table_version(request)
|
||||||
|
|
||||||
|
def retrieve_ops_metrics(self) -> Optional[Dict[str, int]]:
|
||||||
|
return self._inner.retrieve_ops_metrics()
|
||||||
|
|
||||||
|
|
||||||
|
def _wrap_if_custom(ns_client: DirectoryNamespace, use_custom: bool):
|
||||||
|
"""Wrap namespace client in CustomNamespace if use_custom is True."""
|
||||||
|
if use_custom:
|
||||||
|
return CustomNamespace(ns_client)
|
||||||
|
return ns_client
|
||||||
|
|
||||||
|
|
||||||
# LocalStack S3 configuration
|
# LocalStack S3 configuration
|
||||||
CONFIG = {
|
CONFIG = {
|
||||||
"allow_http": "true",
|
"allow_http": "true",
|
||||||
@@ -89,157 +205,88 @@ def delete_bucket(s3, bucket_name):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class TrackingNamespace(LanceNamespace):
|
def create_tracking_namespace(
|
||||||
|
bucket_name: str,
|
||||||
|
storage_options: dict,
|
||||||
|
credential_expires_in_seconds: int = 60,
|
||||||
|
use_custom: bool = False,
|
||||||
|
):
|
||||||
|
"""Create a DirectoryNamespace with ops metrics and credential vending enabled.
|
||||||
|
|
||||||
|
Uses native DirectoryNamespace features:
|
||||||
|
- ops_metrics_enabled=true: Tracks API call counts via retrieve_ops_metrics()
|
||||||
|
- vend_input_storage_options=true: Returns input storage options in responses
|
||||||
|
- vend_input_storage_options_refresh_interval_millis: Adds expires_at_millis
|
||||||
|
|
||||||
|
Args:
|
||||||
|
bucket_name: S3 bucket name or local path
|
||||||
|
storage_options: Storage options to pass through (credentials, endpoint, etc.)
|
||||||
|
credential_expires_in_seconds: Interval in seconds for credential expiration
|
||||||
|
use_custom: If True, wrap in CustomNamespace for testing custom implementations
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (namespace_client, inner_namespace_client) where inner is always
|
||||||
|
the DirectoryNamespace (used for metrics retrieval)
|
||||||
"""
|
"""
|
||||||
Mock namespace that wraps DirectoryNamespace and tracks API calls.
|
# Add refresh_offset_millis to storage options so that credentials are not
|
||||||
|
# considered expired immediately. Set to 1 second (1000ms) so that refresh
|
||||||
|
# checks work correctly with short-lived credentials in tests.
|
||||||
|
storage_options_with_refresh = dict(storage_options)
|
||||||
|
storage_options_with_refresh["refresh_offset_millis"] = "1000"
|
||||||
|
|
||||||
This namespace returns incrementing credentials with each API call to simulate
|
dir_props = {f"storage.{k}": v for k, v in storage_options_with_refresh.items()}
|
||||||
credential rotation. It also tracks the number of times each API is called
|
|
||||||
to verify caching behavior.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
if bucket_name.startswith("/") or bucket_name.startswith("file://"):
|
||||||
self,
|
dir_props["root"] = f"{bucket_name}/namespace_root"
|
||||||
bucket_name: str,
|
else:
|
||||||
storage_options: Dict[str, str],
|
dir_props["root"] = f"s3://{bucket_name}/namespace_root"
|
||||||
credential_expires_in_seconds: int = 60,
|
|
||||||
):
|
|
||||||
from lance.namespace import DirectoryNamespace
|
|
||||||
|
|
||||||
self.bucket_name = bucket_name
|
# Enable ops metrics tracking
|
||||||
self.base_storage_options = storage_options
|
dir_props["ops_metrics_enabled"] = "true"
|
||||||
self.credential_expires_in_seconds = credential_expires_in_seconds
|
# Enable storage options vending
|
||||||
self.describe_call_count = 0
|
dir_props["vend_input_storage_options"] = "true"
|
||||||
self.create_call_count = 0
|
# Set refresh interval in milliseconds
|
||||||
self.lock = Lock()
|
dir_props["vend_input_storage_options_refresh_interval_millis"] = str(
|
||||||
|
credential_expires_in_seconds * 1000
|
||||||
|
)
|
||||||
|
|
||||||
# Create underlying DirectoryNamespace with storage options
|
inner_ns_client = DirectoryNamespace(**dir_props)
|
||||||
dir_props = {f"storage.{k}": v for k, v in storage_options.items()}
|
ns_client = _wrap_if_custom(inner_ns_client, use_custom)
|
||||||
|
return ns_client, inner_ns_client
|
||||||
|
|
||||||
# Use S3 path for bucket name, local path for file paths
|
|
||||||
if bucket_name.startswith("/") or bucket_name.startswith("file://"):
|
|
||||||
dir_props["root"] = f"{bucket_name}/namespace_root"
|
|
||||||
else:
|
|
||||||
dir_props["root"] = f"s3://{bucket_name}/namespace_root"
|
|
||||||
|
|
||||||
self.inner = DirectoryNamespace(**dir_props)
|
def get_describe_call_count(namespace_client) -> int:
|
||||||
|
"""Get the number of describe_table calls made to the namespace client."""
|
||||||
|
return namespace_client.retrieve_ops_metrics().get("describe_table", 0)
|
||||||
|
|
||||||
def get_describe_call_count(self) -> int:
|
|
||||||
"""Thread-safe getter for describe call count."""
|
|
||||||
with self.lock:
|
|
||||||
return self.describe_call_count
|
|
||||||
|
|
||||||
def get_create_call_count(self) -> int:
|
def get_declare_call_count(namespace_client) -> int:
|
||||||
"""Thread-safe getter for create call count."""
|
"""Get the number of declare_table calls made to the namespace client."""
|
||||||
with self.lock:
|
return namespace_client.retrieve_ops_metrics().get("declare_table", 0)
|
||||||
return self.create_call_count
|
|
||||||
|
|
||||||
def namespace_id(self) -> str:
|
|
||||||
"""Return namespace identifier."""
|
|
||||||
return f"TrackingNamespace {{ inner: {self.inner.namespace_id()} }}"
|
|
||||||
|
|
||||||
def _modify_storage_options(
|
|
||||||
self, storage_options: Dict[str, str], count: int
|
|
||||||
) -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Add incrementing credentials with expiration timestamp.
|
|
||||||
|
|
||||||
This simulates a credential rotation system where each call returns
|
|
||||||
new credentials that expire after credential_expires_in_seconds.
|
|
||||||
"""
|
|
||||||
modified = copy.deepcopy(storage_options) if storage_options else {}
|
|
||||||
|
|
||||||
# Increment credentials to simulate rotation
|
|
||||||
modified["aws_access_key_id"] = f"AKID_{count}"
|
|
||||||
modified["aws_secret_access_key"] = f"SECRET_{count}"
|
|
||||||
modified["aws_session_token"] = f"TOKEN_{count}"
|
|
||||||
|
|
||||||
# Set expiration time
|
|
||||||
expires_at_millis = int(
|
|
||||||
(time.time() + self.credential_expires_in_seconds) * 1000
|
|
||||||
)
|
|
||||||
modified["expires_at_millis"] = str(expires_at_millis)
|
|
||||||
|
|
||||||
return modified
|
|
||||||
|
|
||||||
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
|
|
||||||
"""Track declare_table calls and inject rotating credentials."""
|
|
||||||
with self.lock:
|
|
||||||
self.create_call_count += 1
|
|
||||||
count = self.create_call_count
|
|
||||||
|
|
||||||
response = self.inner.declare_table(request)
|
|
||||||
response.storage_options = self._modify_storage_options(
|
|
||||||
response.storage_options, count
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
def create_empty_table(
|
|
||||||
self, request: CreateEmptyTableRequest
|
|
||||||
) -> CreateEmptyTableResponse:
|
|
||||||
"""Track create_empty_table calls and inject rotating credentials."""
|
|
||||||
with self.lock:
|
|
||||||
self.create_call_count += 1
|
|
||||||
count = self.create_call_count
|
|
||||||
|
|
||||||
response = self.inner.create_empty_table(request)
|
|
||||||
response.storage_options = self._modify_storage_options(
|
|
||||||
response.storage_options, count
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
|
|
||||||
"""Track describe_table calls and inject rotating credentials."""
|
|
||||||
with self.lock:
|
|
||||||
self.describe_call_count += 1
|
|
||||||
count = self.describe_call_count
|
|
||||||
|
|
||||||
response = self.inner.describe_table(request)
|
|
||||||
response.storage_options = self._modify_storage_options(
|
|
||||||
response.storage_options, count
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
# Pass through other methods to inner namespace
|
|
||||||
def list_tables(self, request):
|
|
||||||
return self.inner.list_tables(request)
|
|
||||||
|
|
||||||
def drop_table(self, request):
|
|
||||||
return self.inner.drop_table(request)
|
|
||||||
|
|
||||||
def list_namespaces(self, request):
|
|
||||||
return self.inner.list_namespaces(request)
|
|
||||||
|
|
||||||
def create_namespace(self, request):
|
|
||||||
return self.inner.create_namespace(request)
|
|
||||||
|
|
||||||
def drop_namespace(self, request):
|
|
||||||
return self.inner.drop_namespace(request)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_create_table_with_provider(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test creating a table through namespace with storage options provider.
|
Test creating a table through namespace with storage options provider.
|
||||||
|
|
||||||
Verifies:
|
Verifies:
|
||||||
- create_empty_table is called once to reserve location
|
- declare_table is called once to reserve location
|
||||||
- Storage options provider is auto-created
|
- Storage options provider is auto-created
|
||||||
- Table can be written successfully
|
- Table can be written successfully
|
||||||
- Credentials are cached during write operations
|
- Credentials are cached during write operations
|
||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3600, # 1 hour
|
credential_expires_in_seconds=3600, # 1 hour
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -249,8 +296,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
|
|||||||
namespace_path = [namespace_name]
|
namespace_path = [namespace_name]
|
||||||
|
|
||||||
# Verify initial state
|
# Verify initial state
|
||||||
assert namespace.get_create_call_count() == 0
|
assert get_declare_call_count(inner_ns_client) == 0
|
||||||
assert namespace.get_describe_call_count() == 0
|
assert get_describe_call_count(inner_ns_client) == 0
|
||||||
|
|
||||||
# Create table with data
|
# Create table with data
|
||||||
data = pa.table(
|
data = pa.table(
|
||||||
@@ -261,12 +308,12 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
table = db.create_table(table_name, data, namespace=namespace_path)
|
table = db.create_table(table_name, data, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Verify create_empty_table was called exactly once
|
# Verify declare_table was called exactly once
|
||||||
assert namespace.get_create_call_count() == 1
|
assert get_declare_call_count(inner_ns_client) == 1
|
||||||
# describe_table should NOT be called during create in create mode
|
# describe_table should NOT be called during create in create mode
|
||||||
assert namespace.get_describe_call_count() == 0
|
assert get_describe_call_count(inner_ns_client) == 0
|
||||||
|
|
||||||
# Verify table was created successfully
|
# Verify table was created successfully
|
||||||
assert table.name == table_name
|
assert table.name == table_name
|
||||||
@@ -276,7 +323,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_open_table_with_provider(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test opening a table through namespace with storage options provider.
|
Test opening a table through namespace with storage options provider.
|
||||||
|
|
||||||
@@ -288,13 +336,14 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
|
|||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3600,
|
credential_expires_in_seconds=3600,
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -312,21 +361,21 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
db.create_table(table_name, data, namespace=namespace_path)
|
db.create_table(table_name, data, namespace_path=namespace_path)
|
||||||
|
|
||||||
initial_create_count = namespace.get_create_call_count()
|
initial_declare_count = get_declare_call_count(inner_ns_client)
|
||||||
assert initial_create_count == 1
|
assert initial_declare_count == 1
|
||||||
|
|
||||||
# Open the table
|
# Open the table
|
||||||
opened_table = db.open_table(table_name, namespace=namespace_path)
|
opened_table = db.open_table(table_name, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Verify describe_table was called exactly once
|
# Verify describe_table was called exactly once
|
||||||
assert namespace.get_describe_call_count() == 1
|
assert get_describe_call_count(inner_ns_client) == 1
|
||||||
# create_empty_table should not be called again
|
# declare_table should not be called again
|
||||||
assert namespace.get_create_call_count() == initial_create_count
|
assert get_declare_call_count(inner_ns_client) == initial_declare_count
|
||||||
|
|
||||||
# Perform multiple read operations
|
# Perform multiple read operations
|
||||||
describe_count_after_open = namespace.get_describe_call_count()
|
describe_count_after_open = get_describe_call_count(inner_ns_client)
|
||||||
|
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
result = opened_table.to_pandas()
|
result = opened_table.to_pandas()
|
||||||
@@ -335,11 +384,12 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
|
|||||||
assert count == 5
|
assert count == 5
|
||||||
|
|
||||||
# Verify credentials were cached (no additional describe_table calls)
|
# Verify credentials were cached (no additional describe_table calls)
|
||||||
assert namespace.get_describe_call_count() == describe_count_after_open
|
assert get_describe_call_count(inner_ns_client) == describe_count_after_open
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test credential refresh when credentials expire during read operations.
|
Test credential refresh when credentials expire during read operations.
|
||||||
|
|
||||||
@@ -350,13 +400,14 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
|||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3, # Short expiration for testing
|
credential_expires_in_seconds=3, # Short expiration for testing
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -373,16 +424,16 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
db.create_table(table_name, data, namespace=namespace_path)
|
db.create_table(table_name, data, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Open table (triggers describe_table)
|
# Open table (triggers describe_table)
|
||||||
opened_table = db.open_table(table_name, namespace=namespace_path)
|
opened_table = db.open_table(table_name, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Perform an immediate read (should use credentials from open)
|
# Perform an immediate read (should use credentials from open)
|
||||||
result = opened_table.to_pandas()
|
result = opened_table.to_pandas()
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
|
|
||||||
describe_count_after_first_read = namespace.get_describe_call_count()
|
describe_count_after_first_read = get_describe_call_count(inner_ns_client)
|
||||||
|
|
||||||
# Wait for credentials to expire (3 seconds + buffer)
|
# Wait for credentials to expire (3 seconds + buffer)
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
@@ -391,7 +442,7 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
|||||||
result = opened_table.to_pandas()
|
result = opened_table.to_pandas()
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
|
|
||||||
describe_count_after_refresh = namespace.get_describe_call_count()
|
describe_count_after_refresh = get_describe_call_count(inner_ns_client)
|
||||||
# Verify describe_table was called again (credential refresh)
|
# Verify describe_table was called again (credential refresh)
|
||||||
refresh_delta = describe_count_after_refresh - describe_count_after_first_read
|
refresh_delta = describe_count_after_refresh - describe_count_after_first_read
|
||||||
|
|
||||||
@@ -404,7 +455,8 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test credential refresh when credentials expire during write operations.
|
Test credential refresh when credentials expire during write operations.
|
||||||
|
|
||||||
@@ -415,13 +467,14 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
|||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3, # Short expiration
|
credential_expires_in_seconds=3, # Short expiration
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -438,7 +491,7 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
table = db.create_table(table_name, initial_data, namespace=namespace_path)
|
table = db.create_table(table_name, initial_data, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Add more data (should use cached credentials)
|
# Add more data (should use cached credentials)
|
||||||
new_data = pa.table(
|
new_data = pa.table(
|
||||||
@@ -466,24 +519,26 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_overwrite_mode(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test creating table in overwrite mode with credential tracking.
|
Test creating table in overwrite mode with credential tracking.
|
||||||
|
|
||||||
Verifies:
|
Verifies:
|
||||||
- First create calls create_empty_table exactly once
|
- First create calls declare_table exactly once
|
||||||
- Overwrite mode calls describe_table exactly once to check existence
|
- Overwrite mode calls describe_table exactly once to check existence
|
||||||
- Storage options provider works in overwrite mode
|
- Storage options provider works in overwrite mode
|
||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3600,
|
credential_expires_in_seconds=3600,
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -500,11 +555,11 @@ def test_namespace_overwrite_mode(s3_bucket: str):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
table = db.create_table(table_name, data1, namespace=namespace_path)
|
table = db.create_table(table_name, data1, namespace_path=namespace_path)
|
||||||
# Exactly one create_empty_table call for initial create
|
# Exactly one declare_table call for initial create
|
||||||
assert namespace.get_create_call_count() == 1
|
assert get_declare_call_count(inner_ns_client) == 1
|
||||||
# No describe_table calls in create mode
|
# No describe_table calls in create mode
|
||||||
assert namespace.get_describe_call_count() == 0
|
assert get_describe_call_count(inner_ns_client) == 0
|
||||||
assert table.count_rows() == 2
|
assert table.count_rows() == 2
|
||||||
|
|
||||||
# Overwrite the table
|
# Overwrite the table
|
||||||
@@ -516,14 +571,14 @@ def test_namespace_overwrite_mode(s3_bucket: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
table2 = db.create_table(
|
table2 = db.create_table(
|
||||||
table_name, data2, namespace=namespace_path, mode="overwrite"
|
table_name, data2, namespace_path=namespace_path, mode="overwrite"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Should still have only 1 create_empty_table call
|
# Should still have only 1 declare_table call
|
||||||
# (overwrite reuses location from describe_table)
|
# (overwrite reuses location from describe_table)
|
||||||
assert namespace.get_create_call_count() == 1
|
assert get_declare_call_count(inner_ns_client) == 1
|
||||||
# Should have called describe_table exactly once to get existing table location
|
# Should have called describe_table exactly once to get existing table location
|
||||||
assert namespace.get_describe_call_count() == 1
|
assert get_describe_call_count(inner_ns_client) == 1
|
||||||
|
|
||||||
# Verify new data
|
# Verify new data
|
||||||
assert table2.count_rows() == 3
|
assert table2.count_rows() == 3
|
||||||
@@ -532,7 +587,8 @@ def test_namespace_overwrite_mode(s3_bucket: str):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_multiple_tables(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test creating and opening multiple tables in the same namespace.
|
Test creating and opening multiple tables in the same namespace.
|
||||||
|
|
||||||
@@ -543,13 +599,14 @@ def test_namespace_multiple_tables(s3_bucket: str):
|
|||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3600,
|
credential_expires_in_seconds=3600,
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -559,22 +616,22 @@ def test_namespace_multiple_tables(s3_bucket: str):
|
|||||||
# Create first table
|
# Create first table
|
||||||
table1_name = f"table1_{uuid.uuid4().hex}"
|
table1_name = f"table1_{uuid.uuid4().hex}"
|
||||||
data1 = pa.table({"id": [1, 2], "value": [10, 20]})
|
data1 = pa.table({"id": [1, 2], "value": [10, 20]})
|
||||||
db.create_table(table1_name, data1, namespace=namespace_path)
|
db.create_table(table1_name, data1, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Create second table
|
# Create second table
|
||||||
table2_name = f"table2_{uuid.uuid4().hex}"
|
table2_name = f"table2_{uuid.uuid4().hex}"
|
||||||
data2 = pa.table({"id": [3, 4], "value": [30, 40]})
|
data2 = pa.table({"id": [3, 4], "value": [30, 40]})
|
||||||
db.create_table(table2_name, data2, namespace=namespace_path)
|
db.create_table(table2_name, data2, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Should have 2 create calls (one per table)
|
# Should have 2 declare calls (one per table)
|
||||||
assert namespace.get_create_call_count() == 2
|
assert get_declare_call_count(inner_ns_client) == 2
|
||||||
|
|
||||||
# Open both tables
|
# Open both tables
|
||||||
opened1 = db.open_table(table1_name, namespace=namespace_path)
|
opened1 = db.open_table(table1_name, namespace_path=namespace_path)
|
||||||
opened2 = db.open_table(table2_name, namespace=namespace_path)
|
opened2 = db.open_table(table2_name, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Should have 2 describe calls (one per open)
|
# Should have 2 describe calls (one per open)
|
||||||
assert namespace.get_describe_call_count() == 2
|
assert get_describe_call_count(inner_ns_client) == 2
|
||||||
|
|
||||||
# Verify both tables work independently
|
# Verify both tables work independently
|
||||||
assert opened1.count_rows() == 2
|
assert opened1.count_rows() == 2
|
||||||
@@ -588,7 +645,8 @@ def test_namespace_multiple_tables(s3_bucket: str):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.s3_test
|
@pytest.mark.s3_test
|
||||||
def test_namespace_with_schema_only(s3_bucket: str):
|
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||||
|
def test_namespace_with_schema_only(s3_bucket: str, use_custom: bool):
|
||||||
"""
|
"""
|
||||||
Test creating empty table with schema only (no data).
|
Test creating empty table with schema only (no data).
|
||||||
|
|
||||||
@@ -599,13 +657,14 @@ def test_namespace_with_schema_only(s3_bucket: str):
|
|||||||
"""
|
"""
|
||||||
storage_options = copy.deepcopy(CONFIG)
|
storage_options = copy.deepcopy(CONFIG)
|
||||||
|
|
||||||
namespace = TrackingNamespace(
|
ns_client, inner_ns_client = create_tracking_namespace(
|
||||||
bucket_name=s3_bucket,
|
bucket_name=s3_bucket,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
credential_expires_in_seconds=3600,
|
credential_expires_in_seconds=3600,
|
||||||
|
use_custom=use_custom,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = LanceNamespaceDBConnection(namespace)
|
db = LanceNamespaceDBConnection(ns_client)
|
||||||
|
|
||||||
# Create unique namespace for this test
|
# Create unique namespace for this test
|
||||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||||
@@ -623,12 +682,12 @@ def test_namespace_with_schema_only(s3_bucket: str):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
table = db.create_table(table_name, schema=schema, namespace=namespace_path)
|
table = db.create_table(table_name, schema=schema, namespace_path=namespace_path)
|
||||||
|
|
||||||
# Should have called create_empty_table once
|
# Should have called declare_table once
|
||||||
assert namespace.get_create_call_count() == 1
|
assert get_declare_call_count(inner_ns_client) == 1
|
||||||
# Should NOT have called describe_table in create mode
|
# Should NOT have called describe_table in create mode
|
||||||
assert namespace.get_describe_call_count() == 0
|
assert get_describe_call_count(inner_ns_client) == 0
|
||||||
|
|
||||||
# Verify empty table
|
# Verify empty table
|
||||||
assert table.count_rows() == 0
|
assert table.count_rows() == 0
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ from lancedb.query import (
|
|||||||
PhraseQuery,
|
PhraseQuery,
|
||||||
Query,
|
Query,
|
||||||
FullTextSearchQuery,
|
FullTextSearchQuery,
|
||||||
|
ensure_vector_query,
|
||||||
)
|
)
|
||||||
from lancedb.rerankers.cross_encoder import CrossEncoderReranker
|
from lancedb.rerankers.cross_encoder import CrossEncoderReranker
|
||||||
from lancedb.table import AsyncTable, LanceTable
|
from lancedb.table import AsyncTable, LanceTable
|
||||||
@@ -1501,6 +1502,18 @@ def test_search_empty_table(mem_db):
|
|||||||
assert results == []
|
assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_ensure_vector_query_empty_list():
|
||||||
|
"""Regression: ensure_vector_query used to return instead of raise ValueError."""
|
||||||
|
with pytest.raises(ValueError, match="non-empty"):
|
||||||
|
ensure_vector_query([])
|
||||||
|
|
||||||
|
|
||||||
|
def test_ensure_vector_query_nested_empty_list():
|
||||||
|
"""Regression: ensure_vector_query used to return instead of raise ValueError."""
|
||||||
|
with pytest.raises(ValueError, match="non-empty"):
|
||||||
|
ensure_vector_query([[]])
|
||||||
|
|
||||||
|
|
||||||
def test_fast_search(tmp_path):
|
def test_fast_search(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
|||||||
@@ -1201,6 +1201,18 @@ async def test_header_provider_overrides_static_headers():
|
|||||||
await db.table_names()
|
await db.table_names()
|
||||||
|
|
||||||
|
|
||||||
|
def test_close():
|
||||||
|
"""Test that close() works without AttributeError."""
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
def handler(req):
|
||||||
|
req.send_response(200)
|
||||||
|
req.end_headers()
|
||||||
|
|
||||||
|
with mock_lancedb_connection(handler) as db:
|
||||||
|
asyncio.run(db.close())
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("exception", [KeyboardInterrupt, SystemExit, GeneratorExit])
|
@pytest.mark.parametrize("exception", [KeyboardInterrupt, SystemExit, GeneratorExit])
|
||||||
def test_background_loop_cancellation(exception):
|
def test_background_loop_cancellation(exception):
|
||||||
"""Test that BackgroundEventLoop.run() cancels the future on interrupt."""
|
"""Test that BackgroundEventLoop.run() cancels the future on interrupt."""
|
||||||
|
|||||||
@@ -326,6 +326,24 @@ def test_add_struct(mem_db: DBConnection):
|
|||||||
table = mem_db.create_table("test2", schema=schema)
|
table = mem_db.create_table("test2", schema=schema)
|
||||||
table.add(data)
|
table.add(data)
|
||||||
|
|
||||||
|
struct_type = pa.struct(
|
||||||
|
[
|
||||||
|
("b", pa.int64()),
|
||||||
|
("a", pa.int64()),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
expected = pa.table(
|
||||||
|
{
|
||||||
|
"s_list": [
|
||||||
|
[
|
||||||
|
pa.scalar({"b": 1, "a": 2}, type=struct_type),
|
||||||
|
pa.scalar({"b": 4, "a": None}, type=struct_type),
|
||||||
|
]
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assert table.to_arrow() == expected
|
||||||
|
|
||||||
|
|
||||||
def test_add_subschema(mem_db: DBConnection):
|
def test_add_subschema(mem_db: DBConnection):
|
||||||
schema = pa.schema(
|
schema = pa.schema(
|
||||||
@@ -509,6 +527,132 @@ async def test_add_async(mem_db_async: AsyncConnection):
|
|||||||
assert await table.count_rows() == 3
|
assert await table.count_rows() == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_overwrite_infers_vector_schema(mem_db: DBConnection):
|
||||||
|
"""Overwrite should infer vector columns the same way create_table does.
|
||||||
|
|
||||||
|
Regression test for https://github.com/lancedb/lancedb/issues/3183
|
||||||
|
"""
|
||||||
|
table = mem_db.create_table(
|
||||||
|
"test_overwrite_vec",
|
||||||
|
data=[
|
||||||
|
{"vector": [1.0, 2.0, 3.0, 4.0], "item": "foo"},
|
||||||
|
{"vector": [5.0, 6.0, 7.0, 8.0], "item": "bar"},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
# create_table infers vector as fixed_size_list<float32, 4>
|
||||||
|
original_type = table.schema.field("vector").type
|
||||||
|
assert pa.types.is_fixed_size_list(original_type)
|
||||||
|
|
||||||
|
# overwrite with plain Python lists (PyArrow infers list<double>)
|
||||||
|
table.add(
|
||||||
|
[
|
||||||
|
{"vector": [10.0, 20.0, 30.0, 40.0], "item": "baz"},
|
||||||
|
],
|
||||||
|
mode="overwrite",
|
||||||
|
)
|
||||||
|
# overwrite should infer vector column the same way as create_table
|
||||||
|
new_type = table.schema.field("vector").type
|
||||||
|
assert pa.types.is_fixed_size_list(new_type), (
|
||||||
|
f"Expected fixed_size_list after overwrite, got {new_type}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_progress_callback(mem_db: DBConnection):
|
||||||
|
table = mem_db.create_table(
|
||||||
|
"test",
|
||||||
|
data=[{"id": 1}, {"id": 2}],
|
||||||
|
)
|
||||||
|
|
||||||
|
updates = []
|
||||||
|
table.add([{"id": 3}, {"id": 4}], progress=lambda p: updates.append(dict(p)))
|
||||||
|
|
||||||
|
assert len(table) == 4
|
||||||
|
# The done callback always fires, so we should always get at least one.
|
||||||
|
assert len(updates) >= 1, "expected at least one progress callback"
|
||||||
|
for p in updates:
|
||||||
|
assert "output_rows" in p
|
||||||
|
assert "output_bytes" in p
|
||||||
|
assert "total_rows" in p
|
||||||
|
assert "elapsed_seconds" in p
|
||||||
|
assert "active_tasks" in p
|
||||||
|
assert "total_tasks" in p
|
||||||
|
assert "done" in p
|
||||||
|
# The last callback should have done=True.
|
||||||
|
assert updates[-1]["done"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_progress_tqdm_like(mem_db: DBConnection):
|
||||||
|
"""Test that a tqdm-like object gets total set and update() called."""
|
||||||
|
|
||||||
|
class FakeBar:
|
||||||
|
def __init__(self):
|
||||||
|
self.total = None
|
||||||
|
self.n = 0
|
||||||
|
self.postfix = None
|
||||||
|
|
||||||
|
def update(self, n):
|
||||||
|
self.n += n
|
||||||
|
|
||||||
|
def set_postfix_str(self, s):
|
||||||
|
self.postfix = s
|
||||||
|
|
||||||
|
def refresh(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
table = mem_db.create_table(
|
||||||
|
"test",
|
||||||
|
data=[{"id": 1}, {"id": 2}],
|
||||||
|
)
|
||||||
|
|
||||||
|
bar = FakeBar()
|
||||||
|
table.add([{"id": 3}, {"id": 4}], progress=bar)
|
||||||
|
|
||||||
|
assert len(table) == 4
|
||||||
|
# Postfix should contain throughput and worker count
|
||||||
|
if bar.postfix is not None:
|
||||||
|
assert "MB/s" in bar.postfix
|
||||||
|
assert "workers" in bar.postfix
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_progress_bool(mem_db: DBConnection):
|
||||||
|
"""Test that progress=True creates and closes a tqdm bar automatically."""
|
||||||
|
table = mem_db.create_table(
|
||||||
|
"test",
|
||||||
|
data=[{"id": 1}, {"id": 2}],
|
||||||
|
)
|
||||||
|
|
||||||
|
table.add([{"id": 3}, {"id": 4}], progress=True)
|
||||||
|
assert len(table) == 4
|
||||||
|
|
||||||
|
# progress=False should be the same as None
|
||||||
|
table.add([{"id": 5}], progress=False)
|
||||||
|
assert len(table) == 5
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_add_progress_callback_async(mem_db_async: AsyncConnection):
|
||||||
|
"""Progress callbacks work through the async path too."""
|
||||||
|
table = await mem_db_async.create_table("test", data=[{"id": 1}, {"id": 2}])
|
||||||
|
|
||||||
|
updates = []
|
||||||
|
await table.add([{"id": 3}, {"id": 4}], progress=lambda p: updates.append(dict(p)))
|
||||||
|
|
||||||
|
assert await table.count_rows() == 4
|
||||||
|
assert len(updates) >= 1
|
||||||
|
assert updates[-1]["done"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_progress_callback_error(mem_db: DBConnection):
|
||||||
|
"""A failing callback must not prevent the write from succeeding."""
|
||||||
|
table = mem_db.create_table("test", data=[{"id": 1}, {"id": 2}])
|
||||||
|
|
||||||
|
def bad_callback(p):
|
||||||
|
raise RuntimeError("boom")
|
||||||
|
|
||||||
|
table.add([{"id": 3}, {"id": 4}], progress=bad_callback)
|
||||||
|
assert len(table) == 4
|
||||||
|
|
||||||
|
|
||||||
def test_polars(mem_db: DBConnection):
|
def test_polars(mem_db: DBConnection):
|
||||||
data = {
|
data = {
|
||||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||||
@@ -810,7 +954,7 @@ def test_create_index_name_and_train_parameters(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_add_with_nans(mem_db: DBConnection):
|
def test_create_with_nans(mem_db: DBConnection):
|
||||||
# by default we raise an error on bad input vectors
|
# by default we raise an error on bad input vectors
|
||||||
bad_data = [
|
bad_data = [
|
||||||
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
@@ -854,6 +998,57 @@ def test_add_with_nans(mem_db: DBConnection):
|
|||||||
assert np.allclose(v, np.array([0.0, 0.0]))
|
assert np.allclose(v, np.array([0.0, 0.0]))
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_with_nans(mem_db: DBConnection):
|
||||||
|
schema = pa.schema(
|
||||||
|
[
|
||||||
|
pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
|
||||||
|
pa.field("item", pa.string(), nullable=True),
|
||||||
|
pa.field("price", pa.float64(), nullable=False),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
table = mem_db.create_table("test", schema=schema)
|
||||||
|
# by default we raise an error on bad input vectors
|
||||||
|
bad_data = [
|
||||||
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [5], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, 5.0], "item": "bar", "price": 20.0},
|
||||||
|
]
|
||||||
|
for row in bad_data:
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
table.add(
|
||||||
|
data=[row],
|
||||||
|
)
|
||||||
|
|
||||||
|
table.add(
|
||||||
|
[
|
||||||
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
|
{"vector": [2.1, 4.1], "item": "foo", "price": 9.0},
|
||||||
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [5], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
|
||||||
|
],
|
||||||
|
on_bad_vectors="drop",
|
||||||
|
)
|
||||||
|
assert len(table) == 2
|
||||||
|
table.delete("true")
|
||||||
|
|
||||||
|
# We can fill bad input with some value
|
||||||
|
table.add(
|
||||||
|
data=[
|
||||||
|
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||||
|
{"vector": [np.nan], "item": "bar", "price": 20.0},
|
||||||
|
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
|
||||||
|
],
|
||||||
|
on_bad_vectors="fill",
|
||||||
|
fill_value=0.0,
|
||||||
|
)
|
||||||
|
assert len(table) == 3
|
||||||
|
arrow_tbl = table.search().where("item == 'bar'").to_arrow()
|
||||||
|
v = arrow_tbl["vector"].to_pylist()[0]
|
||||||
|
assert np.allclose(v, np.array([0.0, 0.0]))
|
||||||
|
|
||||||
|
|
||||||
def test_restore(mem_db: DBConnection):
|
def test_restore(mem_db: DBConnection):
|
||||||
table = mem_db.create_table(
|
table = mem_db.create_table(
|
||||||
"my_table",
|
"my_table",
|
||||||
@@ -1913,7 +2108,7 @@ def test_stats(mem_db: DBConnection):
|
|||||||
stats = table.stats()
|
stats = table.stats()
|
||||||
print(f"{stats=}")
|
print(f"{stats=}")
|
||||||
assert stats == {
|
assert stats == {
|
||||||
"total_bytes": 38,
|
"total_bytes": 60,
|
||||||
"num_rows": 2,
|
"num_rows": 2,
|
||||||
"num_indices": 0,
|
"num_indices": 0,
|
||||||
"fragment_stats": {
|
"fragment_stats": {
|
||||||
@@ -1978,3 +2173,33 @@ def test_table_uri(tmp_path):
|
|||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
table = db.create_table("my_table", data=[{"x": 0}])
|
table = db.create_table("my_table", data=[{"x": 0}])
|
||||||
assert table.uri == str(tmp_path / "my_table.lance")
|
assert table.uri == str(tmp_path / "my_table.lance")
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_data_metadata_not_stripped():
|
||||||
|
"""Regression test: dict.update() returns None, so assigning its result
|
||||||
|
would silently replace metadata with None, causing with_metadata(None)
|
||||||
|
to strip all schema metadata from the target schema."""
|
||||||
|
from lancedb.table import _sanitize_data
|
||||||
|
|
||||||
|
schema = pa.schema(
|
||||||
|
[pa.field("x", pa.int64())],
|
||||||
|
metadata={b"existing_key": b"existing_value"},
|
||||||
|
)
|
||||||
|
batch = pa.record_batch([pa.array([1, 2, 3])], schema=schema)
|
||||||
|
|
||||||
|
# Use a different field type so the reader and target schemas differ,
|
||||||
|
# forcing _cast_to_target_schema to rebuild the schema with the
|
||||||
|
# target's metadata (instead of taking the fast-path).
|
||||||
|
target_schema = pa.schema(
|
||||||
|
[pa.field("x", pa.int32())],
|
||||||
|
metadata={b"existing_key": b"existing_value"},
|
||||||
|
)
|
||||||
|
|
||||||
|
reader = pa.RecordBatchReader.from_batches(schema, [batch])
|
||||||
|
metadata = {b"new_key": b"new_value"}
|
||||||
|
result = _sanitize_data(reader, target_schema=target_schema, metadata=metadata)
|
||||||
|
|
||||||
|
result_schema = result.schema
|
||||||
|
assert result_schema.metadata is not None
|
||||||
|
assert result_schema.metadata[b"existing_key"] == b"existing_value"
|
||||||
|
assert result_schema.metadata[b"new_key"] == b"new_value"
|
||||||
|
|||||||
@@ -121,6 +121,32 @@ def test_value_to_sql_string(tmp_path):
|
|||||||
assert table.to_pandas().query("search == @value")["replace"].item() == value
|
assert table.to_pandas().query("search == @value")["replace"].item() == value
|
||||||
|
|
||||||
|
|
||||||
|
def test_value_to_sql_dict():
|
||||||
|
# Simple flat struct
|
||||||
|
assert value_to_sql({"a": 1, "b": "hello"}) == "named_struct('a', 1, 'b', 'hello')"
|
||||||
|
|
||||||
|
# Nested struct
|
||||||
|
assert (
|
||||||
|
value_to_sql({"outer": {"inner": 1}})
|
||||||
|
== "named_struct('outer', named_struct('inner', 1))"
|
||||||
|
)
|
||||||
|
|
||||||
|
# List inside struct
|
||||||
|
assert value_to_sql({"a": [1, 2]}) == "named_struct('a', [1, 2])"
|
||||||
|
|
||||||
|
# Mixed types
|
||||||
|
assert (
|
||||||
|
value_to_sql({"name": "test", "count": 42, "rate": 3.14, "active": True})
|
||||||
|
== "named_struct('name', 'test', 'count', 42, 'rate', 3.14, 'active', TRUE)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Null value inside struct
|
||||||
|
assert value_to_sql({"a": None}) == "named_struct('a', NULL)"
|
||||||
|
|
||||||
|
# Empty dict
|
||||||
|
assert value_to_sql({}) == "named_struct()"
|
||||||
|
|
||||||
|
|
||||||
def test_append_vector_columns():
|
def test_append_vector_columns():
|
||||||
registry = EmbeddingFunctionRegistry.get_instance()
|
registry = EmbeddingFunctionRegistry.get_instance()
|
||||||
registry.register("test")(MockTextEmbeddingFunction)
|
registry.register("test")(MockTextEmbeddingFunction)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ use arrow::{
|
|||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use lancedb::arrow::SendableRecordBatchStream;
|
use lancedb::arrow::SendableRecordBatchStream;
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
|
Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods,
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
|
|||||||
@@ -9,15 +9,17 @@ use lancedb::{
|
|||||||
database::{CreateTableMode, Database, ReadConsistency},
|
database::{CreateTableMode, Database, ReadConsistency},
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pyfunction, pymethods,
|
pyclass, pyfunction, pymethods,
|
||||||
types::{PyDict, PyDictMethods},
|
types::{PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::PythonErrorExt, storage_options::py_object_to_storage_options_provider, table::Table,
|
error::PythonErrorExt,
|
||||||
|
namespace::{create_namespace_storage_options_provider, extract_namespace_arc},
|
||||||
|
table::Table,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[pyclass]
|
#[pyclass]
|
||||||
@@ -86,16 +88,16 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
|
#[pyo3(signature = (namespace_path=None, start_after=None, limit=None))]
|
||||||
pub fn table_names(
|
pub fn table_names(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
start_after: Option<String>,
|
start_after: Option<String>,
|
||||||
limit: Option<u32>,
|
limit: Option<u32>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
let mut op = inner.table_names();
|
let mut op = inner.table_names();
|
||||||
op = op.namespace(namespace);
|
op = op.namespace(namespace_path.unwrap_or_default());
|
||||||
if let Some(start_after) = start_after {
|
if let Some(start_after) = start_after {
|
||||||
op = op.start_after(start_after);
|
op = op.start_after(start_after);
|
||||||
}
|
}
|
||||||
@@ -106,34 +108,43 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
#[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
|
#[pyo3(signature = (name, mode, data, namespace_path=None, storage_options=None, location=None, namespace_client=None))]
|
||||||
pub fn create_table<'a>(
|
pub fn create_table<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
mode: &str,
|
mode: &str,
|
||||||
data: Bound<'_, PyAny>,
|
data: Bound<'_, PyAny>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
storage_options_provider: Option<Py<PyAny>>,
|
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
|
namespace_client: Option<Py<PyAny>>,
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
let py = self_.py();
|
||||||
|
|
||||||
let mode = Self::parse_create_mode_str(mode)?;
|
let mode = Self::parse_create_mode_str(mode)?;
|
||||||
|
|
||||||
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
|
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
|
||||||
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
|
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
|
||||||
|
|
||||||
let mut builder = inner.create_table(name, batches).mode(mode);
|
let ns_path = namespace_path.clone().unwrap_or_default();
|
||||||
|
let mut builder = inner.create_table(name.clone(), batches).mode(mode);
|
||||||
|
|
||||||
builder = builder.namespace(namespace);
|
builder = builder.namespace(ns_path.clone());
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
builder = builder.storage_options(storage_options);
|
builder = builder.storage_options(storage_options);
|
||||||
}
|
}
|
||||||
if let Some(provider_obj) = storage_options_provider {
|
|
||||||
let provider = py_object_to_storage_options_provider(provider_obj)?;
|
// Auto-create storage options provider from namespace_client
|
||||||
|
if let Some(ns_obj) = namespace_client {
|
||||||
|
let ns_client = extract_namespace_arc(py, ns_obj)?;
|
||||||
|
// Create table_id by combining namespace_path with table name
|
||||||
|
let mut table_id = ns_path;
|
||||||
|
table_id.push(name);
|
||||||
|
let provider = create_namespace_storage_options_provider(ns_client, table_id);
|
||||||
builder = builder.storage_options_provider(provider);
|
builder = builder.storage_options_provider(provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(location) = location {
|
if let Some(location) = location {
|
||||||
builder = builder.location(location);
|
builder = builder.location(location);
|
||||||
}
|
}
|
||||||
@@ -145,33 +156,44 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
#[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
|
#[pyo3(signature = (name, mode, schema, namespace_path=None, storage_options=None, location=None, namespace_client=None))]
|
||||||
pub fn create_empty_table<'a>(
|
pub fn create_empty_table<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
mode: &str,
|
mode: &str,
|
||||||
schema: Bound<'_, PyAny>,
|
schema: Bound<'_, PyAny>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
storage_options_provider: Option<Py<PyAny>>,
|
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
|
namespace_client: Option<Py<PyAny>>,
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
let py = self_.py();
|
||||||
|
|
||||||
let mode = Self::parse_create_mode_str(mode)?;
|
let mode = Self::parse_create_mode_str(mode)?;
|
||||||
|
|
||||||
let schema = Schema::from_pyarrow_bound(&schema)?;
|
let schema = Schema::from_pyarrow_bound(&schema)?;
|
||||||
|
|
||||||
let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
|
let ns_path = namespace_path.clone().unwrap_or_default();
|
||||||
|
let mut builder = inner
|
||||||
|
.create_empty_table(name.clone(), Arc::new(schema))
|
||||||
|
.mode(mode);
|
||||||
|
|
||||||
builder = builder.namespace(namespace);
|
builder = builder.namespace(ns_path.clone());
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
builder = builder.storage_options(storage_options);
|
builder = builder.storage_options(storage_options);
|
||||||
}
|
}
|
||||||
if let Some(provider_obj) = storage_options_provider {
|
|
||||||
let provider = py_object_to_storage_options_provider(provider_obj)?;
|
// Auto-create storage options provider from namespace_client
|
||||||
|
if let Some(ns_obj) = namespace_client {
|
||||||
|
let ns_client = extract_namespace_arc(py, ns_obj)?;
|
||||||
|
// Create table_id by combining namespace_path with table name
|
||||||
|
let mut table_id = ns_path;
|
||||||
|
table_id.push(name);
|
||||||
|
let provider = create_namespace_storage_options_provider(ns_client, table_id);
|
||||||
builder = builder.storage_options_provider(provider);
|
builder = builder.storage_options_provider(provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(location) = location {
|
if let Some(location) = location {
|
||||||
builder = builder.location(location);
|
builder = builder.location(location);
|
||||||
}
|
}
|
||||||
@@ -182,33 +204,49 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None))]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
#[pyo3(signature = (name, namespace_path=None, storage_options=None, index_cache_size=None, location=None, namespace_client=None, managed_versioning=None))]
|
||||||
pub fn open_table(
|
pub fn open_table(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
storage_options_provider: Option<Py<PyAny>>,
|
|
||||||
index_cache_size: Option<u32>,
|
index_cache_size: Option<u32>,
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
|
namespace_client: Option<Py<PyAny>>,
|
||||||
|
managed_versioning: Option<bool>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
let py = self_.py();
|
||||||
|
|
||||||
let mut builder = inner.open_table(name);
|
let ns_path = namespace_path.clone().unwrap_or_default();
|
||||||
builder = builder.namespace(namespace);
|
let mut builder = inner.open_table(name.clone());
|
||||||
|
builder = builder.namespace(ns_path.clone());
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
builder = builder.storage_options(storage_options);
|
builder = builder.storage_options(storage_options);
|
||||||
}
|
}
|
||||||
if let Some(provider_obj) = storage_options_provider {
|
|
||||||
let provider = py_object_to_storage_options_provider(provider_obj)?;
|
// Auto-create storage options provider from namespace_client
|
||||||
|
if let Some(ns_obj) = namespace_client {
|
||||||
|
let ns_client = extract_namespace_arc(py, ns_obj)?;
|
||||||
|
// Create table_id by combining namespace_path with table name
|
||||||
|
let mut table_id = ns_path;
|
||||||
|
table_id.push(name);
|
||||||
|
let provider = create_namespace_storage_options_provider(ns_client.clone(), table_id);
|
||||||
builder = builder.storage_options_provider(provider);
|
builder = builder.storage_options_provider(provider);
|
||||||
|
builder = builder.namespace_client(ns_client);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(index_cache_size) = index_cache_size {
|
if let Some(index_cache_size) = index_cache_size {
|
||||||
builder = builder.index_cache_size(index_cache_size);
|
builder = builder.index_cache_size(index_cache_size);
|
||||||
}
|
}
|
||||||
if let Some(location) = location {
|
if let Some(location) = location {
|
||||||
builder = builder.location(location);
|
builder = builder.location(location);
|
||||||
}
|
}
|
||||||
|
// Pass managed_versioning if provided to avoid redundant describe_table call
|
||||||
|
if let Some(enabled) = managed_versioning {
|
||||||
|
builder = builder.managed_versioning(enabled);
|
||||||
|
}
|
||||||
|
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let table = builder.execute().await.infer_error()?;
|
let table = builder.execute().await.infer_error()?;
|
||||||
@@ -216,12 +254,12 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (target_table_name, source_uri, target_namespace=vec![], source_version=None, source_tag=None, is_shallow=true))]
|
#[pyo3(signature = (target_table_name, source_uri, target_namespace_path=None, source_version=None, source_tag=None, is_shallow=true))]
|
||||||
pub fn clone_table(
|
pub fn clone_table(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
target_table_name: String,
|
target_table_name: String,
|
||||||
source_uri: String,
|
source_uri: String,
|
||||||
target_namespace: Vec<String>,
|
target_namespace_path: Option<Vec<String>>,
|
||||||
source_version: Option<u64>,
|
source_version: Option<u64>,
|
||||||
source_tag: Option<String>,
|
source_tag: Option<String>,
|
||||||
is_shallow: bool,
|
is_shallow: bool,
|
||||||
@@ -229,7 +267,7 @@ impl Connection {
|
|||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
|
||||||
let mut builder = inner.clone_table(target_table_name, source_uri);
|
let mut builder = inner.clone_table(target_table_name, source_uri);
|
||||||
builder = builder.target_namespace(target_namespace);
|
builder = builder.target_namespace(target_namespace_path.unwrap_or_default());
|
||||||
if let Some(version) = source_version {
|
if let Some(version) = source_version {
|
||||||
builder = builder.source_version(version);
|
builder = builder.source_version(version);
|
||||||
}
|
}
|
||||||
@@ -244,52 +282,56 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
|
#[pyo3(signature = (cur_name, new_name, cur_namespace_path=None, new_namespace_path=None))]
|
||||||
pub fn rename_table(
|
pub fn rename_table(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
cur_name: String,
|
cur_name: String,
|
||||||
new_name: String,
|
new_name: String,
|
||||||
cur_namespace: Vec<String>,
|
cur_namespace_path: Option<Vec<String>>,
|
||||||
new_namespace: Vec<String>,
|
new_namespace_path: Option<Vec<String>>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
let cur_ns_path = cur_namespace_path.unwrap_or_default();
|
||||||
|
let new_ns_path = new_namespace_path.unwrap_or_default();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner
|
inner
|
||||||
.rename_table(cur_name, new_name, &cur_namespace, &new_namespace)
|
.rename_table(cur_name, new_name, &cur_ns_path, &new_ns_path)
|
||||||
.await
|
.await
|
||||||
.infer_error()
|
.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (name, namespace=vec![]))]
|
#[pyo3(signature = (name, namespace_path=None))]
|
||||||
pub fn drop_table(
|
pub fn drop_table(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
let ns_path = namespace_path.unwrap_or_default();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.drop_table(name, &namespace).await.infer_error()
|
inner.drop_table(name, &ns_path).await.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (namespace=vec![],))]
|
#[pyo3(signature = (namespace_path=None,))]
|
||||||
pub fn drop_all_tables(
|
pub fn drop_all_tables(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
let ns_path = namespace_path.unwrap_or_default();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.drop_all_tables(&namespace).await.infer_error()
|
inner.drop_all_tables(&ns_path).await.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Namespace management methods
|
// Namespace management methods
|
||||||
|
|
||||||
#[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
|
#[pyo3(signature = (namespace_path=None, page_token=None, limit=None))]
|
||||||
pub fn list_namespaces(
|
pub fn list_namespaces(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
page_token: Option<String>,
|
page_token: Option<String>,
|
||||||
limit: Option<u32>,
|
limit: Option<u32>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
@@ -298,11 +340,7 @@ impl Connection {
|
|||||||
future_into_py(py, async move {
|
future_into_py(py, async move {
|
||||||
use lance_namespace::models::ListNamespacesRequest;
|
use lance_namespace::models::ListNamespacesRequest;
|
||||||
let request = ListNamespacesRequest {
|
let request = ListNamespacesRequest {
|
||||||
id: if namespace.is_empty() {
|
id: namespace_path,
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(namespace)
|
|
||||||
},
|
|
||||||
page_token,
|
page_token,
|
||||||
limit: limit.map(|l| l as i32),
|
limit: limit.map(|l| l as i32),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
@@ -317,10 +355,10 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (namespace, mode=None, properties=None))]
|
#[pyo3(signature = (namespace_path, mode=None, properties=None))]
|
||||||
pub fn create_namespace(
|
pub fn create_namespace(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Vec<String>,
|
||||||
mode: Option<String>,
|
mode: Option<String>,
|
||||||
properties: Option<std::collections::HashMap<String, String>>,
|
properties: Option<std::collections::HashMap<String, String>>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
@@ -336,11 +374,7 @@ impl Connection {
|
|||||||
_ => None,
|
_ => None,
|
||||||
});
|
});
|
||||||
let request = CreateNamespaceRequest {
|
let request = CreateNamespaceRequest {
|
||||||
id: if namespace.is_empty() {
|
id: Some(namespace_path),
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(namespace)
|
|
||||||
},
|
|
||||||
mode: mode_str,
|
mode: mode_str,
|
||||||
properties,
|
properties,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
@@ -354,10 +388,10 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (namespace, mode=None, behavior=None))]
|
#[pyo3(signature = (namespace_path, mode=None, behavior=None))]
|
||||||
pub fn drop_namespace(
|
pub fn drop_namespace(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Vec<String>,
|
||||||
mode: Option<String>,
|
mode: Option<String>,
|
||||||
behavior: Option<String>,
|
behavior: Option<String>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
@@ -377,11 +411,7 @@ impl Connection {
|
|||||||
_ => None,
|
_ => None,
|
||||||
});
|
});
|
||||||
let request = DropNamespaceRequest {
|
let request = DropNamespaceRequest {
|
||||||
id: if namespace.is_empty() {
|
id: Some(namespace_path),
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(namespace)
|
|
||||||
},
|
|
||||||
mode: mode_str,
|
mode: mode_str,
|
||||||
behavior: behavior_str,
|
behavior: behavior_str,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
@@ -396,21 +426,17 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (namespace,))]
|
#[pyo3(signature = (namespace_path,))]
|
||||||
pub fn describe_namespace(
|
pub fn describe_namespace(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Vec<String>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
let py = self_.py();
|
let py = self_.py();
|
||||||
future_into_py(py, async move {
|
future_into_py(py, async move {
|
||||||
use lance_namespace::models::DescribeNamespaceRequest;
|
use lance_namespace::models::DescribeNamespaceRequest;
|
||||||
let request = DescribeNamespaceRequest {
|
let request = DescribeNamespaceRequest {
|
||||||
id: if namespace.is_empty() {
|
id: Some(namespace_path),
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(namespace)
|
|
||||||
},
|
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||||
@@ -422,10 +448,10 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
|
#[pyo3(signature = (namespace_path=None, page_token=None, limit=None))]
|
||||||
pub fn list_tables(
|
pub fn list_tables(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
namespace: Vec<String>,
|
namespace_path: Option<Vec<String>>,
|
||||||
page_token: Option<String>,
|
page_token: Option<String>,
|
||||||
limit: Option<u32>,
|
limit: Option<u32>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
@@ -434,11 +460,7 @@ impl Connection {
|
|||||||
future_into_py(py, async move {
|
future_into_py(py, async move {
|
||||||
use lance_namespace::models::ListTablesRequest;
|
use lance_namespace::models::ListTablesRequest;
|
||||||
let request = ListTablesRequest {
|
let request = ListTablesRequest {
|
||||||
id: if namespace.is_empty() {
|
id: namespace_path,
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(namespace)
|
|
||||||
},
|
|
||||||
page_token,
|
page_token,
|
||||||
limit: limit.map(|l| l as i32),
|
limit: limit.map(|l| l as i32),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
|||||||
@@ -2,10 +2,10 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
PyErr, PyResult, Python,
|
||||||
exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError},
|
exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError},
|
||||||
intern,
|
intern,
|
||||||
types::{PyAnyMethods, PyNone},
|
types::{PyAnyMethods, PyNone},
|
||||||
PyErr, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use lancedb::error::Error as LanceError;
|
use lancedb::error::Error as LanceError;
|
||||||
|
|||||||
175
python/src/expr.rs
Normal file
175
python/src/expr.rs
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
//! PyO3 bindings for the LanceDB expression builder API.
|
||||||
|
//!
|
||||||
|
//! This module exposes [`PyExpr`] and helper free functions so Python can
|
||||||
|
//! build type-safe filter / projection expressions that map directly to
|
||||||
|
//! DataFusion [`Expr`] nodes, bypassing SQL string parsing.
|
||||||
|
|
||||||
|
use arrow::{datatypes::DataType, pyarrow::PyArrowType};
|
||||||
|
use lancedb::expr::{DfExpr, col as ldb_col, contains, expr_cast, lit as df_lit, lower, upper};
|
||||||
|
use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunction};
|
||||||
|
|
||||||
|
/// A type-safe DataFusion expression.
|
||||||
|
///
|
||||||
|
/// Instances are constructed via the free functions [`expr_col`] and
|
||||||
|
/// [`expr_lit`] and combined with the methods on this struct. On the Python
|
||||||
|
/// side a thin wrapper class (`lancedb.expr.Expr`) delegates to these methods
|
||||||
|
/// and adds Python operator overloads.
|
||||||
|
#[pyclass(name = "PyExpr")]
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct PyExpr(pub DfExpr);
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl PyExpr {
|
||||||
|
// ── comparisons ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn eq(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().eq(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ne(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().not_eq(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lt(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().lt(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lte(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().lt_eq(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gt(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().gt(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gte(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().gt_eq(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── logical ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn and_(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().and(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn or_(&self, other: &Self) -> Self {
|
||||||
|
Self(self.0.clone().or(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn not_(&self) -> Self {
|
||||||
|
use std::ops::Not;
|
||||||
|
Self(self.0.clone().not())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── arithmetic ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn add(&self, other: &Self) -> Self {
|
||||||
|
use std::ops::Add;
|
||||||
|
Self(self.0.clone().add(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sub(&self, other: &Self) -> Self {
|
||||||
|
use std::ops::Sub;
|
||||||
|
Self(self.0.clone().sub(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul(&self, other: &Self) -> Self {
|
||||||
|
use std::ops::Mul;
|
||||||
|
Self(self.0.clone().mul(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn div(&self, other: &Self) -> Self {
|
||||||
|
use std::ops::Div;
|
||||||
|
Self(self.0.clone().div(other.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── string functions ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Convert string column to lowercase.
|
||||||
|
fn lower(&self) -> Self {
|
||||||
|
Self(lower(self.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert string column to uppercase.
|
||||||
|
fn upper(&self) -> Self {
|
||||||
|
Self(upper(self.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test whether the string contains `substr`.
|
||||||
|
fn contains(&self, substr: &Self) -> Self {
|
||||||
|
Self(contains(self.0.clone(), substr.0.clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── type cast ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Cast the expression to `data_type`.
|
||||||
|
///
|
||||||
|
/// `data_type` must be a PyArrow `DataType` (e.g. `pa.int32()`).
|
||||||
|
/// On the Python side, `lancedb.expr.Expr.cast` also accepts type name
|
||||||
|
/// strings via `pa.lib.ensure_type` before forwarding here.
|
||||||
|
fn cast(&self, data_type: PyArrowType<DataType>) -> Self {
|
||||||
|
Self(expr_cast(self.0.clone(), data_type.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── utilities ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Render the expression as a SQL string (useful for debugging).
|
||||||
|
fn to_sql(&self) -> PyResult<String> {
|
||||||
|
lancedb::expr::expr_to_sql_string(&self.0).map_err(|e| PyValueError::new_err(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(&self) -> PyResult<String> {
|
||||||
|
let sql =
|
||||||
|
lancedb::expr::expr_to_sql_string(&self.0).unwrap_or_else(|_| "<expr>".to_string());
|
||||||
|
Ok(format!("PyExpr({})", sql))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── free functions ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Create a column reference expression.
|
||||||
|
///
|
||||||
|
/// The column name is preserved exactly as given (case-sensitive), so
|
||||||
|
/// `col("firstName")` correctly references a field named `firstName`.
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn expr_col(name: &str) -> PyExpr {
|
||||||
|
PyExpr(ldb_col(name))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a literal value expression.
|
||||||
|
///
|
||||||
|
/// Supported Python types: `bool`, `int`, `float`, `str`.
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn expr_lit(value: Bound<'_, PyAny>) -> PyResult<PyExpr> {
|
||||||
|
// bool must be checked before int because bool is a subclass of int in Python
|
||||||
|
if let Ok(b) = value.extract::<bool>() {
|
||||||
|
return Ok(PyExpr(df_lit(b)));
|
||||||
|
}
|
||||||
|
if let Ok(i) = value.extract::<i64>() {
|
||||||
|
return Ok(PyExpr(df_lit(i)));
|
||||||
|
}
|
||||||
|
if let Ok(f) = value.extract::<f64>() {
|
||||||
|
return Ok(PyExpr(df_lit(f)));
|
||||||
|
}
|
||||||
|
if let Ok(s) = value.extract::<String>() {
|
||||||
|
return Ok(PyExpr(df_lit(s)));
|
||||||
|
}
|
||||||
|
Err(PyValueError::new_err(format!(
|
||||||
|
"unsupported literal type: {}. Supported: bool, int, float, str",
|
||||||
|
value.get_type().name()?
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Call an arbitrary registered SQL function by name.
|
||||||
|
///
|
||||||
|
/// See `lancedb::expr::func` for the list of supported function names.
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn expr_func(name: &str, args: Vec<PyExpr>) -> PyResult<PyExpr> {
|
||||||
|
let df_args: Vec<DfExpr> = args.into_iter().map(|e| e.0).collect();
|
||||||
|
lancedb::expr::func(name, df_args)
|
||||||
|
.map(PyExpr)
|
||||||
|
.map_err(|e| PyValueError::new_err(e.to_string()))
|
||||||
|
}
|
||||||
@@ -3,17 +3,17 @@
|
|||||||
|
|
||||||
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
|
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
|
||||||
use lancedb::index::{
|
use lancedb::index::{
|
||||||
|
Index as LanceDbIndex,
|
||||||
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
|
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
|
||||||
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
||||||
Index as LanceDbIndex,
|
|
||||||
};
|
};
|
||||||
use pyo3::types::PyStringMethods;
|
|
||||||
use pyo3::IntoPyObject;
|
use pyo3::IntoPyObject;
|
||||||
|
use pyo3::types::PyStringMethods;
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, FromPyObject, PyAny, PyResult, Python,
|
||||||
exceptions::{PyKeyError, PyValueError},
|
exceptions::{PyKeyError, PyValueError},
|
||||||
intern, pyclass, pymethods,
|
intern, pyclass, pymethods,
|
||||||
types::PyAnyMethods,
|
types::PyAnyMethods,
|
||||||
Bound, FromPyObject, PyAny, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::util::parse_distance_type;
|
use crate::util::parse_distance_type;
|
||||||
@@ -41,7 +41,12 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
let inner_opts = FtsIndexBuilder::default()
|
let inner_opts = FtsIndexBuilder::default()
|
||||||
.base_tokenizer(params.base_tokenizer)
|
.base_tokenizer(params.base_tokenizer)
|
||||||
.language(¶ms.language)
|
.language(¶ms.language)
|
||||||
.map_err(|_| PyValueError::new_err(format!("LanceDB does not support the requested language: '{}'", params.language)))?
|
.map_err(|_| {
|
||||||
|
PyValueError::new_err(format!(
|
||||||
|
"LanceDB does not support the requested language: '{}'",
|
||||||
|
params.language
|
||||||
|
))
|
||||||
|
})?
|
||||||
.with_position(params.with_position)
|
.with_position(params.with_position)
|
||||||
.lower_case(params.lower_case)
|
.lower_case(params.lower_case)
|
||||||
.max_token_length(params.max_token_length)
|
.max_token_length(params.max_token_length)
|
||||||
@@ -52,7 +57,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
.ngram_max_length(params.ngram_max_length)
|
.ngram_max_length(params.ngram_max_length)
|
||||||
.ngram_prefix_only(params.prefix_only);
|
.ngram_prefix_only(params.prefix_only);
|
||||||
Ok(LanceDbIndex::FTS(inner_opts))
|
Ok(LanceDbIndex::FTS(inner_opts))
|
||||||
},
|
}
|
||||||
"IvfFlat" => {
|
"IvfFlat" => {
|
||||||
let params = source.extract::<IvfFlatParams>()?;
|
let params = source.extract::<IvfFlatParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -64,10 +69,11 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
|
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
|
||||||
}
|
}
|
||||||
if let Some(target_partition_size) = params.target_partition_size {
|
if let Some(target_partition_size) = params.target_partition_size {
|
||||||
ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
|
ivf_flat_builder =
|
||||||
|
ivf_flat_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
|
Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
|
||||||
},
|
}
|
||||||
"IvfPq" => {
|
"IvfPq" => {
|
||||||
let params = source.extract::<IvfPqParams>()?;
|
let params = source.extract::<IvfPqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -86,7 +92,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
|
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
|
||||||
},
|
}
|
||||||
"IvfSq" => {
|
"IvfSq" => {
|
||||||
let params = source.extract::<IvfSqParams>()?;
|
let params = source.extract::<IvfSqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -101,7 +107,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
|
ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
|
Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
|
||||||
},
|
}
|
||||||
"IvfRq" => {
|
"IvfRq" => {
|
||||||
let params = source.extract::<IvfRqParams>()?;
|
let params = source.extract::<IvfRqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -117,7 +123,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
|
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
|
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
|
||||||
},
|
}
|
||||||
"HnswPq" => {
|
"HnswPq" => {
|
||||||
let params = source.extract::<IvfHnswPqParams>()?;
|
let params = source.extract::<IvfHnswPqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -138,7 +144,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
|
hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfHnswPq(hnsw_pq_builder))
|
Ok(LanceDbIndex::IvfHnswPq(hnsw_pq_builder))
|
||||||
},
|
}
|
||||||
"HnswSq" => {
|
"HnswSq" => {
|
||||||
let params = source.extract::<IvfHnswSqParams>()?;
|
let params = source.extract::<IvfHnswSqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -155,7 +161,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
|
hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
||||||
},
|
}
|
||||||
not_supported => Err(PyValueError::new_err(format!(
|
not_supported => Err(PyValueError::new_err(format!(
|
||||||
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
|
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
|
||||||
not_supported
|
not_supported
|
||||||
|
|||||||
@@ -2,14 +2,15 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use arrow::RecordBatchStream;
|
use arrow::RecordBatchStream;
|
||||||
use connection::{connect, Connection};
|
use connection::{Connection, connect};
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
|
use expr::{PyExpr, expr_col, expr_func, expr_lit};
|
||||||
use index::IndexConfig;
|
use index::IndexConfig;
|
||||||
use permutation::{PyAsyncPermutationBuilder, PyPermutationReader};
|
use permutation::{PyAsyncPermutationBuilder, PyPermutationReader};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
pymodule,
|
Bound, PyResult, Python, pymodule,
|
||||||
types::{PyModule, PyModuleMethods},
|
types::{PyModule, PyModuleMethods},
|
||||||
wrap_pyfunction, Bound, PyResult, Python,
|
wrap_pyfunction,
|
||||||
};
|
};
|
||||||
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
||||||
use session::Session;
|
use session::Session;
|
||||||
@@ -21,12 +22,13 @@ use table::{
|
|||||||
pub mod arrow;
|
pub mod arrow;
|
||||||
pub mod connection;
|
pub mod connection;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
pub mod expr;
|
||||||
pub mod header;
|
pub mod header;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
|
pub mod namespace;
|
||||||
pub mod permutation;
|
pub mod permutation;
|
||||||
pub mod query;
|
pub mod query;
|
||||||
pub mod session;
|
pub mod session;
|
||||||
pub mod storage_options;
|
|
||||||
pub mod table;
|
pub mod table;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
@@ -54,10 +56,14 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|||||||
m.add_class::<UpdateResult>()?;
|
m.add_class::<UpdateResult>()?;
|
||||||
m.add_class::<PyAsyncPermutationBuilder>()?;
|
m.add_class::<PyAsyncPermutationBuilder>()?;
|
||||||
m.add_class::<PyPermutationReader>()?;
|
m.add_class::<PyPermutationReader>()?;
|
||||||
|
m.add_class::<PyExpr>()?;
|
||||||
m.add_function(wrap_pyfunction!(connect, m)?)?;
|
m.add_function(wrap_pyfunction!(connect, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
|
m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
|
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?;
|
m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(expr_col, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(expr_lit, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(expr_func, m)?)?;
|
||||||
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
715
python/src/namespace.rs
Normal file
715
python/src/namespace.rs
Normal file
@@ -0,0 +1,715 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
//! Namespace utilities for Python bindings
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use bytes::Bytes;
|
||||||
|
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
|
||||||
|
use lance_namespace::LanceNamespace as LanceNamespaceTrait;
|
||||||
|
use lance_namespace::models::*;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyDict;
|
||||||
|
|
||||||
|
/// Wrapper that allows any Python object implementing LanceNamespace protocol
|
||||||
|
/// to be used as a Rust LanceNamespace.
|
||||||
|
///
|
||||||
|
/// This is similar to PyLanceNamespace in lance's Python bindings - it wraps a Python
|
||||||
|
/// object and calls back into Python when namespace methods are invoked.
|
||||||
|
pub struct PyLanceNamespace {
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
namespace_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PyLanceNamespace {
|
||||||
|
/// Create a new PyLanceNamespace wrapper around a Python namespace object.
|
||||||
|
pub fn new(_py: Python<'_>, py_namespace: &Bound<'_, PyAny>) -> PyResult<Self> {
|
||||||
|
let namespace_id = py_namespace
|
||||||
|
.call_method0("namespace_id")?
|
||||||
|
.extract::<String>()?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
py_namespace: Arc::new(py_namespace.clone().unbind()),
|
||||||
|
namespace_id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create an Arc<dyn LanceNamespace> from a Python namespace object.
|
||||||
|
pub fn create_arc(
|
||||||
|
py: Python<'_>,
|
||||||
|
py_namespace: &Bound<'_, PyAny>,
|
||||||
|
) -> PyResult<Arc<dyn LanceNamespaceTrait>> {
|
||||||
|
let wrapper = Self::new(py, py_namespace)?;
|
||||||
|
Ok(Arc::new(wrapper))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for PyLanceNamespace {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "PyLanceNamespace {{ id: {} }}", self.namespace_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get or create the DictWithModelDump class in Python.
|
||||||
|
/// This class acts like a dict but also has model_dump() method.
|
||||||
|
/// This allows it to work with both:
|
||||||
|
/// - depythonize (which expects a dict/Mapping)
|
||||||
|
/// - Python code that calls .model_dump() (like DirectoryNamespace wrapper)
|
||||||
|
fn get_dict_with_model_dump_class(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
|
// Use a module-level cache via __builtins__
|
||||||
|
let builtins = py.import("builtins")?;
|
||||||
|
if builtins.hasattr("_DictWithModelDump")? {
|
||||||
|
return builtins.getattr("_DictWithModelDump");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the class using exec
|
||||||
|
let locals = PyDict::new(py);
|
||||||
|
py.run(
|
||||||
|
c"class DictWithModelDump(dict):
|
||||||
|
def model_dump(self):
|
||||||
|
return dict(self)",
|
||||||
|
None,
|
||||||
|
Some(&locals),
|
||||||
|
)?;
|
||||||
|
let class = locals.get_item("DictWithModelDump")?.ok_or_else(|| {
|
||||||
|
pyo3::exceptions::PyRuntimeError::new_err("Failed to create DictWithModelDump class")
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Cache it
|
||||||
|
builtins.setattr("_DictWithModelDump", &class)?;
|
||||||
|
Ok(class)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper to call a Python namespace method with JSON serialization.
|
||||||
|
/// For methods that take a request and return a response.
|
||||||
|
/// Uses DictWithModelDump to pass a dict that also has model_dump() method,
|
||||||
|
/// making it compatible with both depythonize and Python wrappers.
|
||||||
|
async fn call_py_method<Req, Resp>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<Resp>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
Resp: serde::de::DeserializeOwned + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response_json = tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump so it works with both depythonize and .model_dump()
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
|
||||||
|
// Convert response to dict, then to JSON
|
||||||
|
// Pydantic models have model_dump() method
|
||||||
|
let result_dict = if result.bind(py).hasattr("model_dump")? {
|
||||||
|
result.call_method0(py, "model_dump")?
|
||||||
|
} else {
|
||||||
|
result
|
||||||
|
};
|
||||||
|
let response_json: String = json_module
|
||||||
|
.call_method1("dumps", (result_dict,))?
|
||||||
|
.extract()?;
|
||||||
|
Ok::<_, PyErr>(response_json)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))?;
|
||||||
|
|
||||||
|
serde_json::from_str(&response_json).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to deserialize response from {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that return () on success
|
||||||
|
async fn call_py_method_unit<Req>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<()>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
Ok::<_, PyErr>(())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that return a primitive type
|
||||||
|
async fn call_py_method_primitive<Req, Resp>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<Resp>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
Resp: for<'py> pyo3::FromPyObject<'py> + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
let value: Resp = result.extract(py)?;
|
||||||
|
Ok::<_, PyErr>(value)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that return Bytes
|
||||||
|
async fn call_py_method_bytes<Req>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<Bytes>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
let bytes_data: Vec<u8> = result.extract(py)?;
|
||||||
|
Ok::<_, PyErr>(Bytes::from(bytes_data))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that take request + data and return a response
|
||||||
|
async fn call_py_method_with_data<Req, Resp>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
data: Bytes,
|
||||||
|
) -> lance_core::Result<Resp>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
Resp: serde::de::DeserializeOwned + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response_json = tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Pass request and bytes to Python method
|
||||||
|
let py_bytes = pyo3::types::PyBytes::new(py, &data);
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg, py_bytes))?;
|
||||||
|
|
||||||
|
// Convert response dict to JSON
|
||||||
|
let response_json: String = json_module.call_method1("dumps", (result,))?.extract()?;
|
||||||
|
Ok::<_, PyErr>(response_json)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))?;
|
||||||
|
|
||||||
|
serde_json::from_str(&response_json).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to deserialize response from {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl LanceNamespaceTrait for PyLanceNamespace {
|
||||||
|
fn namespace_id(&self) -> String {
|
||||||
|
self.namespace_id.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_namespaces(
|
||||||
|
&self,
|
||||||
|
request: ListNamespacesRequest,
|
||||||
|
) -> lance_core::Result<ListNamespacesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_namespaces", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_namespace(
|
||||||
|
&self,
|
||||||
|
request: DescribeNamespaceRequest,
|
||||||
|
) -> lance_core::Result<DescribeNamespaceResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_namespace", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_namespace(
|
||||||
|
&self,
|
||||||
|
request: CreateNamespaceRequest,
|
||||||
|
) -> lance_core::Result<CreateNamespaceResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_namespace", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_namespace(
|
||||||
|
&self,
|
||||||
|
request: DropNamespaceRequest,
|
||||||
|
) -> lance_core::Result<DropNamespaceResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "drop_namespace", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn namespace_exists(&self, request: NamespaceExistsRequest) -> lance_core::Result<()> {
|
||||||
|
call_py_method_unit(self.py_namespace.clone(), "namespace_exists", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_tables(
|
||||||
|
&self,
|
||||||
|
request: ListTablesRequest,
|
||||||
|
) -> lance_core::Result<ListTablesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_tables", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_table(
|
||||||
|
&self,
|
||||||
|
request: DescribeTableRequest,
|
||||||
|
) -> lance_core::Result<DescribeTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn register_table(
|
||||||
|
&self,
|
||||||
|
request: RegisterTableRequest,
|
||||||
|
) -> lance_core::Result<RegisterTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "register_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn table_exists(&self, request: TableExistsRequest) -> lance_core::Result<()> {
|
||||||
|
call_py_method_unit(self.py_namespace.clone(), "table_exists", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_table(&self, request: DropTableRequest) -> lance_core::Result<DropTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "drop_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn deregister_table(
|
||||||
|
&self,
|
||||||
|
request: DeregisterTableRequest,
|
||||||
|
) -> lance_core::Result<DeregisterTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "deregister_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn count_table_rows(&self, request: CountTableRowsRequest) -> lance_core::Result<i64> {
|
||||||
|
call_py_method_primitive(self.py_namespace.clone(), "count_table_rows", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table(
|
||||||
|
&self,
|
||||||
|
request: CreateTableRequest,
|
||||||
|
request_data: Bytes,
|
||||||
|
) -> lance_core::Result<CreateTableResponse> {
|
||||||
|
call_py_method_with_data(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"create_table",
|
||||||
|
request,
|
||||||
|
request_data,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn declare_table(
|
||||||
|
&self,
|
||||||
|
request: DeclareTableRequest,
|
||||||
|
) -> lance_core::Result<DeclareTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "declare_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_into_table(
|
||||||
|
&self,
|
||||||
|
request: InsertIntoTableRequest,
|
||||||
|
request_data: Bytes,
|
||||||
|
) -> lance_core::Result<InsertIntoTableResponse> {
|
||||||
|
call_py_method_with_data(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"insert_into_table",
|
||||||
|
request,
|
||||||
|
request_data,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn merge_insert_into_table(
|
||||||
|
&self,
|
||||||
|
request: MergeInsertIntoTableRequest,
|
||||||
|
request_data: Bytes,
|
||||||
|
) -> lance_core::Result<MergeInsertIntoTableResponse> {
|
||||||
|
call_py_method_with_data(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"merge_insert_into_table",
|
||||||
|
request,
|
||||||
|
request_data,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_table(
|
||||||
|
&self,
|
||||||
|
request: UpdateTableRequest,
|
||||||
|
) -> lance_core::Result<UpdateTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "update_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_from_table(
|
||||||
|
&self,
|
||||||
|
request: DeleteFromTableRequest,
|
||||||
|
) -> lance_core::Result<DeleteFromTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "delete_from_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn query_table(&self, request: QueryTableRequest) -> lance_core::Result<Bytes> {
|
||||||
|
call_py_method_bytes(self.py_namespace.clone(), "query_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_index(
|
||||||
|
&self,
|
||||||
|
request: CreateTableIndexRequest,
|
||||||
|
) -> lance_core::Result<CreateTableIndexResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_table_index", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_table_indices(
|
||||||
|
&self,
|
||||||
|
request: ListTableIndicesRequest,
|
||||||
|
) -> lance_core::Result<ListTableIndicesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_table_indices", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_table_index_stats(
|
||||||
|
&self,
|
||||||
|
request: DescribeTableIndexStatsRequest,
|
||||||
|
) -> lance_core::Result<DescribeTableIndexStatsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"describe_table_index_stats",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_transaction(
|
||||||
|
&self,
|
||||||
|
request: DescribeTransactionRequest,
|
||||||
|
) -> lance_core::Result<DescribeTransactionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_transaction", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_transaction(
|
||||||
|
&self,
|
||||||
|
request: AlterTransactionRequest,
|
||||||
|
) -> lance_core::Result<AlterTransactionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "alter_transaction", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_scalar_index(
|
||||||
|
&self,
|
||||||
|
request: CreateTableIndexRequest,
|
||||||
|
) -> lance_core::Result<CreateTableScalarIndexResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"create_table_scalar_index",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_table_index(
|
||||||
|
&self,
|
||||||
|
request: DropTableIndexRequest,
|
||||||
|
) -> lance_core::Result<DropTableIndexResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "drop_table_index", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_all_tables(
|
||||||
|
&self,
|
||||||
|
request: ListTablesRequest,
|
||||||
|
) -> lance_core::Result<ListTablesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_all_tables", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn restore_table(
|
||||||
|
&self,
|
||||||
|
request: RestoreTableRequest,
|
||||||
|
) -> lance_core::Result<RestoreTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "restore_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn rename_table(
|
||||||
|
&self,
|
||||||
|
request: RenameTableRequest,
|
||||||
|
) -> lance_core::Result<RenameTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "rename_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_table_versions(
|
||||||
|
&self,
|
||||||
|
request: ListTableVersionsRequest,
|
||||||
|
) -> lance_core::Result<ListTableVersionsResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_table_versions", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_version(
|
||||||
|
&self,
|
||||||
|
request: CreateTableVersionRequest,
|
||||||
|
) -> lance_core::Result<CreateTableVersionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_table_version", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_table_version(
|
||||||
|
&self,
|
||||||
|
request: DescribeTableVersionRequest,
|
||||||
|
) -> lance_core::Result<DescribeTableVersionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_table_version", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn batch_delete_table_versions(
|
||||||
|
&self,
|
||||||
|
request: BatchDeleteTableVersionsRequest,
|
||||||
|
) -> lance_core::Result<BatchDeleteTableVersionsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"batch_delete_table_versions",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_table_schema_metadata(
|
||||||
|
&self,
|
||||||
|
request: UpdateTableSchemaMetadataRequest,
|
||||||
|
) -> lance_core::Result<UpdateTableSchemaMetadataResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"update_table_schema_metadata",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_table_stats(
|
||||||
|
&self,
|
||||||
|
request: GetTableStatsRequest,
|
||||||
|
) -> lance_core::Result<GetTableStatsResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "get_table_stats", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn explain_table_query_plan(
|
||||||
|
&self,
|
||||||
|
request: ExplainTableQueryPlanRequest,
|
||||||
|
) -> lance_core::Result<String> {
|
||||||
|
call_py_method_primitive(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"explain_table_query_plan",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn analyze_table_query_plan(
|
||||||
|
&self,
|
||||||
|
request: AnalyzeTableQueryPlanRequest,
|
||||||
|
) -> lance_core::Result<String> {
|
||||||
|
call_py_method_primitive(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"analyze_table_query_plan",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_table_add_columns(
|
||||||
|
&self,
|
||||||
|
request: AlterTableAddColumnsRequest,
|
||||||
|
) -> lance_core::Result<AlterTableAddColumnsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"alter_table_add_columns",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_table_alter_columns(
|
||||||
|
&self,
|
||||||
|
request: AlterTableAlterColumnsRequest,
|
||||||
|
) -> lance_core::Result<AlterTableAlterColumnsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"alter_table_alter_columns",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_table_drop_columns(
|
||||||
|
&self,
|
||||||
|
request: AlterTableDropColumnsRequest,
|
||||||
|
) -> lance_core::Result<AlterTableDropColumnsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"alter_table_drop_columns",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_table_tags(
|
||||||
|
&self,
|
||||||
|
request: ListTableTagsRequest,
|
||||||
|
) -> lance_core::Result<ListTableTagsResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_table_tags", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_tag(
|
||||||
|
&self,
|
||||||
|
request: CreateTableTagRequest,
|
||||||
|
) -> lance_core::Result<CreateTableTagResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_table_tag", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_table_tag(
|
||||||
|
&self,
|
||||||
|
request: DeleteTableTagRequest,
|
||||||
|
) -> lance_core::Result<DeleteTableTagResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "delete_table_tag", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_table_tag(
|
||||||
|
&self,
|
||||||
|
request: UpdateTableTagRequest,
|
||||||
|
) -> lance_core::Result<UpdateTableTagResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "update_table_tag", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_table_tag_version(
|
||||||
|
&self,
|
||||||
|
request: GetTableTagVersionRequest,
|
||||||
|
) -> lance_core::Result<GetTableTagVersionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "get_table_tag_version", request).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert Python dict to HashMap<String, String>
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn dict_to_hashmap(dict: &Bound<'_, PyDict>) -> PyResult<HashMap<String, String>> {
|
||||||
|
let mut map = HashMap::new();
|
||||||
|
for (key, value) in dict.iter() {
|
||||||
|
let key_str: String = key.extract()?;
|
||||||
|
let value_str: String = value.extract()?;
|
||||||
|
map.insert(key_str, value_str);
|
||||||
|
}
|
||||||
|
Ok(map)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract an Arc<dyn LanceNamespace> from a Python namespace object.
|
||||||
|
///
|
||||||
|
/// This function wraps any Python namespace object with PyLanceNamespace.
|
||||||
|
/// The PyLanceNamespace wrapper uses DictWithModelDump to pass requests,
|
||||||
|
/// which works with both:
|
||||||
|
/// - Native namespaces (DirectoryNamespace, RestNamespace) that use depythonize (expects dict)
|
||||||
|
/// - Custom Python implementations that call .model_dump() on the request
|
||||||
|
pub fn extract_namespace_arc(
|
||||||
|
py: Python<'_>,
|
||||||
|
ns: Py<PyAny>,
|
||||||
|
) -> PyResult<Arc<dyn LanceNamespaceTrait>> {
|
||||||
|
let ns_ref = ns.bind(py);
|
||||||
|
PyLanceNamespace::create_arc(py, ns_ref)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a LanceNamespaceStorageOptionsProvider from a namespace client and table ID.
|
||||||
|
///
|
||||||
|
/// This creates a Rust storage options provider that fetches credentials from the
|
||||||
|
/// namespace's describe_table() method, enabling automatic credential refresh.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `namespace_client` - The namespace client (wrapped PyLanceNamespace)
|
||||||
|
/// * `table_id` - Full table identifier (namespace_path + table_name)
|
||||||
|
pub fn create_namespace_storage_options_provider(
|
||||||
|
namespace_client: Arc<dyn LanceNamespaceTrait>,
|
||||||
|
table_id: Vec<String>,
|
||||||
|
) -> Arc<dyn StorageOptionsProvider> {
|
||||||
|
Arc::new(LanceNamespaceStorageOptionsProvider::new(
|
||||||
|
namespace_client,
|
||||||
|
table_id,
|
||||||
|
))
|
||||||
|
}
|
||||||
@@ -16,10 +16,10 @@ use lancedb::{
|
|||||||
query::Select,
|
query::Select,
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
|
||||||
exceptions::PyRuntimeError,
|
exceptions::PyRuntimeError,
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
|
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
|
||||||
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,9 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use arrow::array::make_array;
|
|
||||||
use arrow::array::Array;
|
use arrow::array::Array;
|
||||||
use arrow::array::ArrayData;
|
use arrow::array::ArrayData;
|
||||||
|
use arrow::array::make_array;
|
||||||
use arrow::pyarrow::FromPyArrow;
|
use arrow::pyarrow::FromPyArrow;
|
||||||
use arrow::pyarrow::IntoPyArrow;
|
use arrow::pyarrow::IntoPyArrow;
|
||||||
use arrow::pyarrow::ToPyArrow;
|
use arrow::pyarrow::ToPyArrow;
|
||||||
@@ -22,25 +22,23 @@ use lancedb::query::{
|
|||||||
VectorQuery as LanceDbVectorQuery,
|
VectorQuery as LanceDbVectorQuery,
|
||||||
};
|
};
|
||||||
use lancedb::table::AnyQuery;
|
use lancedb::table::AnyQuery;
|
||||||
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
|
|
||||||
use pyo3::pyfunction;
|
|
||||||
use pyo3::pymethods;
|
|
||||||
use pyo3::types::PyList;
|
|
||||||
use pyo3::types::{PyDict, PyString};
|
|
||||||
use pyo3::Bound;
|
use pyo3::Bound;
|
||||||
use pyo3::IntoPyObject;
|
use pyo3::IntoPyObject;
|
||||||
use pyo3::PyAny;
|
use pyo3::PyAny;
|
||||||
use pyo3::PyRef;
|
use pyo3::PyRef;
|
||||||
use pyo3::PyResult;
|
use pyo3::PyResult;
|
||||||
use pyo3::Python;
|
use pyo3::Python;
|
||||||
use pyo3::{exceptions::PyRuntimeError, FromPyObject};
|
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
|
||||||
use pyo3::{
|
use pyo3::pyfunction;
|
||||||
exceptions::{PyNotImplementedError, PyValueError},
|
use pyo3::pymethods;
|
||||||
intern,
|
use pyo3::types::PyList;
|
||||||
};
|
use pyo3::types::{PyDict, PyString};
|
||||||
use pyo3::{pyclass, PyErr};
|
use pyo3::{FromPyObject, exceptions::PyRuntimeError};
|
||||||
|
use pyo3::{PyErr, pyclass};
|
||||||
|
use pyo3::{exceptions::PyValueError, intern};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
|
use crate::expr::PyExpr;
|
||||||
use crate::util::parse_distance_type;
|
use crate::util::parse_distance_type;
|
||||||
use crate::{arrow::RecordBatchStream, util::PyLanceDB};
|
use crate::{arrow::RecordBatchStream, util::PyLanceDB};
|
||||||
use crate::{error::PythonErrorExt, index::class_name};
|
use crate::{error::PythonErrorExt, index::class_name};
|
||||||
@@ -316,6 +314,19 @@ impl<'py> IntoPyObject<'py> for PySelect {
|
|||||||
Select::All => Ok(py.None().into_bound(py).into_any()),
|
Select::All => Ok(py.None().into_bound(py).into_any()),
|
||||||
Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()),
|
Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()),
|
||||||
Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()),
|
Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()),
|
||||||
|
Select::Expr(pairs) => {
|
||||||
|
// Serialize DataFusion Expr -> SQL string so Python sees the same
|
||||||
|
// format as Select::Dynamic: a list of (name, sql_string) tuples.
|
||||||
|
let sql_pairs: PyResult<Vec<(String, String)>> = pairs
|
||||||
|
.into_iter()
|
||||||
|
.map(|(name, expr)| {
|
||||||
|
lancedb::expr::expr_to_sql_string(&expr)
|
||||||
|
.map(|sql| (name, sql))
|
||||||
|
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Ok(sql_pairs?.into_pyobject(py)?.into_any())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -331,9 +342,13 @@ impl<'py> IntoPyObject<'py> for PyQueryFilter {
|
|||||||
|
|
||||||
fn into_pyobject(self, py: pyo3::Python<'py>) -> PyResult<Self::Output> {
|
fn into_pyobject(self, py: pyo3::Python<'py>) -> PyResult<Self::Output> {
|
||||||
match self.0 {
|
match self.0 {
|
||||||
QueryFilter::Datafusion(_) => Err(PyNotImplementedError::new_err(
|
QueryFilter::Datafusion(expr) => {
|
||||||
"Datafusion filter has no conversion to Python",
|
// Serialize the DataFusion expression to a SQL string so that
|
||||||
)),
|
// callers (e.g. remote tables) see the same format as Sql.
|
||||||
|
let sql = lancedb::expr::expr_to_sql_string(&expr)
|
||||||
|
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
|
||||||
|
Ok(sql.into_pyobject(py)?.into_any())
|
||||||
|
}
|
||||||
QueryFilter::Sql(sql) => Ok(sql.into_pyobject(py)?.into_any()),
|
QueryFilter::Sql(sql) => Ok(sql.into_pyobject(py)?.into_any()),
|
||||||
QueryFilter::Substrait(substrait) => Ok(substrait.into_pyobject(py)?.into_any()),
|
QueryFilter::Substrait(substrait) => Ok(substrait.into_pyobject(py)?.into_any()),
|
||||||
}
|
}
|
||||||
@@ -357,10 +372,20 @@ impl Query {
|
|||||||
self.inner = self.inner.clone().only_if(predicate);
|
self.inner = self.inner.clone().only_if(predicate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn where_expr(&mut self, expr: PyExpr) {
|
||||||
|
self.inner = self.inner.clone().only_if_expr(expr.0);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select(&mut self, columns: Vec<(String, String)>) {
|
pub fn select(&mut self, columns: Vec<(String, String)>) {
|
||||||
self.inner = self.inner.clone().select(Select::dynamic(&columns));
|
self.inner = self.inner.clone().select(Select::dynamic(&columns));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) {
|
||||||
|
let pairs: Vec<(String, lancedb::expr::DfExpr)> =
|
||||||
|
columns.into_iter().map(|(name, e)| (name, e.0)).collect();
|
||||||
|
self.inner = self.inner.clone().select(Select::Expr(pairs));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select_columns(&mut self, columns: Vec<String>) {
|
pub fn select_columns(&mut self, columns: Vec<String>) {
|
||||||
self.inner = self.inner.clone().select(Select::columns(&columns));
|
self.inner = self.inner.clone().select(Select::columns(&columns));
|
||||||
}
|
}
|
||||||
@@ -594,10 +619,20 @@ impl FTSQuery {
|
|||||||
self.inner = self.inner.clone().only_if(predicate);
|
self.inner = self.inner.clone().only_if(predicate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn where_expr(&mut self, expr: PyExpr) {
|
||||||
|
self.inner = self.inner.clone().only_if_expr(expr.0);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select(&mut self, columns: Vec<(String, String)>) {
|
pub fn select(&mut self, columns: Vec<(String, String)>) {
|
||||||
self.inner = self.inner.clone().select(Select::dynamic(&columns));
|
self.inner = self.inner.clone().select(Select::dynamic(&columns));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) {
|
||||||
|
let pairs: Vec<(String, lancedb::expr::DfExpr)> =
|
||||||
|
columns.into_iter().map(|(name, e)| (name, e.0)).collect();
|
||||||
|
self.inner = self.inner.clone().select(Select::Expr(pairs));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select_columns(&mut self, columns: Vec<String>) {
|
pub fn select_columns(&mut self, columns: Vec<String>) {
|
||||||
self.inner = self.inner.clone().select(Select::columns(&columns));
|
self.inner = self.inner.clone().select(Select::columns(&columns));
|
||||||
}
|
}
|
||||||
@@ -712,6 +747,10 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().only_if(predicate);
|
self.inner = self.inner.clone().only_if(predicate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn where_expr(&mut self, expr: PyExpr) {
|
||||||
|
self.inner = self.inner.clone().only_if_expr(expr.0);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn add_query_vector(&mut self, vector: Bound<'_, PyAny>) -> PyResult<()> {
|
pub fn add_query_vector(&mut self, vector: Bound<'_, PyAny>) -> PyResult<()> {
|
||||||
let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?;
|
let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?;
|
||||||
let array = make_array(data);
|
let array = make_array(data);
|
||||||
@@ -723,6 +762,12 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().select(Select::dynamic(&columns));
|
self.inner = self.inner.clone().select(Select::dynamic(&columns));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) {
|
||||||
|
let pairs: Vec<(String, lancedb::expr::DfExpr)> =
|
||||||
|
columns.into_iter().map(|(name, e)| (name, e.0)).collect();
|
||||||
|
self.inner = self.inner.clone().select(Select::Expr(pairs));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select_columns(&mut self, columns: Vec<String>) {
|
pub fn select_columns(&mut self, columns: Vec<String>) {
|
||||||
self.inner = self.inner.clone().select(Select::columns(&columns));
|
self.inner = self.inner.clone().select(Select::columns(&columns));
|
||||||
}
|
}
|
||||||
@@ -877,11 +922,21 @@ impl HybridQuery {
|
|||||||
self.inner_fts.r#where(predicate);
|
self.inner_fts.r#where(predicate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn where_expr(&mut self, expr: PyExpr) {
|
||||||
|
self.inner_vec.where_expr(expr.clone());
|
||||||
|
self.inner_fts.where_expr(expr);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select(&mut self, columns: Vec<(String, String)>) {
|
pub fn select(&mut self, columns: Vec<(String, String)>) {
|
||||||
self.inner_vec.select(columns.clone());
|
self.inner_vec.select(columns.clone());
|
||||||
self.inner_fts.select(columns);
|
self.inner_fts.select(columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) {
|
||||||
|
self.inner_vec.select_expr(columns.clone());
|
||||||
|
self.inner_fts.select_expr(columns);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn select_columns(&mut self, columns: Vec<String>) {
|
pub fn select_columns(&mut self, columns: Vec<String>) {
|
||||||
self.inner_vec.select_columns(columns.clone());
|
self.inner_vec.select_columns(columns.clone());
|
||||||
self.inner_fts.select_columns(columns);
|
self.inner_fts.select_columns(columns);
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
||||||
use pyo3::{pyclass, pymethods, PyResult};
|
use pyo3::{PyResult, pyclass, pymethods};
|
||||||
|
|
||||||
/// A session for managing caches and object stores across LanceDB operations.
|
/// A session for managing caches and object stores across LanceDB operations.
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -1,150 +0,0 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
||||||
|
|
||||||
//! PyO3 bindings for StorageOptionsProvider
|
|
||||||
//!
|
|
||||||
//! This module provides the bridge between Python StorageOptionsProvider objects
|
|
||||||
//! and Rust's StorageOptionsProvider trait, enabling automatic credential refresh.
|
|
||||||
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use async_trait::async_trait;
|
|
||||||
use lance_io::object_store::StorageOptionsProvider;
|
|
||||||
use pyo3::prelude::*;
|
|
||||||
use pyo3::types::PyDict;
|
|
||||||
|
|
||||||
/// Internal wrapper around a Python object implementing StorageOptionsProvider
|
|
||||||
pub struct PyStorageOptionsProvider {
|
|
||||||
/// The Python object implementing fetch_storage_options()
|
|
||||||
inner: Py<PyAny>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Clone for PyStorageOptionsProvider {
|
|
||||||
fn clone(&self) -> Self {
|
|
||||||
Python::attach(|py| Self {
|
|
||||||
inner: self.inner.clone_ref(py),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PyStorageOptionsProvider {
|
|
||||||
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
|
|
||||||
Python::attach(|py| {
|
|
||||||
// Verify the object has a fetch_storage_options method
|
|
||||||
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
|
||||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
|
||||||
"StorageOptionsProvider must implement fetch_storage_options() method",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
Ok(Self { inner: obj })
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Wrapper that implements the Rust StorageOptionsProvider trait
|
|
||||||
pub struct PyStorageOptionsProviderWrapper {
|
|
||||||
py_provider: PyStorageOptionsProvider,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PyStorageOptionsProviderWrapper {
|
|
||||||
pub fn new(py_provider: PyStorageOptionsProvider) -> Self {
|
|
||||||
Self { py_provider }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait]
impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
    /// Fetch storage options by invoking the Python provider's
    /// `fetch_storage_options()` method.
    ///
    /// Returns `Ok(None)` when the Python method returns `None`, otherwise a
    /// `HashMap<String, String>` built from the returned dict.
    ///
    /// # Errors
    /// - `Error::IO` if the Python call raises or the blocking task fails to join.
    /// - `Error::InvalidInput` if the return value is not a dict of string
    ///   key-value pairs.
    async fn fetch_storage_options(&self) -> lance_core::Result<Option<HashMap<String, String>>> {
        // Call Python method from async context using spawn_blocking
        // (acquiring the GIL can block, so keep it off the async executor).
        let py_provider = self.py_provider.clone();

        tokio::task::spawn_blocking(move || {
            Python::attach(|py| {
                // Call the Python fetch_storage_options method
                let result = py_provider
                    .inner
                    .bind(py)
                    .call_method0("fetch_storage_options")
                    .map_err(|e| lance_core::Error::IO {
                        source: Box::new(std::io::Error::other(format!(
                            "Failed to call fetch_storage_options: {}",
                            e
                        ))),
                        location: snafu::location!(),
                    })?;

                // If result is None, return None
                if result.is_none() {
                    return Ok(None);
                }

                // Extract the result dict - should be a flat Map<String, String>
                let result_dict = result.downcast::<PyDict>().map_err(|_| {
                    lance_core::Error::InvalidInput {
                        source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
                        location: snafu::location!(),
                    }
                })?;

                // Convert all entries to HashMap<String, String>
                // Both key and value must extract as Rust Strings; anything
                // else is rejected with InvalidInput rather than stringified.
                let mut storage_options = HashMap::new();
                for (key, value) in result_dict.iter() {
                    let key_str: String = key.extract().map_err(|e| {
                        lance_core::Error::InvalidInput {
                            source: format!("Storage option key must be a string: {}", e).into(),
                            location: snafu::location!(),
                        }
                    })?;
                    let value_str: String = value.extract().map_err(|e| {
                        lance_core::Error::InvalidInput {
                            source: format!("Storage option value must be a string: {}", e).into(),
                            location: snafu::location!(),
                        }
                    })?;
                    storage_options.insert(key_str, value_str);
                }

                Ok(Some(storage_options))
            })
        })
        .await
        // JoinError here means the blocking task panicked or was cancelled.
        .map_err(|e| lance_core::Error::IO {
            source: Box::new(std::io::Error::other(format!(
                "Task join error: {}",
                e
            ))),
            location: snafu::location!(),
        })?
    }

    /// Stable identifier for this provider, delegated to the Python object's
    /// `provider_id()` method. Infallible: any Python-side failure is folded
    /// into a fallback string instead of an error.
    fn provider_id(&self) -> String {
        Python::attach(|py| {
            // Call provider_id() method on the Python object
            let obj = self.py_provider.inner.bind(py);
            obj.call_method0("provider_id")
                .and_then(|result| result.extract::<String>())
                .unwrap_or_else(|e| {
                    // If provider_id() fails, construct a fallback ID
                    format!("PyStorageOptionsProvider(error: {})", e)
                })
        })
    }
}
|
|
||||||
|
|
||||||
impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(f, "PyStorageOptionsProviderWrapper({})", self.provider_id())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert a Python object to an Arc<dyn StorageOptionsProvider>
|
|
||||||
///
|
|
||||||
/// This is the main entry point for converting Python StorageOptionsProvider objects
|
|
||||||
/// to Rust trait objects that can be used by the Lance ecosystem.
|
|
||||||
pub fn py_object_to_storage_options_provider(
|
|
||||||
py_obj: Py<PyAny>,
|
|
||||||
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
|
|
||||||
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
|
|
||||||
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
|
|
||||||
}
|
|
||||||
@@ -5,8 +5,9 @@ use std::{collections::HashMap, sync::Arc};
|
|||||||
use crate::{
|
use crate::{
|
||||||
connection::Connection,
|
connection::Connection,
|
||||||
error::PythonErrorExt,
|
error::PythonErrorExt,
|
||||||
index::{extract_index_params, IndexConfig},
|
index::{IndexConfig, extract_index_params},
|
||||||
query::{Query, TakeQuery},
|
query::{Query, TakeQuery},
|
||||||
|
table::scannable::PyScannable,
|
||||||
};
|
};
|
||||||
use arrow::{
|
use arrow::{
|
||||||
datatypes::{DataType, Schema},
|
datatypes::{DataType, Schema},
|
||||||
@@ -18,13 +19,15 @@ use lancedb::table::{
|
|||||||
Table as LanceDbTable,
|
Table as LanceDbTable,
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||||
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
|
mod scannable;
|
||||||
|
|
||||||
/// Statistics about a compaction operation.
|
/// Statistics about a compaction operation.
|
||||||
#[pyclass(get_all)]
|
#[pyclass(get_all)]
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
@@ -109,19 +112,24 @@ impl From<lancedb::table::AddResult> for AddResult {
|
|||||||
#[pyclass(get_all)]
|
#[pyclass(get_all)]
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct DeleteResult {
|
pub struct DeleteResult {
|
||||||
|
pub num_deleted_rows: u64,
|
||||||
pub version: u64,
|
pub version: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pymethods]
|
#[pymethods]
|
||||||
impl DeleteResult {
|
impl DeleteResult {
|
||||||
pub fn __repr__(&self) -> String {
|
pub fn __repr__(&self) -> String {
|
||||||
format!("DeleteResult(version={})", self.version)
|
format!(
|
||||||
|
"DeleteResult(num_deleted_rows={}, version={})",
|
||||||
|
self.num_deleted_rows, self.version
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||||
fn from(result: lancedb::table::DeleteResult) -> Self {
|
fn from(result: lancedb::table::DeleteResult) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
num_deleted_rows: result.num_deleted_rows,
|
||||||
version: result.version,
|
version: result.version,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -291,14 +299,14 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (data, mode, progress=None))]
|
||||||
pub fn add<'a>(
|
pub fn add<'a>(
|
||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
data: Bound<'_, PyAny>,
|
data: PyScannable,
|
||||||
mode: String,
|
mode: String,
|
||||||
|
progress: Option<Py<PyAny>>,
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
|
let mut op = self_.inner_ref()?.add(data);
|
||||||
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
|
|
||||||
let mut op = self_.inner_ref()?.add(batches);
|
|
||||||
if mode == "append" {
|
if mode == "append" {
|
||||||
op = op.mode(AddDataMode::Append);
|
op = op.mode(AddDataMode::Append);
|
||||||
} else if mode == "overwrite" {
|
} else if mode == "overwrite" {
|
||||||
@@ -306,6 +314,81 @@ impl Table {
|
|||||||
} else {
|
} else {
|
||||||
return Err(PyValueError::new_err(format!("Invalid mode: {}", mode)));
|
return Err(PyValueError::new_err(format!("Invalid mode: {}", mode)));
|
||||||
}
|
}
|
||||||
|
if let Some(progress_obj) = progress {
|
||||||
|
let is_callable = Python::attach(|py| progress_obj.bind(py).is_callable());
|
||||||
|
if is_callable {
|
||||||
|
// Callback: call with a dict of progress info.
|
||||||
|
op = op.progress(move |p| {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
if let Err(e) = dict
|
||||||
|
.set_item("output_rows", p.output_rows())
|
||||||
|
.and_then(|_| dict.set_item("output_bytes", p.output_bytes()))
|
||||||
|
.and_then(|_| dict.set_item("total_rows", p.total_rows()))
|
||||||
|
.and_then(|_| {
|
||||||
|
dict.set_item("elapsed_seconds", p.elapsed().as_secs_f64())
|
||||||
|
})
|
||||||
|
.and_then(|_| dict.set_item("active_tasks", p.active_tasks()))
|
||||||
|
.and_then(|_| dict.set_item("total_tasks", p.total_tasks()))
|
||||||
|
.and_then(|_| dict.set_item("done", p.done()))
|
||||||
|
{
|
||||||
|
log::warn!("progress dict error: {e}");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if let Err(e) = progress_obj.call1(py, (dict,)) {
|
||||||
|
log::warn!("progress callback error: {e}");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// tqdm-like: has update() method.
|
||||||
|
let mut last_rows: usize = 0;
|
||||||
|
let mut total_set = false;
|
||||||
|
op = op.progress(move |p| {
|
||||||
|
let current = p.output_rows();
|
||||||
|
let prev = last_rows;
|
||||||
|
last_rows = current;
|
||||||
|
Python::attach(|py| {
|
||||||
|
if let Some(total) = p.total_rows()
|
||||||
|
&& !total_set
|
||||||
|
{
|
||||||
|
if let Err(e) = progress_obj.setattr(py, "total", total) {
|
||||||
|
log::warn!("progress setattr error: {e}");
|
||||||
|
}
|
||||||
|
total_set = true;
|
||||||
|
}
|
||||||
|
let delta = current.saturating_sub(prev);
|
||||||
|
if delta > 0 {
|
||||||
|
if let Err(e) = progress_obj.call_method1(py, "update", (delta,)) {
|
||||||
|
log::warn!("progress update error: {e}");
|
||||||
|
}
|
||||||
|
// Show throughput and active workers in tqdm postfix.
|
||||||
|
let elapsed = p.elapsed().as_secs_f64();
|
||||||
|
if elapsed > 0.0 {
|
||||||
|
let mb_per_sec = p.output_bytes() as f64 / elapsed / 1_000_000.0;
|
||||||
|
let postfix = format!(
|
||||||
|
"{:.1} MB/s | {}/{} workers",
|
||||||
|
mb_per_sec,
|
||||||
|
p.active_tasks(),
|
||||||
|
p.total_tasks()
|
||||||
|
);
|
||||||
|
if let Err(e) =
|
||||||
|
progress_obj.call_method1(py, "set_postfix_str", (postfix,))
|
||||||
|
{
|
||||||
|
log::warn!("progress set_postfix_str error: {e}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.done() {
|
||||||
|
// Force a final refresh so the bar shows completion.
|
||||||
|
if let Err(e) = progress_obj.call_method0(py, "refresh") {
|
||||||
|
log::warn!("progress refresh error: {e}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let result = op.execute().await.infer_error()?;
|
let result = op.execute().await.infer_error()?;
|
||||||
@@ -420,6 +503,17 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn prewarm_data(
|
||||||
|
self_: PyRef<'_, Self>,
|
||||||
|
columns: Option<Vec<String>>,
|
||||||
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
|
let inner = self_.inner_ref()?.clone();
|
||||||
|
future_into_py(self_.py(), async move {
|
||||||
|
inner.prewarm_data(columns).await.infer_error()?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
@@ -536,7 +630,7 @@ impl Table {
|
|||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let versions = inner.list_versions().await.infer_error()?;
|
let versions = inner.list_versions().await.infer_error()?;
|
||||||
let versions_as_dict = Python::attach(|py| {
|
Python::attach(|py| {
|
||||||
versions
|
versions
|
||||||
.iter()
|
.iter()
|
||||||
.map(|v| {
|
.map(|v| {
|
||||||
@@ -553,9 +647,7 @@ impl Table {
|
|||||||
Ok(dict.unbind())
|
Ok(dict.unbind())
|
||||||
})
|
})
|
||||||
.collect::<PyResult<Vec<_>>>()
|
.collect::<PyResult<Vec<_>>>()
|
||||||
});
|
})
|
||||||
|
|
||||||
versions_as_dict
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
145
python/src/table/scannable.rs
Normal file
145
python/src/table/scannable.rs
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow::{
|
||||||
|
datatypes::{Schema, SchemaRef},
|
||||||
|
ffi_stream::ArrowArrayStreamReader,
|
||||||
|
pyarrow::{FromPyArrow, PyArrowType},
|
||||||
|
};
|
||||||
|
use futures::StreamExt;
|
||||||
|
use lancedb::{
|
||||||
|
Error,
|
||||||
|
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||||
|
data::scannable::Scannable,
|
||||||
|
};
|
||||||
|
use pyo3::{FromPyObject, Py, PyAny, Python, types::PyAnyMethods};
|
||||||
|
|
||||||
|
/// Adapter that implements Scannable for a Python reader factory callable.
///
/// This holds a Python callable that returns a RecordBatchReader when called.
/// For rescannable sources, the callable can be invoked multiple times to
/// get fresh readers.
pub struct PyScannable {
    /// Python callable that returns a RecordBatchReader
    reader_factory: Py<PyAny>,
    // Arrow schema of the batches the factory's readers produce; reported
    // via `Scannable::schema` without re-entering Python.
    schema: SchemaRef,
    // Total row count when known up front; `None` when the source cannot
    // report it (streaming input).
    num_rows: Option<usize>,
    // Whether `reader_factory` may be called more than once to restart the scan.
    rescannable: bool,
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for PyScannable {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("PyScannable")
|
||||||
|
.field("schema", &self.schema)
|
||||||
|
.field("num_rows", &self.num_rows)
|
||||||
|
.field("rescannable", &self.rescannable)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Scannable for PyScannable {
    /// Schema of the batches produced by `scan_as_stream`.
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    /// Start a scan by invoking the Python reader factory and bridging the
    /// resulting (blocking) Arrow reader into an async record-batch stream.
    ///
    /// The factory call and reader construction happen eagerly under the GIL;
    /// any failure there is delivered as the first item of the stream rather
    /// than panicking here.
    fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
        let reader: Result<ArrowArrayStreamReader, Error> = {
            Python::attach(|py| {
                let result =
                    self.reader_factory
                        .call0(py)
                        .map_err(|e| lancedb::Error::Runtime {
                            message: format!("Python reader factory failed: {}", e),
                        })?;
                ArrowArrayStreamReader::from_pyarrow_bound(result.bind(py)).map_err(|e| {
                    lancedb::Error::Runtime {
                        message: format!("Failed to create Arrow reader from Python: {}", e),
                    }
                })
            })
        };

        // Reader is blocking but stream is non-blocking, so we need to spawn a task to pull.
        // Channel capacity 1 applies backpressure: the blocking task reads at
        // most one batch ahead of the consumer.
        let (tx, rx) = tokio::sync::mpsc::channel(1);

        let join_handle = tokio::task::spawn_blocking(move || {
            let reader = match reader {
                Ok(reader) => reader,
                Err(e) => {
                    // Surface the construction error through the stream, then stop.
                    let _ = tx.blocking_send(Err(e));
                    return;
                }
            };
            for batch in reader {
                match batch {
                    Ok(batch) => {
                        if tx.blocking_send(Ok(batch)).is_err() {
                            // Receiver dropped, stop processing
                            break;
                        }
                    }
                    Err(source) => {
                        // First reader error terminates the scan.
                        let _ = tx.blocking_send(Err(Error::Arrow { source }));
                        break;
                    }
                }
            }
        });

        let schema = self.schema.clone();
        // Drive the channel as a stream; the join handle is carried along so a
        // producer panic can be detected once the channel closes.
        let stream = futures::stream::unfold(
            (rx, Some(join_handle)),
            |(mut rx, join_handle)| async move {
                match rx.recv().await {
                    Some(Ok(batch)) => Some((Ok(batch), (rx, join_handle))),
                    Some(Err(e)) => Some((Err(e), (rx, join_handle))),
                    None => {
                        // Channel closed. Check if the task panicked — a panic
                        // drops the sender without sending an error, so without
                        // this check we'd silently return a truncated stream.
                        if let Some(handle) = join_handle
                            && let Err(join_err) = handle.await
                        {
                            return Some((
                                Err(Error::Runtime {
                                    message: format!("Reader task panicked: {}", join_err),
                                }),
                                // `None` here ensures the handle is awaited once.
                                (rx, None),
                            ));
                        }
                        None
                    }
                }
            },
        );
        // fuse() guarantees the stream stays terminated after yielding None.
        Box::pin(SimpleRecordBatchStream::new(stream.fuse(), schema))
    }

    /// Row count when known up front; `None` for streaming sources.
    fn num_rows(&self) -> Option<usize> {
        self.num_rows
    }

    /// Whether the Python factory may be invoked again for a fresh scan.
    fn rescannable(&self) -> bool {
        self.rescannable
    }
}
|
||||||
|
|
||||||
|
impl<'py> FromPyObject<'py> for PyScannable {
|
||||||
|
fn extract_bound(ob: &pyo3::Bound<'py, PyAny>) -> pyo3::PyResult<Self> {
|
||||||
|
// Convert from Scannable dataclass.
|
||||||
|
let schema: PyArrowType<Schema> = ob.getattr("schema")?.extract()?;
|
||||||
|
let schema = Arc::new(schema.0);
|
||||||
|
let num_rows: Option<usize> = ob.getattr("num_rows")?.extract()?;
|
||||||
|
let rescannable: bool = ob.getattr("rescannable")?.extract()?;
|
||||||
|
let reader_factory: Py<PyAny> = ob.getattr("reader")?.unbind();
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
schema,
|
||||||
|
reader_factory,
|
||||||
|
num_rows,
|
||||||
|
rescannable,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user