mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-12 17:40:41 +00:00
Compare commits
1 Commits
dependabot
...
jack/clipp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f94349ef59 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.28.0-beta.11"
|
||||
current_version = "0.27.0-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -18,6 +18,6 @@ body:
|
||||
label: Link
|
||||
description: >
|
||||
Provide a link to the existing documentation, if applicable.
|
||||
placeholder: ex. https://docs.lancedb.com/tables/...
|
||||
placeholder: ex. https://lancedb.com/docs/tables/...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
18
.github/dependabot.yml
vendored
18
.github/dependabot.yml
vendored
@@ -1,18 +0,0 @@
|
||||
version: 2
|
||||
|
||||
# Scope: the root Cargo workspace, which produces the Rust binaries we
|
||||
# ship to users (the Node.js and Python native extensions). The
|
||||
# `rust/lancedb` library crate shares the same lockfile; its consumers
|
||||
# pick their own dependency versions, but bumping transitive deps here
|
||||
# keeps the binaries we ship current.
|
||||
updates:
|
||||
- package-ecosystem: cargo
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 10
|
||||
groups:
|
||||
rust-minor-patch:
|
||||
update-types:
|
||||
- minor
|
||||
- patch
|
||||
@@ -23,15 +23,12 @@ runs:
|
||||
steps:
|
||||
- name: CONFIRM ARM BUILD
|
||||
shell: bash
|
||||
env:
|
||||
ARM_BUILD: ${{ inputs.arm-build }}
|
||||
run: |
|
||||
echo "ARM BUILD: $ARM_BUILD"
|
||||
echo "ARM BUILD: ${{ inputs.arm-build }}"
|
||||
- name: Build x86_64 Manylinux wheel
|
||||
if: ${{ inputs.arm-build == 'false' }}
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
maturin-version: "1.12.4"
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
@@ -47,7 +44,6 @@ runs:
|
||||
if: ${{ inputs.arm-build == 'true' }}
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
maturin-version: "1.12.4"
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
|
||||
1
.github/workflows/build_mac_wheel/action.yml
vendored
1
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -20,7 +20,6 @@ runs:
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: build
|
||||
maturin-version: "1.12.4"
|
||||
# TODO: pass through interpreter
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
|
||||
@@ -25,7 +25,6 @@ runs:
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: build
|
||||
maturin-version: "1.12.4"
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
working-directory: python
|
||||
|
||||
9
.github/workflows/dev.yml
vendored
9
.github/workflows/dev.yml
vendored
@@ -8,9 +8,6 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
permissions:
|
||||
@@ -18,7 +15,7 @@ jobs:
|
||||
name: Label PR
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: srvaroa/labeler@v1
|
||||
- uses: srvaroa/labeler@master
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
commitlint:
|
||||
@@ -27,7 +24,7 @@ jobs:
|
||||
name: Verify PR title / description conforms to semantic-release
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: "18"
|
||||
# These rules are disabled because Github will always ensure there
|
||||
@@ -50,7 +47,7 @@ jobs:
|
||||
|
||||
${{ github.event.pull_request.body }}
|
||||
- if: failure()
|
||||
uses: actions/github-script@v7
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const message = `**ACTION NEEDED**
|
||||
|
||||
4
.github/workflows/docs.yml
vendored
4
.github/workflows/docs.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
run: |
|
||||
PYTHONPATH=. mkdocs build
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v5
|
||||
uses: actions/configure-pages@v2
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
|
||||
8
.github/workflows/java-publish.yml
vendored
8
.github/workflows/java-publish.yml
vendored
@@ -19,9 +19,6 @@ on:
|
||||
paths:
|
||||
- .github/workflows/java-publish.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
name: Build and Publish
|
||||
@@ -43,7 +40,7 @@ jobs:
|
||||
server-username: SONATYPE_USER
|
||||
server-password: SONATYPE_TOKEN
|
||||
gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
gpg-passphrase: MAVEN_GPG_PASSPHRASE
|
||||
gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
|
||||
- name: Set git config
|
||||
run: |
|
||||
git config --global user.email "dev+gha@lancedb.com"
|
||||
@@ -58,11 +55,10 @@ jobs:
|
||||
echo "use-agent" >> ~/.gnupg/gpg.conf
|
||||
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
|
||||
export GPG_TTY=$(tty)
|
||||
./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh
|
||||
./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
|
||||
env:
|
||||
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
|
||||
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
|
||||
MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
||||
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
|
||||
3
.github/workflows/java.yml
vendored
3
.github/workflows/java.yml
vendored
@@ -24,9 +24,6 @@ on:
|
||||
- java/**
|
||||
- .github/workflows/java.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build-java:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
4
.github/workflows/license-header-check.yml
vendored
4
.github/workflows/license-header-check.yml
vendored
@@ -10,10 +10,6 @@ on:
|
||||
- nodejs/**
|
||||
- java/**
|
||||
- .github/workflows/license-header-check.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-licenses:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
13
.github/workflows/nodejs.yml
vendored
13
.github/workflows/nodejs.yml
vendored
@@ -7,17 +7,12 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
- .github/workflows/nodejs.yml
|
||||
- docker-compose.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -42,7 +37,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
@@ -82,7 +77,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
name: Setup Node.js 20 for build
|
||||
with:
|
||||
# @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
|
||||
@@ -99,7 +94,7 @@ jobs:
|
||||
run: |
|
||||
npm ci --include=optional
|
||||
npm run build:debug -- --profile ci
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
name: Setup Node.js ${{ matrix.node-version }} for test
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
@@ -148,7 +143,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
|
||||
20
.github/workflows/npm-publish.yml
vendored
20
.github/workflows/npm-publish.yml
vendored
@@ -19,7 +19,6 @@ on:
|
||||
paths:
|
||||
- .github/workflows/npm-publish.yml
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
- Cargo.lock
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -125,12 +124,7 @@ jobs:
|
||||
pre_build: |-
|
||||
set -e &&
|
||||
apt-get update &&
|
||||
apt-get install -y protobuf-compiler pkg-config &&
|
||||
# The base image (manylinux2014-cross) sets TARGET_CC to the old
|
||||
# GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
|
||||
# so it picks up GCC even though the napi-rs image sets CC=clang.
|
||||
# Override to use the image's clang-18 which supports -fuse-ld=lld.
|
||||
export TARGET_CC=clang TARGET_CXX=clang++
|
||||
apt-get install -y protobuf-compiler pkg-config
|
||||
- target: x86_64-unknown-linux-musl
|
||||
# This one seems to need some extra memory
|
||||
host: ubuntu-2404-8x-x64
|
||||
@@ -150,10 +144,9 @@ jobs:
|
||||
set -e &&
|
||||
apt-get update &&
|
||||
apt-get install -y protobuf-compiler pkg-config &&
|
||||
export TARGET_CC=clang TARGET_CXX=clang++ &&
|
||||
# The manylinux2014 sysroot has glibc 2.17 headers which lack
|
||||
# AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
|
||||
export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
|
||||
# https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
|
||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
|
||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
|
||||
rustup target add aarch64-unknown-linux-gnu
|
||||
- target: aarch64-unknown-linux-musl
|
||||
host: ubuntu-2404-8x-x64
|
||||
@@ -273,7 +266,7 @@ jobs:
|
||||
- target: x86_64-unknown-linux-gnu
|
||||
host: ubuntu-latest
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
host: ubuntu-2404-8x-arm64
|
||||
host: buildjet-16vcpu-ubuntu-2204-arm
|
||||
node:
|
||||
- '20'
|
||||
runs-on: ${{ matrix.settings.host }}
|
||||
@@ -363,8 +356,7 @@ jobs:
|
||||
if [[ $DRY_RUN == "true" ]]; then
|
||||
ARGS="$ARGS --dry-run"
|
||||
fi
|
||||
VERSION=$(node -p "require('./package.json').version")
|
||||
if [[ $VERSION == *-* ]]; then
|
||||
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
|
||||
ARGS="$ARGS --tag preview"
|
||||
fi
|
||||
npm publish $ARGS
|
||||
|
||||
16
.github/workflows/pypi-publish.yml
vendored
16
.github/workflows/pypi-publish.yml
vendored
@@ -9,21 +9,14 @@ on:
|
||||
paths:
|
||||
- .github/workflows/pypi-publish.yml
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
- Cargo.lock
|
||||
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
@@ -63,12 +56,10 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
mac:
|
||||
timeout-minutes: 90
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -93,12 +84,10 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
windows:
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -117,6 +106,7 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
gh-release:
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
|
||||
11
.github/workflows/python.yml
vendored
11
.github/workflows/python.yml
vendored
@@ -7,18 +7,9 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
- .github/workflows/build_linux_wheel/**
|
||||
- .github/workflows/build_mac_wheel/**
|
||||
- .github/workflows/build_windows_wheel/**
|
||||
- .github/workflows/run_tests/**
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -111,6 +102,7 @@ jobs:
|
||||
- name: Install
|
||||
run: |
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install tantivy
|
||||
pip install mlx
|
||||
- name: Doctest
|
||||
run: pytest --doctest-modules python/lancedb
|
||||
@@ -229,5 +221,6 @@ jobs:
|
||||
pip install "pydantic<2"
|
||||
pip install pyarrow==16
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install tantivy
|
||||
- name: Run tests
|
||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||
|
||||
39
.github/workflows/rust.yml
vendored
39
.github/workflows/rust.yml
vendored
@@ -7,17 +7,9 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- deny.toml
|
||||
- rust/**
|
||||
- nodejs/Cargo.toml
|
||||
- python/Cargo.toml
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -59,17 +51,6 @@ jobs:
|
||||
- name: Run clippy (without remote feature)
|
||||
run: cargo clippy --profile ci --workspace --tests -- -D warnings
|
||||
|
||||
deny:
|
||||
# Supply-chain checks: advisories, licenses, banned crates, and source
|
||||
# restrictions. Configuration lives in `deny.toml` at the workspace root.
|
||||
timeout-minutes: 10
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: EmbarkStudios/cargo-deny-action@v2
|
||||
with:
|
||||
command: check advisories bans licenses sources
|
||||
|
||||
build-no-lock:
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 30
|
||||
@@ -119,9 +100,7 @@ jobs:
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
run: sudo apt install -y protobuf-compiler libssl-dev
|
||||
- uses: rui314/setup-mold@v1
|
||||
- name: Make Swap
|
||||
run: |
|
||||
@@ -225,14 +204,14 @@ jobs:
|
||||
- name: Downgrade dependencies
|
||||
# These packages have newer requirements for MSRV
|
||||
run: |
|
||||
cargo update -p aws-sdk-bedrockruntime --precise 1.77.0
|
||||
cargo update -p aws-sdk-dynamodb --precise 1.68.0
|
||||
cargo update -p aws-config --precise 1.6.0
|
||||
cargo update -p aws-sdk-kms --precise 1.63.0
|
||||
cargo update -p aws-sdk-s3 --precise 1.79.0
|
||||
cargo update -p aws-sdk-sso --precise 1.62.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.63.0
|
||||
cargo update -p aws-sdk-sts --precise 1.63.0
|
||||
cargo update -p aws-sdk-bedrockruntime --precise 1.64.0
|
||||
cargo update -p aws-sdk-dynamodb --precise 1.55.0
|
||||
cargo update -p aws-config --precise 1.5.10
|
||||
cargo update -p aws-sdk-kms --precise 1.51.0
|
||||
cargo update -p aws-sdk-s3 --precise 1.65.0
|
||||
cargo update -p aws-sdk-sso --precise 1.50.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.51.0
|
||||
cargo update -p aws-sdk-sts --precise 1.51.0
|
||||
cargo update -p home --precise 0.5.9
|
||||
- name: cargo +${{ matrix.msrv }} check
|
||||
env:
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Update package-lock.json
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Update NodeJs package-lock.json
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
31
.github/workflows/upload_wheel/action.yml
vendored
31
.github/workflows/upload_wheel/action.yml
vendored
@@ -2,6 +2,9 @@ name: upload-wheel
|
||||
|
||||
description: "Upload wheels to Pypi"
|
||||
inputs:
|
||||
pypi_token:
|
||||
required: true
|
||||
description: "release token for the repo"
|
||||
fury_token:
|
||||
required: true
|
||||
description: "release token for the fury repo"
|
||||
@@ -9,6 +12,12 @@ inputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install twine
|
||||
python3 -m pip install --upgrade pkginfo
|
||||
- name: Choose repo
|
||||
shell: bash
|
||||
id: choose_repo
|
||||
@@ -18,17 +27,19 @@ runs:
|
||||
else
|
||||
echo "repo=pypi" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Publish to Fury
|
||||
if: steps.choose_repo.outputs.repo == 'fury'
|
||||
- name: Publish to PyPI
|
||||
shell: bash
|
||||
env:
|
||||
FURY_TOKEN: ${{ inputs.fury_token }}
|
||||
PYPI_TOKEN: ${{ inputs.pypi_token }}
|
||||
run: |
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
- name: Publish to PyPI
|
||||
if: steps.choose_repo.outputs.repo == 'pypi'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: target/wheels/
|
||||
if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
else
|
||||
twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
|
||||
--username __token__ \
|
||||
--password $PYPI_TOKEN \
|
||||
target/wheels/lancedb-*.whl
|
||||
fi
|
||||
|
||||
4706
Cargo.lock
generated
4706
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
66
Cargo.toml
66
Cargo.toml
@@ -1,9 +1,11 @@
|
||||
[workspace]
|
||||
members = ["rust/lancedb", "nodejs", "python"]
|
||||
# Python package needs to be built by maturin.
|
||||
exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
edition = "2021"
|
||||
authors = ["LanceDB Devs <dev@lancedb.com>"]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/lancedb/lancedb"
|
||||
@@ -13,40 +15,38 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
arrow-array = "58.0.0"
|
||||
arrow-data = "58.0.0"
|
||||
arrow-ipc = "58.0.0"
|
||||
arrow-ord = "58.0.0"
|
||||
arrow-schema = "58.0.0"
|
||||
arrow-select = "58.0.0"
|
||||
arrow-cast = "58.0.0"
|
||||
arrow = { version = "57.2", optional = false }
|
||||
arrow-array = "57.2"
|
||||
arrow-data = "57.2"
|
||||
arrow-ipc = "57.2"
|
||||
arrow-ord = "57.2"
|
||||
arrow-schema = "57.2"
|
||||
arrow-select = "57.2"
|
||||
arrow-cast = "57.2"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "53.0.0", default-features = false }
|
||||
datafusion-catalog = "53.0.0"
|
||||
datafusion-common = { version = "53.0.0", default-features = false }
|
||||
datafusion-execution = "53.0.0"
|
||||
datafusion-expr = "53.0.0"
|
||||
datafusion-functions = "53.0.0"
|
||||
datafusion-physical-plan = "53.0.0"
|
||||
datafusion-physical-expr = "53.0.0"
|
||||
datafusion-sql = "53.0.0"
|
||||
datafusion = { version = "51.0", default-features = false }
|
||||
datafusion-catalog = "51.0"
|
||||
datafusion-common = { version = "51.0", default-features = false }
|
||||
datafusion-execution = "51.0"
|
||||
datafusion-expr = "51.0"
|
||||
datafusion-physical-plan = "51.0"
|
||||
datafusion-physical-expr = "51.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
@@ -54,7 +54,7 @@ half = { "version" = "2.7.1", default-features = false, features = [
|
||||
futures = "0"
|
||||
log = "0.4"
|
||||
moka = { version = "0.12", features = ["future"] }
|
||||
object_store = "0.13.2"
|
||||
object_store = "0.12.0"
|
||||
pin-project = "1.0.7"
|
||||
rand = "0.9"
|
||||
snafu = "0.8"
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# **The Multimodal AI Lakehouse**
|
||||
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
|
||||
**The ultimate multimodal data platform for AI/ML applications.**
|
||||
|
||||
@@ -57,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei
|
||||
|
||||
## **How to Install**:
|
||||
|
||||
Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally.
|
||||
Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally.
|
||||
|
||||
**API & SDK:** We also support Python, Typescript and Rust SDKs
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -27,7 +26,6 @@ SEMVER_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
@functools.total_ordering
|
||||
@dataclass(frozen=True)
|
||||
class SemVer:
|
||||
major: int
|
||||
@@ -158,9 +156,7 @@ def read_current_version(repo_root: Path) -> str:
|
||||
|
||||
|
||||
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
|
||||
# Stable releases (no prerelease) are always preferred over pre-releases.
|
||||
# Within each group, standard semver ordering applies.
|
||||
return max(tags, key=lambda tag: (not tag.semver.prerelease, tag.semver))
|
||||
return max(tags, key=lambda tag: tag.semver)
|
||||
|
||||
|
||||
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
|
||||
|
||||
196
deny.toml
196
deny.toml
@@ -1,196 +0,0 @@
|
||||
# cargo-deny configuration for LanceDB.
|
||||
#
|
||||
# Run locally with `cargo deny check`. See
|
||||
# https://embarkstudios.github.io/cargo-deny/ for the full reference.
|
||||
|
||||
# The set of target triples we care about. cargo-deny will only consider
|
||||
# dependencies that are used on at least one of these targets. Keeping this
|
||||
# explicit avoids noise from platform-specific crates (e.g. wasm, android,
|
||||
# ios) that we never actually ship.
|
||||
[graph]
|
||||
targets = [
|
||||
"x86_64-unknown-linux-gnu",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-apple-darwin",
|
||||
"aarch64-apple-darwin",
|
||||
"x86_64-pc-windows-msvc",
|
||||
"aarch64-pc-windows-msvc",
|
||||
]
|
||||
all-features = true
|
||||
|
||||
[output]
|
||||
feature-depth = 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Advisories: security vulnerabilities and yanked crates.
|
||||
# ---------------------------------------------------------------------------
|
||||
[advisories]
|
||||
version = 2
|
||||
# Fail the check if any crate in the lockfile has been yanked from crates.io.
|
||||
# Yanked crates are a signal the author retracted the release (often due to
|
||||
# bugs or security issues) and should not be depended on.
|
||||
yanked = "deny"
|
||||
# Advisory IDs we have explicitly reviewed and chosen to accept. Every
|
||||
# entry must include a rationale and, where possible, an upstream issue
|
||||
# pointing to a fix. Revisit this list whenever dependencies are updated.
|
||||
ignore = [
|
||||
# rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
|
||||
# Reached only through opendal → reqsign → rsa. We do not use RSA
|
||||
# decryption in LanceDB ourselves; this is dormant in the signing path.
|
||||
# No fixed release exists upstream as of this writing.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2023-0071
|
||||
{ id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
|
||||
|
||||
# instant: unmaintained. Pulled in via backoff → instant. Upstream
|
||||
# recommends switching to `web-time`; fix has to come from backoff.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0384
|
||||
{ id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
|
||||
|
||||
# paste: unmaintained (author archived the repo). Used transitively by
|
||||
# datafusion and the arrow ecosystem; widespread, no drop-in replacement.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0436
|
||||
{ id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
|
||||
|
||||
# encoding: unmaintained. Reached through lindera-dictionary, which is
|
||||
# required by the native Lindera tokenizer path. Lindera has not migrated
|
||||
# off this crate yet.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2021-0153
|
||||
{ id = "RUSTSEC-2021-0153", reason = "transitive via lindera-dictionary for native Lindera tokenizer" },
|
||||
|
||||
# fast-float: unsound and unmaintained. Reached only through polars-arrow
|
||||
# from the optional Polars integration; replacement requires a Polars
|
||||
# dependency upgrade.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0379
|
||||
{ id = "RUSTSEC-2024-0379", reason = "transitive via polars-arrow; waiting on Polars migration" },
|
||||
|
||||
# tantivy: segfault on malformed input due to missing bounds check.
|
||||
# Pulled in via lance for full-text search. We only feed tantivy
|
||||
# documents we construct ourselves, not attacker-controlled bytes.
|
||||
# Tracked for a lance dependency bump.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0003
|
||||
{ id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
|
||||
|
||||
# backoff: unmaintained. Reached only via async-openai. Replacement
|
||||
# requires async-openai to migrate (or us to drop async-openai).
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0012
|
||||
{ id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
|
||||
|
||||
# number_prefix: unmaintained. Transitive via indicatif → hf-hub.
|
||||
# No security impact, just maintenance status.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0119
|
||||
{ id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
|
||||
|
||||
# bincode: unmaintained. Reached through lindera and lindera-dictionary,
|
||||
# which are required by the native Lindera tokenizer path. Lindera has not
|
||||
# migrated to another serialization format yet.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0141
|
||||
{ id = "RUSTSEC-2025-0141", reason = "transitive via lindera/lindera-dictionary for native Lindera tokenizer" },
|
||||
|
||||
# lru: soundness issue in IterMut. Reached only through aws-sdk-s3 in
|
||||
# LanceDB's dev-dependency graph; LanceDB does not use that iterator
|
||||
# directly. Clearing this requires the AWS SDK chain to update lru.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0002
|
||||
{ id = "RUSTSEC-2026-0002", reason = "transitive via aws-sdk-s3 dev-dependency; waiting on AWS SDK lru upgrade" },
|
||||
|
||||
# rustls-webpki 0.101.7 (old major line): name-constraint checks for
|
||||
# URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
|
||||
# from aws-smithy-http-client. The 0.103 line we actively use is patched.
|
||||
# Clearing the 0.101 copy requires the aws-sdk chain to migrate off
|
||||
# rustls 0.21.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0098
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0099
|
||||
{ id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
{ id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
|
||||
# rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
|
||||
# rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
|
||||
# we actively use is upgraded to 0.103.13 which contains the fix.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0104
|
||||
{ id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
|
||||
# rand 0.8.5: soundness issue only when ThreadRng reseeds inside a custom
|
||||
# logger. Reached through several transitive chains. LanceDB does not use
|
||||
# rand from a custom logger; upgrade once all pinned chains accept 0.8.6+.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0097
|
||||
{ id = "RUSTSEC-2026-0097", reason = "transitive rand 0.8.5; LanceDB does not call ThreadRng from custom logging" },
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
|
||||
# ---------------------------------------------------------------------------
|
||||
[licenses]
|
||||
version = 2
|
||||
# SPDX identifiers for licenses that are compatible with our Apache-2.0
|
||||
# distribution. Additions require legal review.
|
||||
allow = [
|
||||
"Apache-2.0",
|
||||
"Apache-2.0 WITH LLVM-exception",
|
||||
"MIT",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"ISC",
|
||||
"Unicode-3.0",
|
||||
"Unicode-DFS-2016",
|
||||
"Zlib",
|
||||
"CC0-1.0",
|
||||
"MPL-2.0",
|
||||
"BSL-1.0",
|
||||
"OpenSSL",
|
||||
# 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
|
||||
# required. Pulled in by `mock_instant`.
|
||||
"0BSD",
|
||||
# bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
|
||||
# in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
|
||||
"bzip2-1.0.6",
|
||||
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
|
||||
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
|
||||
"CDLA-Permissive-2.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
# Crates whose license cannot be determined from Cargo metadata but whose
|
||||
# license we've manually confirmed from upstream. Keep this list minimal.
|
||||
[[licenses.clarify]]
|
||||
# polars-arrow-format omits the `license` field in its Cargo.toml, but the
|
||||
# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
|
||||
# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
|
||||
crate = "polars-arrow-format"
|
||||
expression = "Apache-2.0 OR MIT"
|
||||
license-files = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bans: disallow specific crates and flag dependency hygiene issues.
|
||||
# ---------------------------------------------------------------------------
|
||||
[bans]
|
||||
# Warn (not deny) on duplicate versions of the same crate. In a large
|
||||
# workspace like this one, duplicates are common and often unavoidable
|
||||
# transitively. We surface them to discourage growth, but don't fail CI.
|
||||
multiple-versions = "warn"
|
||||
# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
|
||||
# future release in without review. Ban them outright.
|
||||
wildcards = "deny"
|
||||
# Internal workspace crates reference each other via `path = "..."`, which
|
||||
# cargo-deny sees as a wildcard version. That's fine for private workspace
|
||||
# members (not published to crates.io), so allow it specifically for paths.
|
||||
allow-wildcard-paths = true
|
||||
# Crates that are explicitly banned; any match causes the check to fail.
|
||||
deny = []
|
||||
# Crates to skip when checking for duplicate versions.
|
||||
skip = []
|
||||
# Similar to `skip`, but also skips the entire transitive subtree.
|
||||
skip-tree = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sources: restrict where crates can come from.
|
||||
# ---------------------------------------------------------------------------
|
||||
[sources]
|
||||
# Deny any registry other than the ones explicitly listed below.
|
||||
unknown-registry = "deny"
|
||||
# Deny any git dependency whose host isn't in the allow-list below. This
|
||||
# prevents accidental pulls from arbitrary forks.
|
||||
unknown-git = "deny"
|
||||
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
|
||||
# Lance is developed in a sibling repo and pulled as a git dependency until
|
||||
# releases are cut to crates.io. Allow that specific host.
|
||||
allow-git = [
|
||||
"https://github.com/lance-format/lance",
|
||||
]
|
||||
@@ -1,7 +1,7 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
localstack:
|
||||
image: localstack/localstack:4.0
|
||||
image: localstack/localstack:3.3
|
||||
ports:
|
||||
- 4566:4566
|
||||
environment:
|
||||
|
||||
@@ -1,27 +1,27 @@
|
||||
# Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||
# Usage: docker build -t lancedb:latest -f Dockerfile .
|
||||
FROM python:3.12-slim-bookworm
|
||||
#Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||
#Usage docker build -t lancedb:latest -f Dockerfile .
|
||||
FROM python:3.10-slim-buster
|
||||
|
||||
# Install build dependencies in a single layer
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
build-essential \
|
||||
protobuf-compiler \
|
||||
git \
|
||||
ca-certificates && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Rust (pinned installer, non-interactive)
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
# Install Rust
|
||||
RUN apt-get update && apt-get install -y curl build-essential && \
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
|
||||
# Set the environment variable for Rust
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Install protobuf compiler
|
||||
RUN apt-get install -y protobuf-compiler && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apt-get -y update &&\
|
||||
apt-get -y upgrade && \
|
||||
apt-get -y install git
|
||||
|
||||
|
||||
# Verify installations
|
||||
RUN python --version && \
|
||||
rustc --version && \
|
||||
protoc --version
|
||||
|
||||
RUN pip install --no-cache-dir lancedb
|
||||
RUN pip install tantivy lancedb
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# LanceDB Documentation
|
||||
|
||||
LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).
|
||||
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
|
||||
|
||||
The SDK docs are built and deployed automatically by [GitHub Actions](../.github/workflows/docs.yml)
|
||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||
|
||||
@@ -52,21 +52,14 @@ plugins:
|
||||
options:
|
||||
docstring_style: numpy
|
||||
heading_level: 3
|
||||
show_source: true
|
||||
show_symbol_type_in_heading: true
|
||||
show_signature_annotations: true
|
||||
show_root_heading: true
|
||||
show_docstring_examples: true
|
||||
show_docstring_attributes: false
|
||||
show_docstring_other_parameters: true
|
||||
show_symbol_type_heading: true
|
||||
show_labels: false
|
||||
show_if_no_docstring: true
|
||||
show_source: false
|
||||
members_order: source
|
||||
docstring_section_style: list
|
||||
signature_crossrefs: true
|
||||
separate_signature: true
|
||||
filters:
|
||||
- "!^_"
|
||||
import:
|
||||
# for cross references
|
||||
- https://arrow.apache.org/docs/objects.inv
|
||||
@@ -120,7 +113,7 @@ markdown_extensions:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- markdown.extensions.toc:
|
||||
toc_depth: 4
|
||||
toc_depth: 3
|
||||
permalink: true
|
||||
permalink_title: Anchor link to this section
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
mkdocs==1.6.1
|
||||
mkdocs==1.5.3
|
||||
mkdocs-jupyter==0.24.1
|
||||
mkdocs-material==9.6.23
|
||||
mkdocs-autorefs>=0.5,<=1.0
|
||||
mkdocstrings[python]>=0.24,<1.0
|
||||
griffe>=0.40,<1.0
|
||||
mkdocs-render-swagger-plugin>=0.1.0
|
||||
pydantic>=2.0,<3.0
|
||||
mkdocs-redirects>=1.2.0
|
||||
mkdocs-material==9.5.3
|
||||
mkdocs-autorefs<=1.0
|
||||
mkdocstrings[python]==0.25.2
|
||||
griffe
|
||||
mkdocs-render-swagger-plugin
|
||||
pydantic
|
||||
mkdocs-redirects
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<version>0.27.0-beta.2</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -57,32 +57,32 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
|
||||
## Metadata Operations
|
||||
|
||||
### Creating a Namespace Path
|
||||
### Creating a Namespace
|
||||
|
||||
Namespace paths organize tables hierarchically. Create the desired namespace path before creating tables within it:
|
||||
Namespaces organize tables hierarchically. Create a namespace before creating tables within it:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.CreateNamespaceRequest;
|
||||
import org.lance.namespace.model.CreateNamespaceResponse;
|
||||
|
||||
// Create a child namespace path
|
||||
// Create a child namespace
|
||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||
```
|
||||
|
||||
You can also create nested namespace paths:
|
||||
You can also create nested namespaces:
|
||||
|
||||
```java
|
||||
// Create a nested namespace path: parent/child
|
||||
// Create a nested namespace: parent/child
|
||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
|
||||
|
||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||
```
|
||||
|
||||
### Describing a Namespace Path
|
||||
### Describing a Namespace
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DescribeNamespaceRequest;
|
||||
@@ -95,22 +95,22 @@ DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
|
||||
System.out.println("Namespace properties: " + response.getProperties());
|
||||
```
|
||||
|
||||
### Listing Namespace Paths
|
||||
### Listing Namespaces
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.ListNamespacesRequest;
|
||||
import org.lance.namespace.model.ListNamespacesResponse;
|
||||
|
||||
// List all namespace paths at the root level
|
||||
// List all namespaces at root level
|
||||
ListNamespacesRequest request = new ListNamespacesRequest();
|
||||
request.setId(Arrays.asList()); // Empty for root
|
||||
|
||||
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
|
||||
for (String ns : response.getNamespaces()) {
|
||||
System.out.println("Namespace path: " + ns);
|
||||
System.out.println("Namespace: " + ns);
|
||||
}
|
||||
|
||||
// List child namespace paths under a parent path
|
||||
// List child namespaces under a parent
|
||||
ListNamespacesRequest childRequest = new ListNamespacesRequest();
|
||||
childRequest.setId(Arrays.asList("parent_namespace"));
|
||||
|
||||
@@ -123,7 +123,7 @@ ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childReque
|
||||
import org.lance.namespace.model.ListTablesRequest;
|
||||
import org.lance.namespace.model.ListTablesResponse;
|
||||
|
||||
// List tables in a namespace path
|
||||
// List tables in a namespace
|
||||
ListTablesRequest request = new ListTablesRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
@@ -133,7 +133,7 @@ for (String table : response.getTables()) {
|
||||
}
|
||||
```
|
||||
|
||||
### Dropping a Namespace Path
|
||||
### Dropping a Namespace
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DropNamespaceRequest;
|
||||
@@ -175,7 +175,7 @@ DropTableResponse response = namespaceClient.dropTable(request);
|
||||
|
||||
### Creating a Table
|
||||
|
||||
Tables are created within a namespace path by providing data in Apache Arrow IPC format:
|
||||
Tables are created within a namespace by providing data in Apache Arrow IPC format:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
@@ -242,7 +242,7 @@ try (BufferAllocator allocator = new RootAllocator();
|
||||
}
|
||||
byte[] tableData = out.toByteArray();
|
||||
|
||||
// Create a table in a namespace path
|
||||
// Create table in a namespace
|
||||
CreateTableRequest request = new CreateTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
CreateTableResponse response = namespaceClient.createTable(request, tableData);
|
||||
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -61,8 +61,8 @@ sharing the same data, deletion, and index files.
|
||||
* **options.sourceVersion?**: `number`
|
||||
The version of the source table to clone.
|
||||
|
||||
* **options.targetNamespacePath?**: `string`[]
|
||||
The namespace path for the target table (defaults to root namespace).
|
||||
* **options.targetNamespace?**: `string`[]
|
||||
The namespace for the target table (defaults to root namespace).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -116,13 +116,13 @@ Creates a new empty Table
|
||||
|
||||
`Promise`<[`Table`](Table.md)>
|
||||
|
||||
#### createEmptyTable(name, schema, namespacePath, options)
|
||||
#### createEmptyTable(name, schema, namespace, options)
|
||||
|
||||
```ts
|
||||
abstract createEmptyTable(
|
||||
name,
|
||||
schema,
|
||||
namespacePath?,
|
||||
namespace?,
|
||||
options?): Promise<Table>
|
||||
```
|
||||
|
||||
@@ -136,8 +136,8 @@ Creates a new empty Table
|
||||
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
|
||||
The schema of the table
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to create the table in (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to create the table in (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
Additional options
|
||||
@@ -150,10 +150,10 @@ Creates a new empty Table
|
||||
|
||||
### createTable()
|
||||
|
||||
#### createTable(options, namespacePath)
|
||||
#### createTable(options, namespace)
|
||||
|
||||
```ts
|
||||
abstract createTable(options, namespacePath?): Promise<Table>
|
||||
abstract createTable(options, namespace?): Promise<Table>
|
||||
```
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
@@ -163,8 +163,8 @@ Creates a new Table and initialize it with new data.
|
||||
* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
The options object.
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to create the table in (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to create the table in (defaults to root namespace)
|
||||
|
||||
##### Returns
|
||||
|
||||
@@ -197,13 +197,13 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
`Promise`<[`Table`](Table.md)>
|
||||
|
||||
#### createTable(name, data, namespacePath, options)
|
||||
#### createTable(name, data, namespace, options)
|
||||
|
||||
```ts
|
||||
abstract createTable(
|
||||
name,
|
||||
data,
|
||||
namespacePath?,
|
||||
namespace?,
|
||||
options?): Promise<Table>
|
||||
```
|
||||
|
||||
@@ -218,8 +218,8 @@ Creates a new Table and initialize it with new data.
|
||||
Non-empty Array of Records
|
||||
to be inserted into the table
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to create the table in (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to create the table in (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
Additional options
|
||||
@@ -247,15 +247,15 @@ Return a brief description of the connection
|
||||
### dropAllTables()
|
||||
|
||||
```ts
|
||||
abstract dropAllTables(namespacePath?): Promise<void>
|
||||
abstract dropAllTables(namespace?): Promise<void>
|
||||
```
|
||||
|
||||
Drop all tables in the database.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to drop tables from (defaults to root namespace).
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to drop tables from (defaults to root namespace).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -266,7 +266,7 @@ Drop all tables in the database.
|
||||
### dropTable()
|
||||
|
||||
```ts
|
||||
abstract dropTable(name, namespacePath?): Promise<void>
|
||||
abstract dropTable(name, namespace?): Promise<void>
|
||||
```
|
||||
|
||||
Drop an existing table.
|
||||
@@ -276,8 +276,8 @@ Drop an existing table.
|
||||
* **name**: `string`
|
||||
The name of the table to drop.
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path of the table (defaults to root namespace).
|
||||
* **namespace?**: `string`[]
|
||||
The namespace of the table (defaults to root namespace).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -304,7 +304,7 @@ Return true if the connection has not been closed
|
||||
```ts
|
||||
abstract openTable(
|
||||
name,
|
||||
namespacePath?,
|
||||
namespace?,
|
||||
options?): Promise<Table>
|
||||
```
|
||||
|
||||
@@ -315,8 +315,8 @@ Open a table in the database.
|
||||
* **name**: `string`
|
||||
The name of the table
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path of the table (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace of the table (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
|
||||
Additional options
|
||||
@@ -349,10 +349,10 @@ Tables will be returned in lexicographical order.
|
||||
|
||||
`Promise`<`string`[]>
|
||||
|
||||
#### tableNames(namespacePath, options)
|
||||
#### tableNames(namespace, options)
|
||||
|
||||
```ts
|
||||
abstract tableNames(namespacePath?, options?): Promise<string[]>
|
||||
abstract tableNames(namespace?, options?): Promise<string[]>
|
||||
```
|
||||
|
||||
List all the table names in this database.
|
||||
@@ -361,8 +361,8 @@ Tables will be returned in lexicographical order.
|
||||
|
||||
##### Parameters
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to list tables from (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to list tables from (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
|
||||
options to control the
|
||||
|
||||
@@ -71,12 +71,11 @@ Add new columns with defined values.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **newColumnTransforms**: `Field`<`any`> \| `Field`<`any`>[] \| `Schema`<`any`> \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||
Either:
|
||||
- An array of objects with column names and SQL expressions to calculate values
|
||||
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||
pairs of column names and
|
||||
the SQL expression to use to calculate the value of the new column. These
|
||||
expressions will be evaluated for each row in the table, and can
|
||||
reference existing columns in the table.
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -485,7 +484,19 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
||||
- Prune: Removes old versions of the dataset
|
||||
- Index: Optimizes the indices, adding new data to existing indices
|
||||
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
Experimental API
|
||||
----------------
|
||||
|
||||
The optimization process is undergoing active development and may change.
|
||||
Our goal with these changes is to improve the performance of optimization and
|
||||
reduce the complexity.
|
||||
|
||||
That being said, it is essential today to run optimize if you want the best
|
||||
performance. It should be stable and safe to use in production, but it is our
|
||||
hope that the API may be simplified (or not even need to be called) in the
|
||||
future.
|
||||
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
data modifications. If data is frequently added, deleted, or updated then
|
||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
you have added or modified 100,000 or more records or run more than 20 data
|
||||
|
||||
@@ -53,18 +53,3 @@ optional tlsConfig: TlsConfig;
|
||||
```ts
|
||||
optional userAgent: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### userId?
|
||||
|
||||
```ts
|
||||
optional userId: string;
|
||||
```
|
||||
|
||||
User identifier for tracking purposes.
|
||||
|
||||
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
variable that contains the user ID value.
|
||||
|
||||
@@ -41,29 +41,6 @@ for testing purposes.
|
||||
|
||||
***
|
||||
|
||||
### manifestEnabled?
|
||||
|
||||
```ts
|
||||
optional manifestEnabled: boolean;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): use directory namespace manifests as the source
|
||||
of truth for table metadata. Existing directory-listed root tables are
|
||||
migrated into the manifest on access.
|
||||
|
||||
***
|
||||
|
||||
### namespaceClientProperties?
|
||||
|
||||
```ts
|
||||
optional namespaceClientProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): extra properties for the backing namespace
|
||||
client used by manifest-enabled native connections.
|
||||
|
||||
***
|
||||
|
||||
### readConsistencyInterval?
|
||||
|
||||
```ts
|
||||
@@ -112,4 +89,4 @@ optional storageOptions: Record<string, string>;
|
||||
|
||||
(For LanceDB OSS only): configuration for object storage.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -97,4 +97,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -8,14 +8,6 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
numDeletedRows: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
|
||||
@@ -42,4 +42,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -37,12 +37,3 @@ tbl.optimize({cleanupOlderThan: new Date()});
|
||||
```ts
|
||||
deleteUnverified: boolean;
|
||||
```
|
||||
|
||||
Because they may be part of an in-progress transaction, files newer than
|
||||
7 days old are not deleted by default. If you are sure that there are no
|
||||
in-progress transactions, then you can set this to true to delete all
|
||||
files older than `cleanupOlderThan`.
|
||||
|
||||
**WARNING**: This should only be set to true if you can guarantee that
|
||||
no other process is currently working on this dataset. Otherwise the
|
||||
dataset could be put into a corrupted state.
|
||||
|
||||
@@ -52,7 +52,7 @@ new EmbeddingFunction<T, M>(): EmbeddingFunction<T, M>
|
||||
### computeQueryEmbeddings()
|
||||
|
||||
```ts
|
||||
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
|
||||
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
||||
```
|
||||
|
||||
Compute the embeddings for a single query
|
||||
@@ -63,7 +63,7 @@ Compute the embeddings for a single query
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`>
|
||||
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ new TextEmbeddingFunction<M>(): TextEmbeddingFunction<M>
|
||||
### computeQueryEmbeddings()
|
||||
|
||||
```ts
|
||||
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
|
||||
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
||||
```
|
||||
|
||||
Compute the embeddings for a single query
|
||||
@@ -48,7 +48,7 @@ Compute the embeddings for a single query
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`>
|
||||
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
||||
|
||||
#### Overrides
|
||||
|
||||
|
||||
@@ -7,10 +7,5 @@
|
||||
# Type Alias: IntoVector
|
||||
|
||||
```ts
|
||||
type IntoVector:
|
||||
| Float32Array
|
||||
| Float64Array
|
||||
| Uint8Array
|
||||
| number[]
|
||||
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
|
||||
type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
||||
```
|
||||
|
||||
@@ -36,20 +36,6 @@ is also an [asynchronous API client](#connections-asynchronous).
|
||||
|
||||
::: lancedb.table.Tags
|
||||
|
||||
## Expressions
|
||||
|
||||
Type-safe expression builder for filters and projections. Use these instead
|
||||
of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
|
||||
[select][lancedb.query.LanceQueryBuilder.select].
|
||||
|
||||
::: lancedb.expr.Expr
|
||||
|
||||
::: lancedb.expr.col
|
||||
|
||||
::: lancedb.expr.lit
|
||||
|
||||
::: lancedb.expr.func
|
||||
|
||||
## Querying (Synchronous)
|
||||
|
||||
::: lancedb.query.Query
|
||||
@@ -94,11 +80,11 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
|
||||
|
||||
## Full text search
|
||||
|
||||
Use [lancedb.table.Table.create_fts_index][] for the synchronous API or
|
||||
[lancedb.table.AsyncTable.create_index][] with [lancedb.index.FTS][] for the
|
||||
asynchronous API.
|
||||
::: lancedb.fts.create_index
|
||||
|
||||
::: lancedb.index.FTS
|
||||
::: lancedb.fts.populate_index
|
||||
|
||||
::: lancedb.fts.search_index
|
||||
|
||||
## Utilities
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# LanceDB Java Enterprise Client
|
||||
# LanceDB Java SDK
|
||||
|
||||
## Configuration and Initialization
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<version>0.27.0-beta.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
@@ -56,21 +56,21 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-slf4j2-impl</artifactId>
|
||||
<version>2.25.3</version>
|
||||
<version>2.24.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>2.25.3</version>
|
||||
<version>2.24.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>2.25.3</version>
|
||||
<version>2.24.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
20
java/pom.xml
20
java/pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<version>0.27.0-beta.2</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>7.0.0-beta.7</lance-core.version>
|
||||
<lance-core.version>3.1.0-beta.2</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
@@ -111,7 +111,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.3.1</version>
|
||||
<version>2.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
@@ -124,7 +124,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>3.11.2</version>
|
||||
<version>2.9.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-javadocs</id>
|
||||
@@ -178,15 +178,15 @@
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<version>3.4.1</version>
|
||||
<version>3.1.0</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<version>3.3.1</version>
|
||||
<version>3.0.2</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.14.0</version>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<compilerArgs>
|
||||
<arg>-h</arg>
|
||||
@@ -205,11 +205,11 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>3.4.2</version>
|
||||
<version>3.0.2</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-install-plugin</artifactId>
|
||||
<version>3.1.3</version>
|
||||
<version>2.5.2</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>com.diffplug.spotless</groupId>
|
||||
@@ -327,7 +327,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-gpg-plugin</artifactId>
|
||||
<version>3.2.7</version>
|
||||
<version>1.5</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>sign-artifacts</id>
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.28.0-beta.11"
|
||||
publish = false
|
||||
version = "0.27.0-beta.2"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
@@ -16,8 +15,6 @@ crate-type = ["cdylib"]
|
||||
async-trait.workspace = true
|
||||
arrow-ipc.workspace = true
|
||||
arrow-array.workspace = true
|
||||
arrow-buffer = "58.0.0"
|
||||
half.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
env_logger.workspace = true
|
||||
futures.workspace = true
|
||||
@@ -28,12 +25,12 @@ napi = { version = "3.8.3", default-features = false, features = [
|
||||
] }
|
||||
napi-derive = "3.5.2"
|
||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||
lzma-sys = { version = "0.1", features = ["static"] }
|
||||
lzma-sys = { version = "*", features = ["static"] }
|
||||
log.workspace = true
|
||||
|
||||
# Pin to resolve build failures; update periodically for security patches.
|
||||
aws-lc-sys = "=0.40.0"
|
||||
aws-lc-rs = "=1.16.3"
|
||||
# Workaround for build failure until we can fix it.
|
||||
aws-lc-sys = "=0.28.0"
|
||||
aws-lc-rs = "=1.13.0"
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.3.1"
|
||||
|
||||
@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -63,7 +63,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
tableFromIPC,
|
||||
DataType,
|
||||
Dictionary,
|
||||
Uint8: ArrowUint8,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
} = <any>arrow;
|
||||
type Schema = ApacheArrow["Schema"];
|
||||
@@ -363,38 +362,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
).toEqual(new Float64().toString());
|
||||
});
|
||||
|
||||
it("will infer FixedSizeList<Float32> from Float32Array values", async function () {
|
||||
const table = makeArrowTable([
|
||||
{ id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) },
|
||||
{ id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) },
|
||||
]);
|
||||
|
||||
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(
|
||||
true,
|
||||
);
|
||||
const vectorType = table.getChild("vector")?.type;
|
||||
expect(vectorType.listSize).toBe(3);
|
||||
expect(vectorType.children[0].type.toString()).toEqual(
|
||||
new Float32().toString(),
|
||||
);
|
||||
});
|
||||
|
||||
it("will infer FixedSizeList<Uint8> from Uint8Array values", async function () {
|
||||
const table = makeArrowTable([
|
||||
{ id: "a", vector: new Uint8Array([1, 2, 3]) },
|
||||
{ id: "b", vector: new Uint8Array([4, 5, 6]) },
|
||||
]);
|
||||
|
||||
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(
|
||||
true,
|
||||
);
|
||||
const vectorType = table.getChild("vector")?.type;
|
||||
expect(vectorType.listSize).toBe(3);
|
||||
expect(vectorType.children[0].type.toString()).toEqual(
|
||||
new ArrowUint8().toString(),
|
||||
);
|
||||
});
|
||||
|
||||
it("will use dictionary encoded strings if asked", async function () {
|
||||
const table = makeArrowTable([{ str: "hello" }]);
|
||||
expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true);
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import * as path from "node:path";
|
||||
import { RecordBatch } from "apache-arrow";
|
||||
import * as tmp from "tmp";
|
||||
import { Connection, Index, Table, connect, makeArrowTable } from "../lancedb";
|
||||
@@ -78,91 +76,4 @@ describe("rerankers", function () {
|
||||
|
||||
expect(result).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("does not keep process alive after rerank query", async function () {
|
||||
const script = `
|
||||
import * as lancedb from "./dist/index.js";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import * as fs from "node:fs/promises";
|
||||
|
||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-rerank-exit-"));
|
||||
const db = await lancedb.connect(dir);
|
||||
const table = await db.createTable("test", [{ text: "hello", vector: [1, 2, 3] }], {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await table.createIndex("text", { config: lancedb.Index.fts() });
|
||||
await table.waitForIndex(["text_idx"], 30);
|
||||
|
||||
const reranker = await lancedb.rerankers.RRFReranker.create();
|
||||
await table
|
||||
.query()
|
||||
.nearestTo([1, 2, 3])
|
||||
.fullTextSearch("hello")
|
||||
.rerank(reranker)
|
||||
.toArray();
|
||||
|
||||
table.close();
|
||||
db.close();
|
||||
`;
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const child = spawn(
|
||||
process.execPath,
|
||||
["--input-type=module", "-e", script],
|
||||
{
|
||||
cwd: path.resolve(__dirname, ".."),
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
},
|
||||
);
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
|
||||
child.stdout.on("data", (chunk) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
|
||||
child.stderr.on("data", (chunk) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
child.kill();
|
||||
reject(
|
||||
new Error(
|
||||
`child process did not exit in time\nstdout:\n${stdout}\nstderr:\n${stderr}`,
|
||||
),
|
||||
);
|
||||
}, 20_000);
|
||||
|
||||
child.on("error", (err) => {
|
||||
clearTimeout(timeout);
|
||||
reject(err);
|
||||
});
|
||||
|
||||
child.on("exit", (code, signal) => {
|
||||
clearTimeout(timeout);
|
||||
if (signal !== null) {
|
||||
reject(
|
||||
new Error(
|
||||
`child process exited with signal ${signal}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (code !== 0) {
|
||||
reject(
|
||||
new Error(
|
||||
`child process exited with code ${code}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -103,7 +103,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
},
|
||||
numIndices: 0,
|
||||
numRows: 3,
|
||||
totalBytes: 44,
|
||||
totalBytes: 24,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1259,98 +1259,6 @@ describe("schema evolution", function () {
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("can add columns with schema for explicit data types", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
|
||||
// Define schema for new columns with explicit data types
|
||||
// Note: All columns must be nullable when using addColumns with Schema
|
||||
// because they are initially populated with null values
|
||||
const newColumnsSchema = new Schema([
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
new Field("rating", new Int32(), true),
|
||||
]);
|
||||
|
||||
const result = await table.addColumns(newColumnsSchema);
|
||||
expect(result).toHaveProperty("version");
|
||||
expect(result.version).toBe(2);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||
true,
|
||||
),
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
new Field("rating", new Int32(), true),
|
||||
]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
|
||||
// Verify that new columns are populated with null values
|
||||
const results = await table.query().toArray();
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].price).toBeNull();
|
||||
expect(results[0].category).toBeNull();
|
||||
expect(results[0].rating).toBeNull();
|
||||
});
|
||||
|
||||
it("can add a single column using Field", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
|
||||
// Add a single field
|
||||
const priceField = new Field("price", new Float64(), true);
|
||||
const result = await table.addColumns(priceField);
|
||||
expect(result).toHaveProperty("version");
|
||||
expect(result.version).toBe(2);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||
true,
|
||||
),
|
||||
new Field("price", new Float64(), true),
|
||||
]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("can add multiple columns using array of Fields", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const table = await con.createTable("vectors", [
|
||||
{ id: 1n, vector: [0.1, 0.2] },
|
||||
]);
|
||||
|
||||
// Add multiple fields as array
|
||||
const fields = [
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
];
|
||||
const result = await table.addColumns(fields);
|
||||
expect(result).toHaveProperty("version");
|
||||
expect(result.version).toBe(2);
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(2, new Field("item", new Float32(), true)),
|
||||
true,
|
||||
),
|
||||
new Field("price", new Float64(), true),
|
||||
new Field("category", new Utf8(), true),
|
||||
]);
|
||||
expect(await table.schema()).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("can alter the columns in the schema", async function () {
|
||||
const con = await connect(tmpDir.name);
|
||||
const schema = new Schema([
|
||||
@@ -1789,65 +1697,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
expect(results2[0].text).toBe(data[1].text);
|
||||
});
|
||||
|
||||
test("full text search fast search", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const data = [{ text: "hello world", vector: [0.1, 0.2, 0.3], id: 1 }];
|
||||
const table = await db.createTable("test", data);
|
||||
await table.createIndex("text", {
|
||||
config: Index.fts(),
|
||||
});
|
||||
|
||||
// Insert unindexed data after creating the index.
|
||||
await table.add([{ text: "xyz", vector: [0.4, 0.5, 0.6], id: 2 }]);
|
||||
|
||||
const withFlatSearch = await table
|
||||
.search("xyz", "fts")
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(withFlatSearch.length).toBeGreaterThan(0);
|
||||
|
||||
const fastSearchResults = await table
|
||||
.search("xyz", "fts")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(fastSearchResults.length).toBe(0);
|
||||
|
||||
const nearestToTextFastSearch = await table
|
||||
.query()
|
||||
.nearestToText("xyz")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(nearestToTextFastSearch.length).toBe(0);
|
||||
|
||||
// fastSearch should be chainable with other methods.
|
||||
const chainedFastSearch = await table
|
||||
.search("xyz", "fts")
|
||||
.fastSearch()
|
||||
.select(["text"])
|
||||
.limit(5)
|
||||
.toArray();
|
||||
expect(chainedFastSearch.length).toBe(0);
|
||||
|
||||
await table.optimize();
|
||||
|
||||
const indexedFastSearch = await table
|
||||
.search("xyz", "fts")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(indexedFastSearch.length).toBeGreaterThan(0);
|
||||
|
||||
const indexedNearestToTextFastSearch = await table
|
||||
.query()
|
||||
.nearestToText("xyz")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(indexedNearestToTextFastSearch.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("prewarm full text search index", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const data = [
|
||||
@@ -2296,36 +2145,3 @@ describe("when creating an empty table", () => {
|
||||
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
// Ensure we can create float32 arrays without using Arrow
|
||||
// by utilizing native JS TypedArray support
|
||||
//
|
||||
// https://github.com/lancedb/lancedb/issues/3115
|
||||
describe("when creating a table with Float32Array vectors", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
beforeEach(() => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
});
|
||||
afterEach(() => {
|
||||
tmpDir.removeCallback();
|
||||
});
|
||||
|
||||
it("should persist Float32Array as FixedSizeList<Float32> in the LanceDB schema", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const table = await db.createTable("test", [
|
||||
{ id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) },
|
||||
{ id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) },
|
||||
]);
|
||||
|
||||
const schema = await table.schema();
|
||||
const vectorField = schema.fields.find((f) => f.name === "vector");
|
||||
expect(vectorField).toBeDefined();
|
||||
expect(vectorField!.type).toBeInstanceOf(FixedSizeList);
|
||||
|
||||
const fsl = vectorField!.type as FixedSizeList;
|
||||
expect(fsl.listSize).toBe(3);
|
||||
expect(fsl.children[0].type.typeId).toBe(Type.Float);
|
||||
// precision: HALF=0, SINGLE=1, DOUBLE=2
|
||||
expect((fsl.children[0].type as Float32).precision).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import * as tmp from "tmp";
|
||||
|
||||
import { type Table, connect } from "../lancedb";
|
||||
import {
|
||||
Field,
|
||||
FixedSizeList,
|
||||
Float32,
|
||||
Int64,
|
||||
Schema,
|
||||
makeArrowTable,
|
||||
} from "../lancedb/arrow";
|
||||
|
||||
describe("Vector query with different typed arrays", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
|
||||
afterEach(() => {
|
||||
tmpDir?.removeCallback();
|
||||
});
|
||||
|
||||
async function createFloat32Table(): Promise<Table> {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
const db = await connect(tmpDir.name);
|
||||
const schema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field(
|
||||
"vec",
|
||||
new FixedSizeList(2, new Field("item", new Float32())),
|
||||
true,
|
||||
),
|
||||
]);
|
||||
const data = makeArrowTable(
|
||||
[
|
||||
{ id: 1n, vec: [1.0, 0.0] },
|
||||
{ id: 2n, vec: [0.0, 1.0] },
|
||||
{ id: 3n, vec: [1.0, 1.0] },
|
||||
],
|
||||
{ schema },
|
||||
);
|
||||
return db.createTable("test_f32", data);
|
||||
}
|
||||
|
||||
it("should search with Float32Array (baseline)", async () => {
|
||||
const table = await createFloat32Table();
|
||||
const results = await table
|
||||
.query()
|
||||
.nearestTo(new Float32Array([1.0, 0.0]))
|
||||
.limit(1)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(1);
|
||||
expect(Number(results[0].id)).toBe(1);
|
||||
});
|
||||
|
||||
it("should search with number[] (backward compat)", async () => {
|
||||
const table = await createFloat32Table();
|
||||
const results = await table
|
||||
.query()
|
||||
.nearestTo([1.0, 0.0])
|
||||
.limit(1)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(1);
|
||||
expect(Number(results[0].id)).toBe(1);
|
||||
});
|
||||
|
||||
it("should search with Float64Array via raw path", async () => {
|
||||
const table = await createFloat32Table();
|
||||
const results = await table
|
||||
.query()
|
||||
.nearestTo(new Float64Array([1.0, 0.0]))
|
||||
.limit(1)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(1);
|
||||
expect(Number(results[0].id)).toBe(1);
|
||||
});
|
||||
|
||||
it("should add multiple query vectors with Float64Array", async () => {
|
||||
const table = await createFloat32Table();
|
||||
const results = await table
|
||||
.query()
|
||||
.nearestTo(new Float64Array([1.0, 0.0]))
|
||||
.addQueryVector(new Float64Array([0.0, 1.0]))
|
||||
.limit(2)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
// Float16Array is only available in Node 22+; not in TypeScript's standard lib yet
|
||||
const float16ArrayCtor = (globalThis as unknown as Record<string, unknown>)
|
||||
.Float16Array as (new (values: number[]) => unknown) | undefined;
|
||||
const hasFloat16 = float16ArrayCtor !== undefined;
|
||||
const f16it = hasFloat16 ? it : it.skip;
|
||||
|
||||
f16it("should search with Float16Array via raw path", async () => {
|
||||
const table = await createFloat32Table();
|
||||
const results = await table
|
||||
.query()
|
||||
.nearestTo(new float16ArrayCtor!([1.0, 0.0]) as Float32Array)
|
||||
.limit(1)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(1);
|
||||
expect(Number(results[0].id)).toBe(1);
|
||||
});
|
||||
});
|
||||
599
nodejs/examples/package-lock.json
generated
599
nodejs/examples/package-lock.json
generated
@@ -30,15 +30,12 @@
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
@@ -94,15 +91,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
|
||||
"integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==",
|
||||
"version": "7.26.2",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
|
||||
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-validator-identifier": "^7.28.5",
|
||||
"@babel/helper-validator-identifier": "^7.25.9",
|
||||
"js-tokens": "^4.0.0",
|
||||
"picocolors": "^1.1.1"
|
||||
"picocolors": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -237,21 +233,19 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helper-string-parser": {
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
|
||||
"integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
|
||||
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helper-validator-identifier": {
|
||||
"version": "7.28.5",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz",
|
||||
"integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==",
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
|
||||
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
@@ -266,27 +260,25 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helpers": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.6.tgz",
|
||||
"integrity": "sha512-xOBvwq86HHdB7WUDTfKfT/Vuxh7gElQ+Sfti2Cy6yIWNW05P8iUslOVcZ4/sKbE+/jQaukQAdz/gf3724kYdqw==",
|
||||
"version": "7.26.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.0.tgz",
|
||||
"integrity": "sha512-tbhNuIxNcVb21pInl3ZSjksLCvgdZy9KwJ8brv993QtIVKJBBkYXz4q4ZbAv31GdnC+R90np23L5FbEBlthAEw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/template": "^7.28.6",
|
||||
"@babel/types": "^7.28.6"
|
||||
"@babel/template": "^7.25.9",
|
||||
"@babel/types": "^7.26.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/parser": {
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.0.tgz",
|
||||
"integrity": "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==",
|
||||
"version": "7.26.2",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz",
|
||||
"integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/types": "^7.29.0"
|
||||
"@babel/types": "^7.26.0"
|
||||
},
|
||||
"bin": {
|
||||
"parser": "bin/babel-parser.js"
|
||||
@@ -518,15 +510,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/template": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
|
||||
"integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==",
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz",
|
||||
"integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.28.6",
|
||||
"@babel/parser": "^7.28.6",
|
||||
"@babel/types": "^7.28.6"
|
||||
"@babel/code-frame": "^7.25.9",
|
||||
"@babel/parser": "^7.25.9",
|
||||
"@babel/types": "^7.25.9"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -551,14 +542,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/types": {
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
|
||||
"integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
|
||||
"version": "7.26.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz",
|
||||
"integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-string-parser": "^7.27.1",
|
||||
"@babel/helper-validator-identifier": "^7.28.5"
|
||||
"@babel/helper-string-parser": "^7.25.9",
|
||||
"@babel/helper-validator-identifier": "^7.25.9"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -1161,6 +1151,95 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui": {
|
||||
"version": "8.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
|
||||
"integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
|
||||
"dependencies": {
|
||||
"string-width": "^5.1.2",
|
||||
"string-width-cjs": "npm:string-width@^4.2.0",
|
||||
"strip-ansi": "^7.0.1",
|
||||
"strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
|
||||
"wrap-ansi": "^8.1.0",
|
||||
"wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/ansi-regex": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
|
||||
"integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-regex?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/ansi-styles": {
|
||||
"version": "6.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
|
||||
"integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/emoji-regex": {
|
||||
"version": "9.2.2",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
|
||||
"integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/string-width": {
|
||||
"version": "5.1.2",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
|
||||
"integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
|
||||
"dependencies": {
|
||||
"eastasianwidth": "^0.2.0",
|
||||
"emoji-regex": "^9.2.2",
|
||||
"strip-ansi": "^7.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/strip-ansi": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
|
||||
"integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/cliui/node_modules/wrap-ansi": {
|
||||
"version": "8.1.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
|
||||
"integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^6.1.0",
|
||||
"string-width": "^5.0.1",
|
||||
"strip-ansi": "^7.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/fs-minipass": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
|
||||
@@ -1527,6 +1606,15 @@
|
||||
"resolved": "../dist",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@pkgjs/parseargs": {
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
|
||||
"integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
@@ -1758,7 +1846,6 @@
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -1767,7 +1854,6 @@
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
|
||||
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"color-convert": "^2.0.1"
|
||||
},
|
||||
@@ -1933,15 +2019,13 @@
|
||||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"dev": true
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2018,19 +2102,6 @@
|
||||
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/call-bind-apply-helpers": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
|
||||
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0",
|
||||
"function-bind": "^1.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/callsites": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
|
||||
@@ -2227,11 +2298,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/cross-spawn": {
|
||||
"version": "7.0.6",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"version": "7.0.3",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
|
||||
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
|
||||
"dependencies": {
|
||||
"path-key": "^3.1.0",
|
||||
"shebang-command": "^2.0.0",
|
||||
@@ -2315,19 +2384,10 @@
|
||||
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dunder-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
||||
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"call-bind-apply-helpers": "^1.0.1",
|
||||
"es-errors": "^1.3.0",
|
||||
"gopd": "^1.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
"node_modules/eastasianwidth": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
|
||||
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
|
||||
},
|
||||
"node_modules/ejs": {
|
||||
"version": "3.1.10",
|
||||
@@ -2365,8 +2425,7 @@
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
|
||||
"dev": true
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
|
||||
},
|
||||
"node_modules/error-ex": {
|
||||
"version": "1.3.2",
|
||||
@@ -2383,51 +2442,6 @@
|
||||
"integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/es-define-property": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
|
||||
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/es-errors": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
|
||||
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/es-object-atoms": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
|
||||
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/es-set-tostringtag": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
|
||||
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0",
|
||||
"get-intrinsic": "^1.2.6",
|
||||
"has-tostringtag": "^1.0.2",
|
||||
"hasown": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
|
||||
@@ -2540,21 +2554,19 @@
|
||||
}
|
||||
},
|
||||
"node_modules/filelist/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/filelist/node_modules/minimatch": {
|
||||
"version": "5.1.9",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz",
|
||||
"integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==",
|
||||
"version": "5.1.6",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
|
||||
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
},
|
||||
@@ -2592,16 +2604,39 @@
|
||||
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
||||
},
|
||||
"node_modules/foreground-child": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz",
|
||||
"integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==",
|
||||
"dependencies": {
|
||||
"cross-spawn": "^7.0.0",
|
||||
"signal-exit": "^4.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/foreground-child/node_modules/signal-exit": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
|
||||
"integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.5",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
|
||||
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
|
||||
"license": "MIT",
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz",
|
||||
"integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"es-set-tostringtag": "^2.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
@@ -2649,6 +2684,7 @@
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
||||
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
|
||||
"dev": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
@@ -2671,30 +2707,6 @@
|
||||
"node": "6.* || 8.* || >= 10.*"
|
||||
}
|
||||
},
|
||||
"node_modules/get-intrinsic": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
|
||||
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"call-bind-apply-helpers": "^1.0.2",
|
||||
"es-define-property": "^1.0.1",
|
||||
"es-errors": "^1.3.0",
|
||||
"es-object-atoms": "^1.1.1",
|
||||
"function-bind": "^1.1.2",
|
||||
"get-proto": "^1.0.1",
|
||||
"gopd": "^1.2.0",
|
||||
"has-symbols": "^1.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"math-intrinsics": "^1.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/get-package-type": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz",
|
||||
@@ -2704,19 +2716,6 @@
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
|
||||
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dunder-proto": "^1.0.1",
|
||||
"es-object-atoms": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stream": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
|
||||
@@ -2759,18 +2758,6 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/gopd": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
|
||||
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/graceful-fs": {
|
||||
"version": "4.2.11",
|
||||
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
|
||||
@@ -2791,37 +2778,11 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/has-symbols": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
|
||||
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/has-tostringtag": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
|
||||
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"has-symbols": "^1.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/hasown": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
|
||||
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"function-bind": "^1.1.2"
|
||||
},
|
||||
@@ -2921,7 +2882,6 @@
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
|
||||
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -2959,8 +2919,7 @@
|
||||
"node_modules/isexe": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
|
||||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
||||
"dev": true
|
||||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
|
||||
},
|
||||
"node_modules/istanbul-lib-coverage": {
|
||||
"version": "3.2.2",
|
||||
@@ -3028,6 +2987,20 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/jackspeak": {
|
||||
"version": "3.4.3",
|
||||
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
|
||||
"integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==",
|
||||
"dependencies": {
|
||||
"@isaacs/cliui": "^8.0.2"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@pkgjs/parseargs": "^0.11.0"
|
||||
}
|
||||
},
|
||||
"node_modules/jake": {
|
||||
"version": "10.9.2",
|
||||
"resolved": "https://registry.npmjs.org/jake/-/jake-10.9.2.tgz",
|
||||
@@ -3632,11 +3605,10 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/js-yaml": {
|
||||
"version": "3.14.2",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz",
|
||||
"integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==",
|
||||
"version": "3.14.1",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
|
||||
"integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"argparse": "^1.0.7",
|
||||
"esprima": "^4.0.0"
|
||||
@@ -3756,15 +3728,6 @@
|
||||
"tmpl": "1.0.5"
|
||||
}
|
||||
},
|
||||
"node_modules/math-intrinsics": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/merge-stream": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
||||
@@ -3813,11 +3776,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "3.1.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
|
||||
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
||||
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
},
|
||||
@@ -3834,17 +3796,31 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minizlib": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
|
||||
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
|
||||
"license": "MIT",
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
|
||||
"integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
|
||||
"dependencies": {
|
||||
"minipass": "^7.1.2"
|
||||
"minipass": "^7.0.4",
|
||||
"rimraf": "^5.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/mkdirp": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
|
||||
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
|
||||
"bin": {
|
||||
"mkdirp": "dist/cjs/src/bin.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
@@ -4034,6 +4010,11 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/package-json-from-dist": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz",
|
||||
"integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw=="
|
||||
},
|
||||
"node_modules/parse-json": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
|
||||
@@ -4074,7 +4055,6 @@
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
|
||||
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -4085,6 +4065,26 @@
|
||||
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/path-scurry": {
|
||||
"version": "1.11.1",
|
||||
"resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
|
||||
"integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
|
||||
"dependencies": {
|
||||
"lru-cache": "^10.2.0",
|
||||
"minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/path-scurry/node_modules/lru-cache": {
|
||||
"version": "10.4.3",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz",
|
||||
"integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
|
||||
},
|
||||
"node_modules/picocolors": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
|
||||
@@ -4246,6 +4246,61 @@
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf": {
|
||||
"version": "5.0.10",
|
||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
|
||||
"integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==",
|
||||
"dependencies": {
|
||||
"glob": "^10.3.7"
|
||||
},
|
||||
"bin": {
|
||||
"rimraf": "dist/esm/bin.mjs"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf/node_modules/glob": {
|
||||
"version": "10.4.5",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
|
||||
"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
|
||||
"dependencies": {
|
||||
"foreground-child": "^3.1.0",
|
||||
"jackspeak": "^3.1.2",
|
||||
"minimatch": "^9.0.4",
|
||||
"minipass": "^7.1.2",
|
||||
"package-json-from-dist": "^1.0.0",
|
||||
"path-scurry": "^1.11.1"
|
||||
},
|
||||
"bin": {
|
||||
"glob": "dist/esm/bin.mjs"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/rimraf/node_modules/minimatch": {
|
||||
"version": "9.0.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
|
||||
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.17"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.6.3",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
|
||||
@@ -4299,7 +4354,6 @@
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
||||
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"shebang-regex": "^3.0.0"
|
||||
},
|
||||
@@ -4311,7 +4365,6 @@
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
|
||||
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
@@ -4399,7 +4452,20 @@
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
"strip-ansi": "^6.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/string-width-cjs": {
|
||||
"name": "string-width",
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
|
||||
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
|
||||
"dependencies": {
|
||||
"emoji-regex": "^8.0.0",
|
||||
"is-fullwidth-code-point": "^3.0.0",
|
||||
@@ -4413,7 +4479,18 @@
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-ansi-cjs": {
|
||||
"name": "strip-ansi",
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
|
||||
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
|
||||
"dependencies": {
|
||||
"ansi-regex": "^5.0.1"
|
||||
},
|
||||
@@ -4464,15 +4541,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tar": {
|
||||
"version": "7.5.10",
|
||||
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.10.tgz",
|
||||
"integrity": "sha512-8mOPs1//5q/rlkNSPcCegA6hiHJYDmSLEI8aMH/CdSQJNWztHC9WHNam5zdQlfpTwB9Xp7IBEsHfV5LKMJGVAw==",
|
||||
"license": "BlueOak-1.0.0",
|
||||
"version": "7.4.3",
|
||||
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
|
||||
"integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
|
||||
"dependencies": {
|
||||
"@isaacs/fs-minipass": "^4.0.0",
|
||||
"chownr": "^3.0.0",
|
||||
"minipass": "^7.1.2",
|
||||
"minizlib": "^3.1.0",
|
||||
"minizlib": "^3.0.1",
|
||||
"mkdirp": "^3.0.1",
|
||||
"yallist": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
@@ -4705,7 +4782,6 @@
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"isexe": "^2.0.0"
|
||||
},
|
||||
@@ -4733,6 +4809,23 @@
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/wrap-ansi-cjs": {
|
||||
"name": "wrap-ansi",
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
|
||||
@@ -20,8 +20,6 @@ import {
|
||||
Float32,
|
||||
Float64,
|
||||
Int,
|
||||
Int8,
|
||||
Int16,
|
||||
Int32,
|
||||
Int64,
|
||||
LargeBinary,
|
||||
@@ -37,8 +35,6 @@ import {
|
||||
Timestamp,
|
||||
Type,
|
||||
Uint8,
|
||||
Uint16,
|
||||
Uint32,
|
||||
Utf8,
|
||||
Vector,
|
||||
makeVector as arrowMakeVector,
|
||||
@@ -117,9 +113,8 @@ export type TableLike =
|
||||
export type IntoVector =
|
||||
| Float32Array
|
||||
| Float64Array
|
||||
| Uint8Array
|
||||
| number[]
|
||||
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
|
||||
| Promise<Float32Array | Float64Array | number[]>;
|
||||
|
||||
export type MultiVector = IntoVector[];
|
||||
|
||||
@@ -127,48 +122,14 @@ export function isMultiVector(value: unknown): value is MultiVector {
|
||||
return Array.isArray(value) && isIntoVector(value[0]);
|
||||
}
|
||||
|
||||
// Float16Array is not in TypeScript's standard lib yet; access dynamically
|
||||
type Float16ArrayCtor = new (
|
||||
...args: unknown[]
|
||||
) => { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
|
||||
const float16ArrayCtor = (globalThis as unknown as Record<string, unknown>)
|
||||
.Float16Array as Float16ArrayCtor | undefined;
|
||||
|
||||
export function isIntoVector(value: unknown): value is IntoVector {
|
||||
return (
|
||||
value instanceof Float32Array ||
|
||||
value instanceof Float64Array ||
|
||||
value instanceof Uint8Array ||
|
||||
(float16ArrayCtor !== undefined && value instanceof float16ArrayCtor) ||
|
||||
(Array.isArray(value) && !Array.isArray(value[0]))
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the underlying byte buffer and data type from a typed array
|
||||
* for passing to the Rust NAPI layer without precision loss.
|
||||
*/
|
||||
export function extractVectorBuffer(
|
||||
vector: Float32Array | Float64Array | Uint8Array,
|
||||
): { data: Uint8Array; dtype: string } | null {
|
||||
if (float16ArrayCtor !== undefined && vector instanceof float16ArrayCtor) {
|
||||
return {
|
||||
data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength),
|
||||
dtype: "float16",
|
||||
};
|
||||
}
|
||||
if (vector instanceof Float64Array) {
|
||||
return {
|
||||
data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength),
|
||||
dtype: "float64",
|
||||
};
|
||||
}
|
||||
if (vector instanceof Uint8Array && !(vector instanceof Float32Array)) {
|
||||
return { data: vector, dtype: "uint8" };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
export function isArrowTable(value: object): value is TableLike {
|
||||
if (value instanceof ArrowTable) return true;
|
||||
return "schema" in value && "batches" in value;
|
||||
@@ -568,8 +529,7 @@ function isObject(value: unknown): value is Record<string, unknown> {
|
||||
!(value instanceof Date) &&
|
||||
!(value instanceof Set) &&
|
||||
!(value instanceof Map) &&
|
||||
!(value instanceof Buffer) &&
|
||||
!ArrayBuffer.isView(value)
|
||||
!(value instanceof Buffer)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -628,13 +588,6 @@ function inferType(
|
||||
return new Bool();
|
||||
} else if (value instanceof Buffer) {
|
||||
return new Binary();
|
||||
} else if (ArrayBuffer.isView(value) && !(value instanceof DataView)) {
|
||||
const info = typedArrayToArrowType(value);
|
||||
if (info !== undefined) {
|
||||
const child = new Field("item", info.elementType, true);
|
||||
return new FixedSizeList(info.length, child);
|
||||
}
|
||||
return undefined;
|
||||
} else if (Array.isArray(value)) {
|
||||
if (value.length === 0) {
|
||||
return undefined; // Without any values we can't infer the type
|
||||
@@ -793,32 +746,6 @@ function makeListVector(lists: unknown[][]): Vector<unknown> {
|
||||
return listBuilder.finish().toVector();
|
||||
}
|
||||
|
||||
/**
|
||||
* Map a JS TypedArray instance to the corresponding Arrow element DataType
|
||||
* and its length. Returns undefined if the value is not a recognized TypedArray.
|
||||
*/
|
||||
function typedArrayToArrowType(
|
||||
value: ArrayBufferView,
|
||||
): { elementType: DataType; length: number } | undefined {
|
||||
if (value instanceof Float32Array)
|
||||
return { elementType: new Float32(), length: value.length };
|
||||
if (value instanceof Float64Array)
|
||||
return { elementType: new Float64(), length: value.length };
|
||||
if (value instanceof Uint8Array)
|
||||
return { elementType: new Uint8(), length: value.length };
|
||||
if (value instanceof Uint16Array)
|
||||
return { elementType: new Uint16(), length: value.length };
|
||||
if (value instanceof Uint32Array)
|
||||
return { elementType: new Uint32(), length: value.length };
|
||||
if (value instanceof Int8Array)
|
||||
return { elementType: new Int8(), length: value.length };
|
||||
if (value instanceof Int16Array)
|
||||
return { elementType: new Int16(), length: value.length };
|
||||
if (value instanceof Int32Array)
|
||||
return { elementType: new Int32(), length: value.length };
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Helper function to convert an Array of JS values to an Arrow Vector */
|
||||
function makeVector(
|
||||
values: unknown[],
|
||||
@@ -887,16 +814,6 @@ function makeVector(
|
||||
"makeVector cannot infer the type if all values are null or undefined",
|
||||
);
|
||||
}
|
||||
if (ArrayBuffer.isView(sampleValue) && !(sampleValue instanceof DataView)) {
|
||||
const info = typedArrayToArrowType(sampleValue);
|
||||
if (info !== undefined) {
|
||||
const fslType = new FixedSizeList(
|
||||
info.length,
|
||||
new Field("item", info.elementType, true),
|
||||
);
|
||||
return vectorFromArray(values, fslType);
|
||||
}
|
||||
}
|
||||
if (Array.isArray(sampleValue)) {
|
||||
// Default Arrow inference doesn't handle list types
|
||||
return makeListVector(values as unknown[][]);
|
||||
|
||||
@@ -42,7 +42,7 @@ export interface CreateTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
|
||||
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
/**
|
||||
@@ -166,25 +166,25 @@ export abstract class Connection {
|
||||
* List all the table names in this database.
|
||||
*
|
||||
* Tables will be returned in lexicographical order.
|
||||
* @param {string[]} namespacePath - The namespace path to list tables from (defaults to root namespace)
|
||||
* @param {string[]} namespace - The namespace to list tables from (defaults to root namespace)
|
||||
* @param {Partial<TableNamesOptions>} options - options to control the
|
||||
* paging / start point
|
||||
*
|
||||
*/
|
||||
abstract tableNames(
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
options?: Partial<TableNamesOptions>,
|
||||
): Promise<string[]>;
|
||||
|
||||
/**
|
||||
* Open a table in the database.
|
||||
* @param {string} name - The name of the table
|
||||
* @param {string[]} namespacePath - The namespace path of the table (defaults to root namespace)
|
||||
* @param {string[]} namespace - The namespace of the table (defaults to root namespace)
|
||||
* @param {Partial<OpenTableOptions>} options - Additional options
|
||||
*/
|
||||
abstract openTable(
|
||||
name: string,
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
options?: Partial<OpenTableOptions>,
|
||||
): Promise<Table>;
|
||||
|
||||
@@ -193,7 +193,7 @@ export abstract class Connection {
|
||||
* @param {object} options - The options object.
|
||||
* @param {string} options.name - The name of the table.
|
||||
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
|
||||
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
|
||||
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
|
||||
*
|
||||
*/
|
||||
abstract createTable(
|
||||
@@ -201,7 +201,7 @@ export abstract class Connection {
|
||||
name: string;
|
||||
data: Data;
|
||||
} & Partial<CreateTableOptions>,
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
): Promise<Table>;
|
||||
/**
|
||||
* Creates a new Table and initialize it with new data.
|
||||
@@ -220,13 +220,13 @@ export abstract class Connection {
|
||||
* @param {string} name - The name of the table.
|
||||
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
||||
* to be inserted into the table
|
||||
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
|
||||
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
|
||||
* @param {Partial<CreateTableOptions>} options - Additional options
|
||||
*/
|
||||
abstract createTable(
|
||||
name: string,
|
||||
data: Record<string, unknown>[] | TableLike,
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table>;
|
||||
|
||||
@@ -245,28 +245,28 @@ export abstract class Connection {
|
||||
* Creates a new empty Table
|
||||
* @param {string} name - The name of the table.
|
||||
* @param {Schema} schema - The schema of the table
|
||||
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
|
||||
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
|
||||
* @param {Partial<CreateTableOptions>} options - Additional options
|
||||
*/
|
||||
abstract createEmptyTable(
|
||||
name: string,
|
||||
schema: import("./arrow").SchemaLike,
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table>;
|
||||
|
||||
/**
|
||||
* Drop an existing table.
|
||||
* @param {string} name The name of the table to drop.
|
||||
* @param {string[]} namespacePath The namespace path of the table (defaults to root namespace).
|
||||
* @param {string[]} namespace The namespace of the table (defaults to root namespace).
|
||||
*/
|
||||
abstract dropTable(name: string, namespacePath?: string[]): Promise<void>;
|
||||
abstract dropTable(name: string, namespace?: string[]): Promise<void>;
|
||||
|
||||
/**
|
||||
* Drop all tables in the database.
|
||||
* @param {string[]} namespacePath The namespace path to drop tables from (defaults to root namespace).
|
||||
* @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
|
||||
*/
|
||||
abstract dropAllTables(namespacePath?: string[]): Promise<void>;
|
||||
abstract dropAllTables(namespace?: string[]): Promise<void>;
|
||||
|
||||
/**
|
||||
* Clone a table from a source table.
|
||||
@@ -279,7 +279,7 @@ export abstract class Connection {
|
||||
* @param {string} targetTableName - The name of the target table to create.
|
||||
* @param {string} sourceUri - The URI of the source table to clone from.
|
||||
* @param {object} options - Clone options.
|
||||
* @param {string[]} options.targetNamespacePath - The namespace path for the target table (defaults to root namespace).
|
||||
* @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
|
||||
* @param {number} options.sourceVersion - The version of the source table to clone.
|
||||
* @param {string} options.sourceTag - The tag of the source table to clone.
|
||||
* @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
|
||||
@@ -288,7 +288,7 @@ export abstract class Connection {
|
||||
targetTableName: string,
|
||||
sourceUri: string,
|
||||
options?: {
|
||||
targetNamespacePath?: string[];
|
||||
targetNamespace?: string[];
|
||||
sourceVersion?: number;
|
||||
sourceTag?: string;
|
||||
isShallow?: boolean;
|
||||
@@ -319,25 +319,25 @@ export class LocalConnection extends Connection {
|
||||
}
|
||||
|
||||
async tableNames(
|
||||
namespacePathOrOptions?: string[] | Partial<TableNamesOptions>,
|
||||
namespaceOrOptions?: string[] | Partial<TableNamesOptions>,
|
||||
options?: Partial<TableNamesOptions>,
|
||||
): Promise<string[]> {
|
||||
// Detect if first argument is namespacePath array or options object
|
||||
let namespacePath: string[] | undefined;
|
||||
// Detect if first argument is namespace array or options object
|
||||
let namespace: string[] | undefined;
|
||||
let tableNamesOptions: Partial<TableNamesOptions> | undefined;
|
||||
|
||||
if (Array.isArray(namespacePathOrOptions)) {
|
||||
// First argument is namespacePath array
|
||||
namespacePath = namespacePathOrOptions;
|
||||
if (Array.isArray(namespaceOrOptions)) {
|
||||
// First argument is namespace array
|
||||
namespace = namespaceOrOptions;
|
||||
tableNamesOptions = options;
|
||||
} else {
|
||||
// First argument is options object (backwards compatibility)
|
||||
namespacePath = undefined;
|
||||
tableNamesOptions = namespacePathOrOptions;
|
||||
namespace = undefined;
|
||||
tableNamesOptions = namespaceOrOptions;
|
||||
}
|
||||
|
||||
return this.inner.tableNames(
|
||||
namespacePath ?? [],
|
||||
namespace ?? [],
|
||||
tableNamesOptions?.startAfter,
|
||||
tableNamesOptions?.limit,
|
||||
);
|
||||
@@ -345,12 +345,12 @@ export class LocalConnection extends Connection {
|
||||
|
||||
async openTable(
|
||||
name: string,
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
options?: Partial<OpenTableOptions>,
|
||||
): Promise<Table> {
|
||||
const innerTable = await this.inner.openTable(
|
||||
name,
|
||||
namespacePath ?? [],
|
||||
namespace ?? [],
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
options?.indexCacheSize,
|
||||
);
|
||||
@@ -362,7 +362,7 @@ export class LocalConnection extends Connection {
|
||||
targetTableName: string,
|
||||
sourceUri: string,
|
||||
options?: {
|
||||
targetNamespacePath?: string[];
|
||||
targetNamespace?: string[];
|
||||
sourceVersion?: number;
|
||||
sourceTag?: string;
|
||||
isShallow?: boolean;
|
||||
@@ -371,7 +371,7 @@ export class LocalConnection extends Connection {
|
||||
const innerTable = await this.inner.cloneTable(
|
||||
targetTableName,
|
||||
sourceUri,
|
||||
options?.targetNamespacePath ?? [],
|
||||
options?.targetNamespace ?? [],
|
||||
options?.sourceVersion ?? null,
|
||||
options?.sourceTag ?? null,
|
||||
options?.isShallow ?? true,
|
||||
@@ -406,42 +406,42 @@ export class LocalConnection extends Connection {
|
||||
nameOrOptions:
|
||||
| string
|
||||
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
||||
dataOrNamespacePath?: Record<string, unknown>[] | TableLike | string[],
|
||||
namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||
dataOrNamespace?: Record<string, unknown>[] | TableLike | string[],
|
||||
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
||||
// First overload: createTable(options, namespacePath?)
|
||||
// First overload: createTable(options, namespace?)
|
||||
const { name, data, ...createOptions } = nameOrOptions;
|
||||
const namespacePath = dataOrNamespacePath as string[] | undefined;
|
||||
return this._createTableImpl(name, data, namespacePath, createOptions);
|
||||
const namespace = dataOrNamespace as string[] | undefined;
|
||||
return this._createTableImpl(name, data, namespace, createOptions);
|
||||
}
|
||||
|
||||
// Second overload: createTable(name, data, namespacePath?, options?)
|
||||
// Second overload: createTable(name, data, namespace?, options?)
|
||||
const name = nameOrOptions;
|
||||
const data = dataOrNamespacePath as Record<string, unknown>[] | TableLike;
|
||||
const data = dataOrNamespace as Record<string, unknown>[] | TableLike;
|
||||
|
||||
// Detect if third argument is namespacePath array or options object
|
||||
let namespacePath: string[] | undefined;
|
||||
// Detect if third argument is namespace array or options object
|
||||
let namespace: string[] | undefined;
|
||||
let createOptions: Partial<CreateTableOptions> | undefined;
|
||||
|
||||
if (Array.isArray(namespacePathOrOptions)) {
|
||||
// Third argument is namespacePath array
|
||||
namespacePath = namespacePathOrOptions;
|
||||
if (Array.isArray(namespaceOrOptions)) {
|
||||
// Third argument is namespace array
|
||||
namespace = namespaceOrOptions;
|
||||
createOptions = options;
|
||||
} else {
|
||||
// Third argument is options object (backwards compatibility)
|
||||
namespacePath = undefined;
|
||||
createOptions = namespacePathOrOptions;
|
||||
namespace = undefined;
|
||||
createOptions = namespaceOrOptions;
|
||||
}
|
||||
|
||||
return this._createTableImpl(name, data, namespacePath, createOptions);
|
||||
return this._createTableImpl(name, data, namespace, createOptions);
|
||||
}
|
||||
|
||||
private async _createTableImpl(
|
||||
name: string,
|
||||
data: Data,
|
||||
namespacePath?: string[],
|
||||
namespace?: string[],
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
if (data === undefined) {
|
||||
@@ -455,7 +455,7 @@ export class LocalConnection extends Connection {
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
namespacePath ?? [],
|
||||
namespace ?? [],
|
||||
storageOptions,
|
||||
);
|
||||
|
||||
@@ -465,21 +465,21 @@ export class LocalConnection extends Connection {
|
||||
async createEmptyTable(
|
||||
name: string,
|
||||
schema: import("./arrow").SchemaLike,
|
||||
namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
// Detect if third argument is namespacePath array or options object
|
||||
let namespacePath: string[] | undefined;
|
||||
// Detect if third argument is namespace array or options object
|
||||
let namespace: string[] | undefined;
|
||||
let createOptions: Partial<CreateTableOptions> | undefined;
|
||||
|
||||
if (Array.isArray(namespacePathOrOptions)) {
|
||||
// Third argument is namespacePath array
|
||||
namespacePath = namespacePathOrOptions;
|
||||
if (Array.isArray(namespaceOrOptions)) {
|
||||
// Third argument is namespace array
|
||||
namespace = namespaceOrOptions;
|
||||
createOptions = options;
|
||||
} else {
|
||||
// Third argument is options object (backwards compatibility)
|
||||
namespacePath = undefined;
|
||||
createOptions = namespacePathOrOptions;
|
||||
namespace = undefined;
|
||||
createOptions = namespaceOrOptions;
|
||||
}
|
||||
|
||||
let mode: string = createOptions?.mode ?? "create";
|
||||
@@ -502,18 +502,18 @@ export class LocalConnection extends Connection {
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
namespacePath ?? [],
|
||||
namespace ?? [],
|
||||
storageOptions,
|
||||
);
|
||||
return new LocalTable(innerTable);
|
||||
}
|
||||
|
||||
async dropTable(name: string, namespacePath?: string[]): Promise<void> {
|
||||
return this.inner.dropTable(name, namespacePath ?? []);
|
||||
async dropTable(name: string, namespace?: string[]): Promise<void> {
|
||||
return this.inner.dropTable(name, namespace ?? []);
|
||||
}
|
||||
|
||||
async dropAllTables(namespacePath?: string[]): Promise<void> {
|
||||
return this.inner.dropAllTables(namespacePath ?? []);
|
||||
async dropAllTables(namespace?: string[]): Promise<void> {
|
||||
return this.inner.dropAllTables(namespace ?? []);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ import {
|
||||
Table as ArrowTable,
|
||||
type IntoVector,
|
||||
RecordBatch,
|
||||
extractVectorBuffer,
|
||||
fromBufferToRecordBatch,
|
||||
fromRecordBatchToBuffer,
|
||||
tableFromIPC,
|
||||
@@ -662,8 +661,10 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
|
||||
const res = (async () => {
|
||||
try {
|
||||
const v = await vector;
|
||||
const arr = Float32Array.from(v);
|
||||
//
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
||||
const value: any = this.addQueryVector(v);
|
||||
const value: any = this.addQueryVector(arr);
|
||||
const inner = value.inner as
|
||||
| NativeVectorQuery
|
||||
| Promise<NativeVectorQuery>;
|
||||
@@ -675,12 +676,7 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
|
||||
return new VectorQuery(res);
|
||||
} else {
|
||||
super.doCall((inner) => {
|
||||
const raw = Array.isArray(vector) ? null : extractVectorBuffer(vector);
|
||||
if (raw) {
|
||||
inner.addQueryVectorRaw(raw.data, raw.dtype);
|
||||
} else {
|
||||
inner.addQueryVector(Float32Array.from(vector as number[]));
|
||||
}
|
||||
inner.addQueryVector(Float32Array.from(vector));
|
||||
});
|
||||
return this;
|
||||
}
|
||||
@@ -769,23 +765,14 @@ export class Query extends StandardQueryBase<NativeQuery> {
|
||||
* a default `limit` of 10 will be used. @see {@link Query#limit}
|
||||
*/
|
||||
nearestTo(vector: IntoVector): VectorQuery {
|
||||
const callNearestTo = (
|
||||
inner: NativeQuery,
|
||||
resolved: Float32Array | Float64Array | Uint8Array | number[],
|
||||
): NativeVectorQuery => {
|
||||
const raw = Array.isArray(resolved)
|
||||
? null
|
||||
: extractVectorBuffer(resolved);
|
||||
if (raw) {
|
||||
return inner.nearestToRaw(raw.data, raw.dtype);
|
||||
}
|
||||
return inner.nearestTo(Float32Array.from(resolved as number[]));
|
||||
};
|
||||
|
||||
if (this.inner instanceof Promise) {
|
||||
const nativeQuery = this.inner.then(async (inner) => {
|
||||
const resolved = vector instanceof Promise ? await vector : vector;
|
||||
return callNearestTo(inner, resolved);
|
||||
if (vector instanceof Promise) {
|
||||
const arr = await vector.then((v) => Float32Array.from(v));
|
||||
return inner.nearestTo(arr);
|
||||
} else {
|
||||
return inner.nearestTo(Float32Array.from(vector));
|
||||
}
|
||||
});
|
||||
return new VectorQuery(nativeQuery);
|
||||
}
|
||||
@@ -793,8 +780,10 @@ export class Query extends StandardQueryBase<NativeQuery> {
|
||||
const res = (async () => {
|
||||
try {
|
||||
const v = await vector;
|
||||
const arr = Float32Array.from(v);
|
||||
//
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
|
||||
const value: any = this.nearestTo(v);
|
||||
const value: any = this.nearestTo(arr);
|
||||
const inner = value.inner as
|
||||
| NativeVectorQuery
|
||||
| Promise<NativeVectorQuery>;
|
||||
@@ -805,7 +794,7 @@ export class Query extends StandardQueryBase<NativeQuery> {
|
||||
})();
|
||||
return new VectorQuery(res);
|
||||
} else {
|
||||
const vectorQuery = callNearestTo(this.inner, vector);
|
||||
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
|
||||
return new VectorQuery(vectorQuery);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,15 +5,12 @@ import {
|
||||
Table as ArrowTable,
|
||||
Data,
|
||||
DataType,
|
||||
Field,
|
||||
IntoVector,
|
||||
MultiVector,
|
||||
Schema,
|
||||
dataTypeToJson,
|
||||
fromDataToBuffer,
|
||||
fromTableToBuffer,
|
||||
isMultiVector,
|
||||
makeEmptyTable,
|
||||
tableFromIPC,
|
||||
} from "./arrow";
|
||||
|
||||
@@ -87,16 +84,6 @@ export interface OptimizeOptions {
|
||||
* tbl.optimize({cleanupOlderThan: new Date()});
|
||||
*/
|
||||
cleanupOlderThan: Date;
|
||||
/**
|
||||
* Because they may be part of an in-progress transaction, files newer than
|
||||
* 7 days old are not deleted by default. If you are sure that there are no
|
||||
* in-progress transactions, then you can set this to true to delete all
|
||||
* files older than `cleanupOlderThan`.
|
||||
*
|
||||
* **WARNING**: This should only be set to true if you can guarantee that
|
||||
* no other process is currently working on this dataset. Otherwise the
|
||||
* dataset could be put into a corrupted state.
|
||||
*/
|
||||
deleteUnverified: boolean;
|
||||
}
|
||||
|
||||
@@ -394,16 +381,15 @@ export abstract class Table {
|
||||
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
|
||||
/**
|
||||
* Add new columns with defined values.
|
||||
* @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
|
||||
* - An array of objects with column names and SQL expressions to calculate values
|
||||
* - A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||
* - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||
* - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
||||
* the SQL expression to use to calculate the value of the new column. These
|
||||
* expressions will be evaluated for each row in the table, and can
|
||||
* reference existing columns in the table.
|
||||
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
|
||||
* containing the new version number of the table after adding the columns.
|
||||
*/
|
||||
abstract addColumns(
|
||||
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
|
||||
newColumnTransforms: AddColumnsSql[],
|
||||
): Promise<AddColumnsResult>;
|
||||
|
||||
/**
|
||||
@@ -515,7 +501,19 @@ export abstract class Table {
|
||||
* - Index: Optimizes the indices, adding new data to existing indices
|
||||
*
|
||||
*
|
||||
* The frequency an application should call optimize is based on the frequency of
|
||||
* Experimental API
|
||||
* ----------------
|
||||
*
|
||||
* The optimization process is undergoing active development and may change.
|
||||
* Our goal with these changes is to improve the performance of optimization and
|
||||
* reduce the complexity.
|
||||
*
|
||||
* That being said, it is essential today to run optimize if you want the best
|
||||
* performance. It should be stable and safe to use in production, but it our
|
||||
* hope that the API may be simplified (or not even need to be called) in the
|
||||
* future.
|
||||
*
|
||||
* The frequency an application shoudl call optimize is based on the frequency of
|
||||
* data modifications. If data is frequently added, deleted, or updated then
|
||||
* optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
* you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -808,40 +806,9 @@ export class LocalTable extends Table {
|
||||
// TODO: Support BatchUDF
|
||||
|
||||
async addColumns(
|
||||
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
|
||||
newColumnTransforms: AddColumnsSql[],
|
||||
): Promise<AddColumnsResult> {
|
||||
// Handle single Field -> convert to array of Fields
|
||||
if (newColumnTransforms instanceof Field) {
|
||||
newColumnTransforms = [newColumnTransforms];
|
||||
}
|
||||
|
||||
// Handle array of Fields -> convert to Schema
|
||||
if (
|
||||
Array.isArray(newColumnTransforms) &&
|
||||
newColumnTransforms.length > 0 &&
|
||||
newColumnTransforms[0] instanceof Field
|
||||
) {
|
||||
const fields = newColumnTransforms as Field[];
|
||||
newColumnTransforms = new Schema(fields);
|
||||
}
|
||||
|
||||
// Handle Schema -> use schema-based approach
|
||||
if (newColumnTransforms instanceof Schema) {
|
||||
const schema = newColumnTransforms;
|
||||
// Convert schema to buffer using Arrow IPC format
|
||||
const emptyTable = makeEmptyTable(schema);
|
||||
const schemaBuf = await fromTableToBuffer(emptyTable);
|
||||
return await this.inner.addColumnsWithSchema(schemaBuf);
|
||||
}
|
||||
|
||||
// Handle SQL expressions (existing functionality)
|
||||
if (Array.isArray(newColumnTransforms)) {
|
||||
return await this.inner.addColumns(
|
||||
newColumnTransforms as AddColumnsSql[],
|
||||
);
|
||||
}
|
||||
|
||||
throw new Error("Invalid input type for addColumns");
|
||||
return await this.inner.addColumns(newColumnTransforms);
|
||||
}
|
||||
|
||||
async alterColumns(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4599
nodejs/package-lock.json
generated
4599
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.27.0-beta.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@@ -75,6 +75,7 @@
|
||||
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
|
||||
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
|
||||
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||
"build": "npm run build:debug && npm run tsc",
|
||||
"build-release": "npm run build:release && npm run tsc",
|
||||
"tsc": "tsc -b",
|
||||
|
||||
@@ -8,10 +8,10 @@ use lancedb::database::{CreateTableMode, Database};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::*;
|
||||
|
||||
use crate::ConnectionOptions;
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::header::JsHeaderProvider;
|
||||
use crate::table::Table;
|
||||
use crate::ConnectionOptions;
|
||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
||||
|
||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
@@ -67,12 +67,6 @@ impl Connection {
|
||||
builder = builder.storage_option(key, value);
|
||||
}
|
||||
}
|
||||
if let Some(manifest_enabled) = options.manifest_enabled {
|
||||
builder = builder.manifest_enabled(manifest_enabled);
|
||||
}
|
||||
if let Some(namespace_client_properties) = options.namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
|
||||
// Create client config, optionally with header provider
|
||||
let client_config = options.client_config.unwrap_or_default();
|
||||
@@ -125,12 +119,12 @@ impl Connection {
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn table_names(
|
||||
&self,
|
||||
namespace_path: Option<Vec<String>>,
|
||||
namespace: Vec<String>,
|
||||
start_after: Option<String>,
|
||||
limit: Option<u32>,
|
||||
) -> napi::Result<Vec<String>> {
|
||||
let mut op = self.get_inner()?.table_names();
|
||||
op = op.namespace(namespace_path.unwrap_or_default());
|
||||
op = op.namespace(namespace);
|
||||
if let Some(start_after) = start_after {
|
||||
op = op.start_after(start_after);
|
||||
}
|
||||
@@ -152,7 +146,7 @@ impl Connection {
|
||||
name: String,
|
||||
buf: Buffer,
|
||||
mode: String,
|
||||
namespace_path: Option<Vec<String>>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
) -> napi::Result<Table> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
@@ -160,7 +154,7 @@ impl Connection {
|
||||
let mode = Self::parse_create_mode_str(&mode)?;
|
||||
let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
|
||||
|
||||
builder = builder.namespace(namespace_path.unwrap_or_default());
|
||||
builder = builder.namespace(namespace);
|
||||
|
||||
if let Some(storage_options) = storage_options {
|
||||
for (key, value) in storage_options {
|
||||
@@ -177,7 +171,7 @@ impl Connection {
|
||||
name: String,
|
||||
schema_buf: Buffer,
|
||||
mode: String,
|
||||
namespace_path: Option<Vec<String>>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
) -> napi::Result<Table> {
|
||||
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
|
||||
@@ -189,7 +183,7 @@ impl Connection {
|
||||
.create_empty_table(&name, schema)
|
||||
.mode(mode);
|
||||
|
||||
builder = builder.namespace(namespace_path.unwrap_or_default());
|
||||
builder = builder.namespace(namespace);
|
||||
|
||||
if let Some(storage_options) = storage_options {
|
||||
for (key, value) in storage_options {
|
||||
@@ -204,13 +198,13 @@ impl Connection {
|
||||
pub async fn open_table(
|
||||
&self,
|
||||
name: String,
|
||||
namespace_path: Option<Vec<String>>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
index_cache_size: Option<u32>,
|
||||
) -> napi::Result<Table> {
|
||||
let mut builder = self.get_inner()?.open_table(&name);
|
||||
|
||||
builder = builder.namespace(namespace_path.unwrap_or_default());
|
||||
builder = builder.namespace(namespace);
|
||||
|
||||
if let Some(storage_options) = storage_options {
|
||||
for (key, value) in storage_options {
|
||||
@@ -229,7 +223,7 @@ impl Connection {
|
||||
&self,
|
||||
target_table_name: String,
|
||||
source_uri: String,
|
||||
target_namespace_path: Option<Vec<String>>,
|
||||
target_namespace: Vec<String>,
|
||||
source_version: Option<i64>,
|
||||
source_tag: Option<String>,
|
||||
is_shallow: bool,
|
||||
@@ -238,7 +232,7 @@ impl Connection {
|
||||
.get_inner()?
|
||||
.clone_table(&target_table_name, &source_uri);
|
||||
|
||||
builder = builder.target_namespace(target_namespace_path.unwrap_or_default());
|
||||
builder = builder.target_namespace(target_namespace);
|
||||
|
||||
if let Some(version) = source_version {
|
||||
builder = builder.source_version(version as u64);
|
||||
@@ -256,21 +250,18 @@ impl Connection {
|
||||
|
||||
/// Drop table with the name. Or raise an error if the table does not exist.
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn drop_table(
|
||||
&self,
|
||||
name: String,
|
||||
namespace_path: Option<Vec<String>>,
|
||||
) -> napi::Result<()> {
|
||||
let ns = namespace_path.unwrap_or_default();
|
||||
pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
|
||||
self.get_inner()?
|
||||
.drop_table(&name, &ns)
|
||||
.drop_table(&name, &namespace)
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn drop_all_tables(&self, namespace_path: Option<Vec<String>>) -> napi::Result<()> {
|
||||
let ns = namespace_path.unwrap_or_default();
|
||||
self.get_inner()?.drop_all_tables(&ns).await.default_error()
|
||||
pub async fn drop_all_tables(&self, namespace: Vec<String>) -> napi::Result<()> {
|
||||
self.get_inner()?
|
||||
.drop_all_tables(&namespace)
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||
|
||||
use std::sync::Mutex;
|
||||
|
||||
use lancedb::index::Index as LanceDbIndex;
|
||||
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
|
||||
use lancedb::index::vector::{
|
||||
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
|
||||
IvfRqIndexBuilder,
|
||||
};
|
||||
use lancedb::index::Index as LanceDbIndex;
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::util::parse_distance_type;
|
||||
|
||||
@@ -35,15 +35,8 @@ pub struct ConnectionOptions {
|
||||
pub read_consistency_interval: Option<f64>,
|
||||
/// (For LanceDB OSS only): configuration for object storage.
|
||||
///
|
||||
/// The available options are described at https://docs.lancedb.com/storage/
|
||||
/// The available options are described at https://lancedb.com/docs/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): use directory namespace manifests as the source
|
||||
/// of truth for table metadata. Existing directory-listed root tables are
|
||||
/// migrated into the manifest on access.
|
||||
pub manifest_enabled: Option<bool>,
|
||||
/// (For LanceDB OSS only): extra properties for the backing namespace
|
||||
/// client used by manifest-enabled native connections.
|
||||
pub namespace_client_properties: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||
/// shared caches and other session-specific state.
|
||||
pub session: Option<session::Session>,
|
||||
|
||||
@@ -3,12 +3,6 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{
|
||||
Array, Float16Array as ArrowFloat16Array, Float32Array as ArrowFloat32Array,
|
||||
Float64Array as ArrowFloat64Array, UInt8Array as ArrowUInt8Array,
|
||||
};
|
||||
use arrow_buffer::ScalarBuffer;
|
||||
use half::f16;
|
||||
use lancedb::index::scalar::{
|
||||
BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur,
|
||||
Operator, PhraseQuery,
|
||||
@@ -23,40 +17,13 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery;
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::error::convert_error;
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::iterator::RecordBatchIterator;
|
||||
use crate::rerankers::RerankHybridCallbackArgs;
|
||||
use crate::rerankers::Reranker;
|
||||
use crate::util::{parse_distance_type, schema_to_buffer};
|
||||
|
||||
fn bytes_to_arrow_array(data: Uint8Array, dtype: String) -> napi::Result<Arc<dyn Array>> {
|
||||
let buf = arrow_buffer::Buffer::from(data.to_vec());
|
||||
let num_bytes = buf.len();
|
||||
match dtype.as_str() {
|
||||
"float16" => {
|
||||
let scalar_buf = ScalarBuffer::<f16>::new(buf, 0, num_bytes / 2);
|
||||
Ok(Arc::new(ArrowFloat16Array::new(scalar_buf, None)))
|
||||
}
|
||||
"float32" => {
|
||||
let scalar_buf = ScalarBuffer::<f32>::new(buf, 0, num_bytes / 4);
|
||||
Ok(Arc::new(ArrowFloat32Array::new(scalar_buf, None)))
|
||||
}
|
||||
"float64" => {
|
||||
let scalar_buf = ScalarBuffer::<f64>::new(buf, 0, num_bytes / 8);
|
||||
Ok(Arc::new(ArrowFloat64Array::new(scalar_buf, None)))
|
||||
}
|
||||
"uint8" => {
|
||||
let scalar_buf = ScalarBuffer::<u8>::new(buf, 0, num_bytes);
|
||||
Ok(Arc::new(ArrowUInt8Array::new(scalar_buf, None)))
|
||||
}
|
||||
_ => Err(napi::Error::from_reason(format!(
|
||||
"Unsupported vector dtype: {}. Expected one of: float16, float32, float64, uint8",
|
||||
dtype
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub struct Query {
|
||||
inner: LanceDbQuery,
|
||||
@@ -111,13 +78,6 @@ impl Query {
|
||||
Ok(VectorQuery { inner })
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn nearest_to_raw(&mut self, data: Uint8Array, dtype: String) -> Result<VectorQuery> {
|
||||
let array = bytes_to_arrow_array(data, dtype)?;
|
||||
let inner = self.inner.clone().nearest_to(array).default_error()?;
|
||||
Ok(VectorQuery { inner })
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn fast_search(&mut self) {
|
||||
self.inner = self.inner.clone().fast_search();
|
||||
@@ -203,13 +163,6 @@ impl VectorQuery {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn add_query_vector_raw(&mut self, data: Uint8Array, dtype: String) -> Result<()> {
|
||||
let array = bytes_to_arrow_array(data, dtype)?;
|
||||
self.inner = self.inner.clone().add_query_vector(array).default_error()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
|
||||
let distance_type = parse_distance_type(distance_type)?;
|
||||
@@ -598,12 +551,15 @@ fn parse_fts_query(query: Object) -> napi::Result<FullTextSearchQuery> {
|
||||
}
|
||||
};
|
||||
let mut query = FullTextSearchQuery::new_query(query);
|
||||
if let Some(cols) = columns
|
||||
&& !cols.is_empty()
|
||||
{
|
||||
query = query.with_columns(&cols).map_err(|e| {
|
||||
napi::Error::from_reason(format!("Failed to set full text search columns: {}", e))
|
||||
})?;
|
||||
if let Some(cols) = columns {
|
||||
if !cols.is_empty() {
|
||||
query = query.with_columns(&cols).map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to set full text search columns: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
}
|
||||
}
|
||||
Ok(query)
|
||||
} else {
|
||||
|
||||
@@ -92,13 +92,6 @@ pub struct ClientConfig {
|
||||
pub extra_headers: Option<HashMap<String, String>>,
|
||||
pub id_delimiter: Option<String>,
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
/// User identifier for tracking purposes.
|
||||
///
|
||||
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
/// variable that contains the user ID value.
|
||||
pub user_id: Option<String>,
|
||||
}
|
||||
|
||||
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
|
||||
@@ -152,7 +145,6 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
|
||||
id_delimiter: config.id_delimiter,
|
||||
tls_config: config.tls_config.map(Into::into),
|
||||
header_provider: None, // the header provider is set separately later
|
||||
user_id: config.user_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ type RerankHybridFn = ThreadsafeFunction<
|
||||
RerankHybridCallbackArgs,
|
||||
Status,
|
||||
false,
|
||||
true,
|
||||
>;
|
||||
|
||||
/// Reranker implementation that "wraps" a NodeJS Reranker implementation.
|
||||
@@ -33,10 +32,7 @@ impl Reranker {
|
||||
pub fn new(
|
||||
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
|
||||
) -> napi::Result<Self> {
|
||||
let rerank_hybrid = rerank_hybrid
|
||||
.build_threadsafe_function()
|
||||
.weak::<true>()
|
||||
.build()?;
|
||||
let rerank_hybrid = rerank_hybrid.build_threadsafe_function().build()?;
|
||||
Ok(Self { rerank_hybrid })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,7 +95,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
|
||||
napi_val: napi::sys::napi_value,
|
||||
) -> napi::Result<Self> {
|
||||
let object: napi::bindgen_prelude::ClassInstance<Self> =
|
||||
unsafe { napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)? };
|
||||
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
|
||||
Ok((*object).clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
use lancedb::ipc::ipc_file_to_batches;
|
||||
use lancedb::table::{
|
||||
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
|
||||
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
||||
@@ -279,23 +279,6 @@ impl Table {
|
||||
Ok(res.into())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn add_columns_with_schema(
|
||||
&self,
|
||||
schema_buf: Buffer,
|
||||
) -> napi::Result<AddColumnsResult> {
|
||||
let schema = ipc_file_to_schema(schema_buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
|
||||
|
||||
let transforms = NewColumnTransform::AllNulls(schema);
|
||||
let res = self
|
||||
.inner_ref()?
|
||||
.add_columns(transforms, None)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(res.into())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn alter_columns(
|
||||
&self,
|
||||
@@ -770,14 +753,12 @@ impl From<lancedb::table::AddResult> for AddResult {
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DeleteResult {
|
||||
pub num_deleted_rows: i64,
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||
fn from(value: lancedb::table::DeleteResult) -> Self {
|
||||
Self {
|
||||
num_deleted_rows: value.num_deleted_rows as i64,
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.11"
|
||||
current_version = "0.30.0-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
2
python/.gitignore
vendored
2
python/.gitignore
vendored
@@ -1,5 +1,3 @@
|
||||
# Test data created by some example tests
|
||||
data/
|
||||
_lancedb.pyd
|
||||
# macOS debug symbols bundle generated during build
|
||||
*.dSYM/
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.11"
|
||||
publish = false
|
||||
version = "0.30.0-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -15,31 +14,25 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "58.0.0", features = ["pyarrow"] }
|
||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
bytes = "1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-namespace-impls.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
log.workspace = true
|
||||
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.28", features = [
|
||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.26", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
] }
|
||||
pin-project = "1.1.5"
|
||||
futures.workspace = true
|
||||
serde = "1"
|
||||
serde_json = "1"
|
||||
snafu.workspace = true
|
||||
tokio = { version = "1.40", features = ["sync", "rt-multi-thread"] }
|
||||
libc = "0.2"
|
||||
tokio = { version = "1.40", features = ["sync"] }
|
||||
|
||||
[build-dependencies]
|
||||
pyo3-build-config = { version = "0.28", features = [
|
||||
pyo3-build-config = { version = "0.26", features = [
|
||||
"extension-module",
|
||||
"abi3-py39",
|
||||
] }
|
||||
|
||||
@@ -183,6 +183,7 @@
|
||||
| stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data |
|
||||
| sympy | 1.14.0 | BSD License | https://sympy.org |
|
||||
| tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate |
|
||||
| tantivy | 0.25.1 | UNKNOWN | UNKNOWN |
|
||||
| threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl |
|
||||
| timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models |
|
||||
| tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 |
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# LanceDB Python SDK
|
||||
# LanceDB
|
||||
|
||||
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"deprecation>=2.1.0",
|
||||
"numpy>=1.24.0",
|
||||
"deprecation",
|
||||
"numpy",
|
||||
"overrides>=0.7; python_version<'3.12'",
|
||||
"packaging>=23.0",
|
||||
"packaging",
|
||||
"pyarrow>=16",
|
||||
"pydantic>=1.10",
|
||||
"tqdm>=4.27.0",
|
||||
@@ -45,50 +45,51 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
pylance = [
|
||||
"pylance>=5.0.0b5",
|
||||
"pylance>=1.0.0b14",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp>=3.9.0",
|
||||
"boto3>=1.28.57",
|
||||
"aiohttp",
|
||||
"boto3",
|
||||
"pandas>=1.4",
|
||||
"pytest>=7.0",
|
||||
"pytest-mock>=3.10",
|
||||
"pytest-asyncio>=0.21",
|
||||
"duckdb>=0.9.0",
|
||||
"pytz>=2023.3",
|
||||
"pytest",
|
||||
"pytest-mock",
|
||||
"pytest-asyncio",
|
||||
"duckdb",
|
||||
"pytz",
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"pyarrow-stubs>=16.0",
|
||||
"pylance>=5.0.0b5",
|
||||
"requests>=2.31.0",
|
||||
"datafusion>=52,<53",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance>=1.0.0b14",
|
||||
"requests",
|
||||
"datafusion<52",
|
||||
]
|
||||
dev = [
|
||||
"ruff>=0.3.0",
|
||||
"pre-commit>=3.5.0",
|
||||
"pyright>=1.1.350",
|
||||
"ruff",
|
||||
"pre-commit",
|
||||
"pyright",
|
||||
'typing-extensions>=4.0.0; python_version < "3.11"',
|
||||
]
|
||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
|
||||
clip = ["torch", "pillow>=12.1.1", "open-clip-torch"]
|
||||
siglip = ["torch", "pillow>=12.1.1", "transformers>=4.41.0","sentencepiece"]
|
||||
clip = ["torch", "pillow", "open-clip-torch"]
|
||||
siglip = ["torch", "pillow", "transformers>=4.41.0","sentencepiece"]
|
||||
embeddings = [
|
||||
"requests>=2.31.0",
|
||||
"openai>=1.6.1",
|
||||
"sentence-transformers>=2.2.0",
|
||||
"torch>=2.0.0",
|
||||
"pillow>=12.1.1",
|
||||
"open-clip-torch>=2.20.0",
|
||||
"cohere>=4.0",
|
||||
"sentence-transformers",
|
||||
"torch",
|
||||
"pillow",
|
||||
"open-clip-torch",
|
||||
"cohere",
|
||||
"colpali-engine>=0.3.10",
|
||||
"huggingface_hub>=0.19.0",
|
||||
"InstructorEmbedding>=1.0.1",
|
||||
"google-genai>=1.0.0",
|
||||
"huggingface_hub",
|
||||
"InstructorEmbedding",
|
||||
"google.generativeai",
|
||||
"boto3>=1.28.57",
|
||||
"awscli>=1.44.38",
|
||||
"awscli>=1.29.57",
|
||||
"botocore>=1.31.57",
|
||||
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
|
||||
"ollama>=0.3.0",
|
||||
"sentencepiece>=0.1.99"
|
||||
"sentencepiece"
|
||||
]
|
||||
azure = ["adlfs>=2024.2.0"]
|
||||
|
||||
|
||||
@@ -6,7 +6,8 @@ import importlib.metadata
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import timedelta
|
||||
from typing import Dict, Optional, Union, Any, List
|
||||
from typing import Dict, Optional, Union, Any
|
||||
import warnings
|
||||
|
||||
__version__ = importlib.metadata.version("lancedb")
|
||||
|
||||
@@ -14,9 +15,9 @@ from ._lancedb import connect as lancedb_connect
|
||||
from .common import URI, sanitize_uri
|
||||
from urllib.parse import urlparse
|
||||
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
||||
from .io import StorageOptionsProvider
|
||||
from .remote import ClientConfig
|
||||
from .remote.db import RemoteDBConnection
|
||||
from .expr import Expr, col, lit, func
|
||||
from .schema import vector
|
||||
from .table import AsyncTable, Table
|
||||
from ._lancedb import Session
|
||||
@@ -62,7 +63,7 @@ def _check_s3_bucket_with_dots(
|
||||
|
||||
|
||||
def connect(
|
||||
uri: Optional[URI] = None,
|
||||
uri: URI,
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
region: str = "us-east-1",
|
||||
@@ -72,19 +73,14 @@ def connect(
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> DBConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri: str or Path, optional
|
||||
The uri of the database. When ``namespace_client_impl`` is provided you may
|
||||
omit ``uri`` and connect through a namespace client instead.
|
||||
uri: str or Path
|
||||
The uri of the database.
|
||||
api_key: str, optional
|
||||
If presented, connect to LanceDB cloud.
|
||||
Otherwise, connect to a database on file system or cloud storage.
|
||||
@@ -110,29 +106,13 @@ def connect(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
<https://lancedb.com/docs/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
cache sizes for index and metadata caches, which can significantly
|
||||
impact memory use and performance. They can also be re-used across
|
||||
multiple connections to share the same cache state.
|
||||
namespace_client_impl : str, optional
|
||||
When provided along with ``namespace_client_properties``, ``connect``
|
||||
returns a namespace-backed connection by delegating to
|
||||
:func:`connect_namespace`. The value identifies which namespace
|
||||
implementation to load (e.g., ``"dir"`` or ``"rest"``).
|
||||
namespace_client_properties : dict, optional
|
||||
Configuration to pass to the namespace client implementation. Required
|
||||
when ``namespace_client_impl`` is set.
|
||||
namespace_client_pushdown_operations : list[str], optional
|
||||
Only used when ``namespace_client_properties`` is provided. Forwards to
|
||||
:func:`connect_namespace` to control which operations are executed on the
|
||||
namespace service (e.g., ``["QueryTable", "CreateTable"]``).
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -152,48 +132,11 @@ def connect(
|
||||
>>> db = lancedb.connect("db://my_database", api_key="ldb_...",
|
||||
... client_config={"retry_config": {"retries": 5}})
|
||||
|
||||
Connect to a namespace-backed database:
|
||||
|
||||
>>> db = lancedb.connect(namespace_client_impl="dir",
|
||||
... namespace_client_properties={"root": "/tmp/ns"})
|
||||
|
||||
Returns
|
||||
-------
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
if namespace_client_impl is not None:
|
||||
if namespace_client_properties is None:
|
||||
raise ValueError(
|
||||
"namespace_client_properties must be provided when "
|
||||
"namespace_client_impl is set"
|
||||
)
|
||||
if kwargs:
|
||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||
return connect_namespace(
|
||||
namespace_client_impl,
|
||||
namespace_client_properties,
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
if namespace_client_properties is not None and not manifest_enabled:
|
||||
raise ValueError(
|
||||
"namespace_client_impl must be provided when using "
|
||||
"namespace_client_properties unless manifest_enabled=True"
|
||||
)
|
||||
|
||||
if namespace_client_pushdown_operations is not None:
|
||||
raise ValueError(
|
||||
"namespace_client_pushdown_operations is only valid when "
|
||||
"connecting through a namespace"
|
||||
)
|
||||
if uri is None:
|
||||
raise ValueError(
|
||||
"uri is required when not connecting through a namespace client"
|
||||
)
|
||||
if isinstance(uri, str) and uri.startswith("db://"):
|
||||
if api_key is None:
|
||||
api_key = os.environ.get("LANCEDB_API_KEY")
|
||||
@@ -222,92 +165,9 @@ def connect(
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
manifest_enabled=manifest_enabled,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
WORKER_PROPERTY_PREFIX = "_lancedb_worker_"
|
||||
|
||||
|
||||
def _apply_worker_overrides(props: dict[str, str]) -> dict[str, str]:
|
||||
"""Apply worker property overrides.
|
||||
|
||||
Any key starting with ``_lancedb_worker_`` is extracted, the prefix
|
||||
is stripped, and the resulting key-value pair is put back into the
|
||||
map (overriding the existing value if present). The original
|
||||
prefixed key is removed.
|
||||
"""
|
||||
worker_keys = [k for k in props if k.startswith(WORKER_PROPERTY_PREFIX)]
|
||||
if not worker_keys:
|
||||
return props
|
||||
result = dict(props)
|
||||
for key in worker_keys:
|
||||
value = result.pop(key)
|
||||
real_key = key[len(WORKER_PROPERTY_PREFIX) :]
|
||||
result[real_key] = value
|
||||
return result
|
||||
|
||||
|
||||
def deserialize_conn(
|
||||
data: str,
|
||||
*,
|
||||
for_worker: bool = False,
|
||||
) -> DBConnection:
|
||||
"""Reconstruct a DBConnection from a serialized string.
|
||||
|
||||
The string must have been produced by
|
||||
:meth:`DBConnection.serialize`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : str
|
||||
String produced by ``serialize()``.
|
||||
for_worker : bool, default False
|
||||
When ``True``, any namespace client property whose key starts
|
||||
with ``_lancedb_worker_`` has that prefix stripped and the
|
||||
value overrides the corresponding property. For example,
|
||||
``_lancedb_worker_uri`` replaces ``uri``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DBConnection
|
||||
A new connection matching the serialized state.
|
||||
"""
|
||||
import json
|
||||
|
||||
parsed = json.loads(data)
|
||||
connection_type = parsed.get("connection_type")
|
||||
|
||||
rci_secs = parsed.get("read_consistency_interval_seconds")
|
||||
rci = timedelta(seconds=rci_secs) if rci_secs is not None else None
|
||||
storage_options = parsed.get("storage_options")
|
||||
|
||||
if connection_type == "namespace":
|
||||
props = dict(parsed.get("namespace_client_properties") or {})
|
||||
if for_worker:
|
||||
props = _apply_worker_overrides(props)
|
||||
return connect_namespace(
|
||||
namespace_client_impl=parsed["namespace_client_impl"],
|
||||
namespace_client_properties=props,
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
namespace_client_pushdown_operations=parsed.get(
|
||||
"namespace_client_pushdown_operations"
|
||||
),
|
||||
)
|
||||
elif connection_type == "local":
|
||||
return LanceDBConnection(
|
||||
parsed["uri"],
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
manifest_enabled=parsed.get("manifest_enabled", False),
|
||||
namespace_client_properties=parsed.get("namespace_client_properties"),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown connection_type: {connection_type}")
|
||||
|
||||
|
||||
async def connect_async(
|
||||
uri: URI,
|
||||
*,
|
||||
@@ -318,8 +178,6 @@ async def connect_async(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> AsyncConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
@@ -352,20 +210,13 @@ async def connect_async(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
cache sizes for index and metadata caches, which can significantly
|
||||
impact memory use and performance. They can also be re-used across
|
||||
multiple connections to share the same cache state.
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
namespace_client_properties : dict, optional
|
||||
Additional directory namespace client properties to use with
|
||||
``manifest_enabled=True``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -408,8 +259,6 @@ async def connect_async(
|
||||
client_config,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -422,10 +271,6 @@ __all__ = [
|
||||
"AsyncConnection",
|
||||
"AsyncLanceNamespaceDBConnection",
|
||||
"AsyncTable",
|
||||
"col",
|
||||
"Expr",
|
||||
"func",
|
||||
"lit",
|
||||
"URI",
|
||||
"sanitize_uri",
|
||||
"vector",
|
||||
@@ -434,6 +279,17 @@ __all__ = [
|
||||
"LanceNamespaceDBConnection",
|
||||
"RemoteDBConnection",
|
||||
"Session",
|
||||
"StorageOptionsProvider",
|
||||
"Table",
|
||||
"__version__",
|
||||
]
|
||||
|
||||
|
||||
def __warn_on_fork():
|
||||
warnings.warn(
|
||||
"lance is not fork-safe. If you are using multiprocessing, use spawn instead.",
|
||||
)
|
||||
|
||||
|
||||
if hasattr(os, "register_at_fork"):
|
||||
os.register_at_fork(before=__warn_on_fork) # type: ignore[attr-defined]
|
||||
|
||||
@@ -14,6 +14,7 @@ from .index import (
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from .io import StorageOptionsProvider
|
||||
from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
@@ -26,32 +27,6 @@ from .remote import ClientConfig
|
||||
IvfHnswPq: type[HnswPq] = HnswPq
|
||||
IvfHnswSq: type[HnswSq] = HnswSq
|
||||
|
||||
class PyExpr:
|
||||
"""A type-safe DataFusion expression node (Rust-side handle)."""
|
||||
|
||||
def eq(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def ne(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def lt(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def lte(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def gt(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def gte(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def and_(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def or_(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def not_(self) -> "PyExpr": ...
|
||||
def add(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def sub(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def mul(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def div(self, other: "PyExpr") -> "PyExpr": ...
|
||||
def lower(self) -> "PyExpr": ...
|
||||
def upper(self) -> "PyExpr": ...
|
||||
def contains(self, substr: "PyExpr") -> "PyExpr": ...
|
||||
def cast(self, data_type: pa.DataType) -> "PyExpr": ...
|
||||
def to_sql(self) -> str: ...
|
||||
|
||||
def expr_col(name: str) -> PyExpr: ...
|
||||
def expr_lit(value: Union[bool, int, float, str]) -> PyExpr: ...
|
||||
def expr_func(name: str, args: List[PyExpr]) -> PyExpr: ...
|
||||
|
||||
class Session:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -71,35 +46,35 @@ class Connection(object):
|
||||
async def close(self): ...
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse: ...
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace_path: List[str],
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse: ...
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace_path: List[str],
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse: ...
|
||||
async def describe_namespace(
|
||||
self,
|
||||
namespace_path: List[str],
|
||||
namespace: List[str],
|
||||
) -> DescribeNamespaceResponse: ...
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse: ...
|
||||
async def table_names(
|
||||
self,
|
||||
namespace_path: Optional[List[str]],
|
||||
namespace: Optional[List[str]],
|
||||
start_after: Optional[str],
|
||||
limit: Optional[int],
|
||||
) -> list[str]: ... # Deprecated: Use list_tables instead
|
||||
@@ -108,8 +83,9 @@ class Connection(object):
|
||||
name: str,
|
||||
mode: str,
|
||||
data: pa.RecordBatchReader,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> Table: ...
|
||||
async def create_empty_table(
|
||||
@@ -117,15 +93,17 @@ class Connection(object):
|
||||
name: str,
|
||||
mode: str,
|
||||
schema: pa.Schema,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> Table: ...
|
||||
async def open_table(
|
||||
self,
|
||||
name: str,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> Table: ...
|
||||
@@ -133,7 +111,7 @@ class Connection(object):
|
||||
self,
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
target_namespace_path: Optional[List[str]] = None,
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -142,18 +120,13 @@ class Connection(object):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
) -> None: ...
|
||||
async def drop_table(
|
||||
self, name: str, namespace_path: Optional[List[str]] = None
|
||||
self, name: str, namespace: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def drop_all_tables(
|
||||
self, namespace_path: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def namespace_client_config(
|
||||
self,
|
||||
) -> Dict[str, Any]: ...
|
||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
|
||||
|
||||
class Table:
|
||||
def name(self) -> str: ...
|
||||
@@ -162,10 +135,7 @@ class Table:
|
||||
def close(self) -> None: ...
|
||||
async def schema(self) -> pa.Schema: ...
|
||||
async def add(
|
||||
self,
|
||||
data: pa.RecordBatchReader,
|
||||
mode: Literal["append", "overwrite"],
|
||||
progress: Optional[Any] = None,
|
||||
self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
|
||||
) -> AddResult: ...
|
||||
async def update(
|
||||
self, updates: Dict[str, str], where: Optional[str]
|
||||
@@ -196,8 +166,6 @@ class Table:
|
||||
async def checkout(self, version: Union[int, str]): ...
|
||||
async def checkout_latest(self): ...
|
||||
async def restore(self, version: Optional[Union[int, str]] = None): ...
|
||||
async def prewarm_index(self, index_name: str) -> None: ...
|
||||
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
|
||||
async def list_indices(self) -> list[IndexConfig]: ...
|
||||
async def delete(self, filter: str) -> DeleteResult: ...
|
||||
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
|
||||
@@ -242,8 +210,6 @@ async def connect(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
session: Optional[Session],
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> Connection: ...
|
||||
|
||||
class RecordBatchStream:
|
||||
@@ -254,9 +220,7 @@ class RecordBatchStream:
|
||||
|
||||
class Query:
|
||||
def where(self, filter: str): ...
|
||||
def where_expr(self, expr: PyExpr): ...
|
||||
def select(self, columns: List[Tuple[str, str]]): ...
|
||||
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||
def select(self, columns: Tuple[str, str]): ...
|
||||
def select_columns(self, columns: List[str]): ...
|
||||
def limit(self, limit: int): ...
|
||||
def offset(self, offset: int): ...
|
||||
@@ -282,9 +246,7 @@ class TakeQuery:
|
||||
|
||||
class FTSQuery:
|
||||
def where(self, filter: str): ...
|
||||
def where_expr(self, expr: PyExpr): ...
|
||||
def select(self, columns: List[Tuple[str, str]]): ...
|
||||
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||
def select(self, columns: List[str]): ...
|
||||
def limit(self, limit: int): ...
|
||||
def offset(self, offset: int): ...
|
||||
def fast_search(self): ...
|
||||
@@ -303,9 +265,7 @@ class VectorQuery:
|
||||
async def output_schema(self) -> pa.Schema: ...
|
||||
async def execute(self) -> RecordBatchStream: ...
|
||||
def where(self, filter: str): ...
|
||||
def where_expr(self, expr: PyExpr): ...
|
||||
def select(self, columns: List[Tuple[str, str]]): ...
|
||||
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||
def select(self, columns: List[str]): ...
|
||||
def select_with_projection(self, columns: Tuple[str, str]): ...
|
||||
def limit(self, limit: int): ...
|
||||
def offset(self, offset: int): ...
|
||||
@@ -322,9 +282,7 @@ class VectorQuery:
|
||||
|
||||
class HybridQuery:
|
||||
def where(self, filter: str): ...
|
||||
def where_expr(self, expr: PyExpr): ...
|
||||
def select(self, columns: List[Tuple[str, str]]): ...
|
||||
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
|
||||
def select(self, columns: List[str]): ...
|
||||
def limit(self, limit: int): ...
|
||||
def offset(self, offset: int): ...
|
||||
def fast_search(self): ...
|
||||
@@ -442,7 +400,7 @@ class AsyncPermutationBuilder:
|
||||
async def execute(self) -> Table: ...
|
||||
|
||||
def async_permutation_builder(
|
||||
table: Table,
|
||||
table: Table, dest_table_name: str
|
||||
) -> AsyncPermutationBuilder: ...
|
||||
def fts_query_to_json(query: Any) -> str: ...
|
||||
|
||||
|
||||
@@ -2,9 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import threading
|
||||
import warnings
|
||||
|
||||
|
||||
class BackgroundEventLoop:
|
||||
@@ -15,9 +13,6 @@ class BackgroundEventLoop:
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._start()
|
||||
|
||||
def _start(self):
|
||||
self.loop = asyncio.new_event_loop()
|
||||
self.thread = threading.Thread(
|
||||
target=self.loop.run_forever,
|
||||
@@ -36,30 +31,3 @@ class BackgroundEventLoop:
|
||||
|
||||
|
||||
LOOP = BackgroundEventLoop()
|
||||
|
||||
_FORK_WARNED = False
|
||||
|
||||
|
||||
def _reset_after_fork():
|
||||
# Threads do not survive fork(), so the asyncio loop in LOOP.thread is
|
||||
# dead in the child. Re-initialize the singleton in place so existing
|
||||
# `from .background_loop import LOOP` references in other modules see
|
||||
# the new state. The Rust-side tokio runtime is reset analogously by a
|
||||
# pthread_atfork hook installed in the _lancedb extension.
|
||||
LOOP._start()
|
||||
global _FORK_WARNED
|
||||
if not _FORK_WARNED:
|
||||
_FORK_WARNED = True
|
||||
warnings.warn(
|
||||
"lancedb fork support is experimental: the internal async "
|
||||
"runtime has been reset in the forked child, but a small chance "
|
||||
"of deadlock remains if other state was mid-operation at fork "
|
||||
"time. The 'forkserver' or 'spawn' multiprocessing start method "
|
||||
"is likely a safer alternative.",
|
||||
RuntimeWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
if hasattr(os, "register_at_fork"):
|
||||
os.register_at_fork(after_in_child=_reset_after_fork)
|
||||
|
||||
@@ -96,7 +96,7 @@ def data_to_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
|
||||
@register("gemini-text")
|
||||
class GeminiText(TextEmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
be set.
|
||||
|
||||
https://ai.google.dev/gemini-api/docs/embeddings
|
||||
https://ai.google.dev/docs/embeddings_guide
|
||||
|
||||
Supports various tasks types:
|
||||
| Task Type | Description |
|
||||
@@ -46,12 +46,9 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str, default "gemini-embedding-001"
|
||||
The name of the model to use. Supported models include:
|
||||
- "gemini-embedding-001" (768 dimensions)
|
||||
|
||||
Note: The legacy "models/embedding-001" format is also supported but
|
||||
"gemini-embedding-001" is recommended.
|
||||
name: str, default "models/embedding-001"
|
||||
The name of the model to use. See the Gemini documentation for a list of
|
||||
available models.
|
||||
|
||||
query_task_type: str, default "retrieval_query"
|
||||
Sets the task type for the queries.
|
||||
@@ -80,7 +77,7 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
"""
|
||||
|
||||
name: str = "gemini-embedding-001"
|
||||
name: str = "models/embedding-001"
|
||||
query_task_type: str = "retrieval_query"
|
||||
source_task_type: str = "retrieval_document"
|
||||
|
||||
@@ -117,48 +114,23 @@ class GeminiText(TextEmbeddingFunction):
|
||||
texts: list[str] or np.ndarray (of str)
|
||||
The texts to embed
|
||||
"""
|
||||
from google.genai import types
|
||||
if (
|
||||
kwargs.get("task_type") == "retrieval_document"
|
||||
): # Provide a title to use existing API design
|
||||
title = "Embedding of a document"
|
||||
kwargs["title"] = title
|
||||
|
||||
task_type = kwargs.get("task_type")
|
||||
|
||||
# Build content objects for embed_content
|
||||
contents = []
|
||||
for text in texts:
|
||||
if task_type == "retrieval_document":
|
||||
# Provide a title for retrieval_document task
|
||||
contents.append(
|
||||
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
|
||||
)
|
||||
else:
|
||||
contents.append({"parts": [{"text": text}]})
|
||||
|
||||
# Build config
|
||||
config_kwargs = {}
|
||||
if task_type:
|
||||
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
|
||||
|
||||
# Call embed_content for each content
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
config = (
|
||||
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
|
||||
)
|
||||
response = self.client.models.embed_content(
|
||||
model=self.name,
|
||||
contents=content,
|
||||
config=config,
|
||||
)
|
||||
embeddings.append(response.embeddings[0].values)
|
||||
|
||||
return embeddings
|
||||
return [
|
||||
self.client.embed_content(model=self.name, content=text, **kwargs)[
|
||||
"embedding"
|
||||
]
|
||||
for text in texts
|
||||
]
|
||||
|
||||
@cached_property
|
||||
def client(self):
|
||||
attempt_import_or_raise("google.genai", "google-genai")
|
||||
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
|
||||
|
||||
if not os.environ.get("GOOGLE_API_KEY"):
|
||||
api_key_not_found_help("google")
|
||||
|
||||
from google import genai as genai_module
|
||||
|
||||
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
|
||||
return genai
|
||||
|
||||
@@ -10,7 +10,6 @@ import sys
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import weakref
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
@@ -1,298 +0,0 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Type-safe expression builder for filters and projections.
|
||||
|
||||
Instead of writing raw SQL strings you can build expressions with Python
|
||||
operators::
|
||||
|
||||
from lancedb.expr import col, lit
|
||||
|
||||
# filter: age > 18 AND status = 'active'
|
||||
filt = (col("age") > lit(18)) & (col("status") == lit("active"))
|
||||
|
||||
# projection: compute a derived column
|
||||
proj = {"score": col("raw_score") * lit(1.5)}
|
||||
|
||||
table.search().where(filt).select(proj).to_list()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Union
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb._lancedb import PyExpr, expr_col, expr_lit, expr_func
|
||||
|
||||
__all__ = ["Expr", "col", "lit", "func"]
|
||||
|
||||
_STR_TO_PA_TYPE: dict = {
|
||||
"bool": pa.bool_(),
|
||||
"boolean": pa.bool_(),
|
||||
"int8": pa.int8(),
|
||||
"int16": pa.int16(),
|
||||
"int32": pa.int32(),
|
||||
"int64": pa.int64(),
|
||||
"uint8": pa.uint8(),
|
||||
"uint16": pa.uint16(),
|
||||
"uint32": pa.uint32(),
|
||||
"uint64": pa.uint64(),
|
||||
"float16": pa.float16(),
|
||||
"float32": pa.float32(),
|
||||
"float": pa.float32(),
|
||||
"float64": pa.float64(),
|
||||
"double": pa.float64(),
|
||||
"string": pa.string(),
|
||||
"utf8": pa.string(),
|
||||
"str": pa.string(),
|
||||
"large_string": pa.large_utf8(),
|
||||
"large_utf8": pa.large_utf8(),
|
||||
"date32": pa.date32(),
|
||||
"date": pa.date32(),
|
||||
"date64": pa.date64(),
|
||||
}
|
||||
|
||||
|
||||
def _coerce(value: "ExprLike") -> "Expr":
    """Normalize *value* to an :class:`Expr`.

    An existing :class:`Expr` is handed back unchanged; anything else is
    wrapped as a literal through :func:`lit`.
    """
    return value if isinstance(value, Expr) else lit(value)
|
||||
|
||||
|
||||
# Type alias used in annotations.
|
||||
ExprLike = Union["Expr", bool, int, float, str]
|
||||
|
||||
|
||||
class Expr:
    """A type-safe expression node.

    Construct instances with :func:`col` and :func:`lit`, then combine them
    using Python operators or the named methods below. Every operation
    returns a new :class:`Expr` wrapping the result produced by the
    underlying ``PyExpr``; the operands are not modified by this class.

    Plain Python values on the other side of a binary operation are wrapped
    as literals (via ``_coerce``) before being handed to the inner object.

    Examples
    --------
    >>> from lancedb.expr import col, lit
    >>> filt = (col("age") > lit(18)) & (col("name").lower() == lit("alice"))
    >>> proj = {"double": col("x") * lit(2)}
    """

    # Make Expr unhashable so that == returns an Expr rather than being used
    # for dict keys / set membership.
    __hash__ = None  # type: ignore[assignment]

    def __init__(self, inner: "PyExpr") -> None:
        """Wrap the native expression object *inner*."""
        self._inner = inner

    # ── comparisons ──────────────────────────────────────────────────────────

    def __eq__(self, other: "ExprLike") -> "Expr":  # type: ignore[override]
        """Equal to (``col("x") == 1``)."""
        return Expr(self._inner.eq(_coerce(other)._inner))

    def __ne__(self, other: "ExprLike") -> "Expr":  # type: ignore[override]
        """Not equal to (``col("x") != 1``)."""
        return Expr(self._inner.ne(_coerce(other)._inner))

    def __lt__(self, other: "ExprLike") -> "Expr":
        """Less than (``col("x") < 1``)."""
        return Expr(self._inner.lt(_coerce(other)._inner))

    def __le__(self, other: "ExprLike") -> "Expr":
        """Less than or equal to (``col("x") <= 1``)."""
        return Expr(self._inner.lte(_coerce(other)._inner))

    def __gt__(self, other: "ExprLike") -> "Expr":
        """Greater than (``col("x") > 1``)."""
        return Expr(self._inner.gt(_coerce(other)._inner))

    def __ge__(self, other: "ExprLike") -> "Expr":
        """Greater than or equal to (``col("x") >= 1``)."""
        return Expr(self._inner.gte(_coerce(other)._inner))

    # ── logical ──────────────────────────────────────────────────────────────

    def __and__(self, other: "ExprLike") -> "Expr":
        """Logical AND (``expr_a & expr_b``)."""
        return Expr(self._inner.and_(_coerce(other)._inner))

    def __or__(self, other: "ExprLike") -> "Expr":
        """Logical OR (``expr_a | expr_b``)."""
        return Expr(self._inner.or_(_coerce(other)._inner))

    def __invert__(self) -> "Expr":
        """Logical NOT (``~expr``)."""
        return Expr(self._inner.not_())

    # ── arithmetic ───────────────────────────────────────────────────────────

    def __add__(self, other: "ExprLike") -> "Expr":
        """Add (``col("x") + 1``)."""
        return Expr(self._inner.add(_coerce(other)._inner))

    def __radd__(self, other: "ExprLike") -> "Expr":
        """Right-hand add (``1 + col("x")``)."""
        return Expr(_coerce(other)._inner.add(self._inner))

    def __sub__(self, other: "ExprLike") -> "Expr":
        """Subtract (``col("x") - 1``)."""
        return Expr(self._inner.sub(_coerce(other)._inner))

    def __rsub__(self, other: "ExprLike") -> "Expr":
        """Right-hand subtract (``1 - col("x")``)."""
        return Expr(_coerce(other)._inner.sub(self._inner))

    def __mul__(self, other: "ExprLike") -> "Expr":
        """Multiply (``col("x") * 2``)."""
        return Expr(self._inner.mul(_coerce(other)._inner))

    def __rmul__(self, other: "ExprLike") -> "Expr":
        """Right-hand multiply (``2 * col("x")``)."""
        return Expr(_coerce(other)._inner.mul(self._inner))

    def __truediv__(self, other: "ExprLike") -> "Expr":
        """Divide (``col("x") / 2``)."""
        return Expr(self._inner.div(_coerce(other)._inner))

    def __rtruediv__(self, other: "ExprLike") -> "Expr":
        """Right-hand divide (``1 / col("x")``)."""
        return Expr(_coerce(other)._inner.div(self._inner))

    # ── string methods ───────────────────────────────────────────────────────

    def lower(self) -> "Expr":
        """Convert string column values to lowercase."""
        return Expr(self._inner.lower())

    def upper(self) -> "Expr":
        """Convert string column values to uppercase."""
        return Expr(self._inner.upper())

    def contains(self, substr: "ExprLike") -> "Expr":
        """Return True where the string contains *substr*."""
        return Expr(self._inner.contains(_coerce(substr)._inner))

    # ── type cast ────────────────────────────────────────────────────────────

    def cast(self, data_type: "Union[str, pa.DataType]") -> "Expr":
        """Cast values to *data_type*.

        Parameters
        ----------
        data_type:
            A PyArrow ``DataType`` (e.g. ``pa.int32()``), which is passed
            through unchanged, or a type name string that is looked up in
            ``_STR_TO_PA_TYPE``: ``"bool"``/``"boolean"``, ``"int8"``,
            ``"int16"``, ``"int32"``, ``"int64"``, ``"uint8"``–``"uint64"``,
            ``"float16"``, ``"float32"``/``"float"``,
            ``"float64"``/``"double"``, ``"string"``/``"utf8"``/``"str"``,
            ``"large_string"``/``"large_utf8"``, ``"date32"``/``"date"``,
            ``"date64"``.

        Raises
        ------
        ValueError
            If *data_type* is a string that is not a known type name.
        """
        if isinstance(data_type, str):
            try:
                data_type = _STR_TO_PA_TYPE[data_type]
            except KeyError:
                # `from None`: the KeyError is an implementation detail of the
                # lookup; chaining it would only clutter the user's traceback.
                raise ValueError(
                    f"unsupported data type: '{data_type}'. Supported: "
                    f"{', '.join(_STR_TO_PA_TYPE)}"
                ) from None
        return Expr(self._inner.cast(data_type))

    # ── named comparison helpers (alternative to operators) ──────────────────

    def eq(self, other: "ExprLike") -> "Expr":
        """Equal to."""
        return self.__eq__(other)

    def ne(self, other: "ExprLike") -> "Expr":
        """Not equal to."""
        return self.__ne__(other)

    def lt(self, other: "ExprLike") -> "Expr":
        """Less than."""
        return self.__lt__(other)

    def lte(self, other: "ExprLike") -> "Expr":
        """Less than or equal to."""
        return self.__le__(other)

    def gt(self, other: "ExprLike") -> "Expr":
        """Greater than."""
        return self.__gt__(other)

    def gte(self, other: "ExprLike") -> "Expr":
        """Greater than or equal to."""
        return self.__ge__(other)

    def and_(self, other: "ExprLike") -> "Expr":
        """Logical AND."""
        return self.__and__(other)

    def or_(self, other: "ExprLike") -> "Expr":
        """Logical OR."""
        return self.__or__(other)

    # ── utilities ────────────────────────────────────────────────────────────

    def to_sql(self) -> str:
        """Render the expression as a SQL string (useful for debugging)."""
        return self._inner.to_sql()

    def __repr__(self) -> str:
        """Return ``Expr(<sql>)`` using the inner object's SQL rendering."""
        return f"Expr({self._inner.to_sql()})"
|
||||
|
||||
|
||||
# ── free functions ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def col(name: str) -> Expr:
    """Build an expression that refers to the table column *name*.

    Parameters
    ----------
    name:
        The column name.

    Examples
    --------
    >>> from lancedb.expr import col, lit
    >>> col("age") > lit(18)
    Expr((age > 18))
    """
    column_ref = expr_col(name)
    return Expr(column_ref)
|
||||
|
||||
|
||||
def lit(value: Union[bool, int, float, str]) -> Expr:
    """Build a constant-value expression from a plain Python value.

    Parameters
    ----------
    value:
        A Python ``bool``, ``int``, ``float``, or ``str``.

    Examples
    --------
    >>> from lancedb.expr import col, lit
    >>> col("price") * lit(1.1)
    Expr((price * 1.1))
    """
    literal = expr_lit(value)
    return Expr(literal)
|
||||
|
||||
|
||||
def func(name: str, *args: ExprLike) -> Expr:
    """Build a call to the SQL function *name*.

    Parameters
    ----------
    name:
        The SQL function name (e.g. ``"lower"``, ``"upper"``).
    *args:
        The function arguments as :class:`Expr` or plain Python literals;
        plain values are wrapped as literals first.

    Examples
    --------
    >>> from lancedb.expr import col, func
    >>> func("lower", col("name"))
    Expr(lower(name))
    """
    unwrapped = []
    for argument in args:
        # Coerce each argument to an Expr, then pass its native object on.
        unwrapped.append(_coerce(argument)._inner)
    return Expr(expr_func(name, unwrapped))
|
||||
201
python/python/lancedb/fts.py
Normal file
201
python/python/lancedb/fts.py
Normal file
@@ -0,0 +1,201 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Full text search index using tantivy-py"""
|
||||
|
||||
import os
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
try:
|
||||
import tantivy
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
|
||||
)
|
||||
|
||||
from .table import LanceTable
|
||||
|
||||
|
||||
def create_index(
    index_path: str,
    text_fields: List[str],
    ordering_fields: Optional[List[str]] = None,
    tokenizer_name: str = "default",
) -> tantivy.Index:
    """
    Build an empty tantivy index at ``index_path``.

    The schema always contains a stored integer field ``doc_id`` (filled with
    the row id when the index is populated), one stored text field per entry
    of ``text_fields`` and one fast unsigned field per entry of
    ``ordering_fields``.

    Parameters
    ----------
    index_path : str
        Path to the index directory; created if it does not exist
    text_fields : List[str]
        List of text fields to index
    ordering_fields: List[str], optional
        List of unsigned type fields to order by at search time
    tokenizer_name : str, default "default"
        The tokenizer to use

    Returns
    -------
    index : tantivy.Index
        The index object (not yet populated)
    """
    builder = tantivy.SchemaBuilder()
    # special field that we'll populate with row_id
    builder.add_integer_field("doc_id", stored=True)
    # data fields
    for text_field in text_fields:
        builder.add_text_field(text_field, stored=True, tokenizer_name=tokenizer_name)
    # None (or an empty list) simply contributes no ordering fields.
    for ordering_field in ordering_fields or []:
        builder.add_unsigned_field(ordering_field, fast=True)

    os.makedirs(index_path, exist_ok=True)
    return tantivy.Index(builder.build(), path=index_path)
|
||||
|
||||
|
||||
def populate_index(
    index: tantivy.Index,
    table: LanceTable,
    fields: List[str],
    writer_heap_size: Optional[int] = None,
    ordering_fields: Optional[List[str]] = None,
) -> int:
    """
    Write the text (and ordering) columns of a LanceTable into an index.

    Parameters
    ----------
    index : tantivy.Index
        The index object
    table : LanceTable
        The table to index
    fields : List[str]
        List of fields to index; dotted names are resolved as nested
        struct paths
    writer_heap_size : int
        The writer heap size in bytes, defaults to 1GB
    ordering_fields : List[str], optional
        Fields whose non-null values are added to each document as
        unsigned values

    Returns
    -------
    int
        The number of rows scanned. The counter advances for every row,
        including rows that produced an empty document and were not written.

    Raises
    ------
    TypeError
        If one of ``fields`` is neither a string nor a large string column
    """
    ordering_fields = [] if ordering_fields is None else ordering_fields
    heap_bytes = writer_heap_size or 1024 * 1024 * 1024

    # first check the fields exist and are string or large string type
    nested_names = []
    for field_name in fields:
        try:
            pa_field = table.schema.field(field_name)  # raises KeyError if not found
        except KeyError:
            pa_field = resolve_path(table.schema, field_name)
            nested_names.append(field_name)

        if not (
            pa.types.is_string(pa_field.type)
            or pa.types.is_large_string(pa_field.type)
        ):
            raise TypeError(f"Field {field_name} is not a string type")

    # create a tantivy writer
    writer = index.writer(heap_size=heap_bytes)
    # write data into index
    dataset = table.to_lance()

    # Deepest dotted path among the nested fields; 0 when none are nested.
    deepest = max((len(n.split(".")) for n in nested_names), default=0)

    rows_seen = 0
    for batch in dataset.to_batches(columns=fields + ordering_fields):
        if deepest > 0:
            # Flatten struct columns until the dotted names become columns.
            batch = pa.Table.from_batches([batch])
            for _ in range(deepest - 1):
                batch = batch.flatten()
        for row in range(batch.num_rows):
            document = tantivy.Document()
            for field_name in fields:
                text = batch[field_name][row].as_py()
                if text is not None:
                    document.add_text(field_name, text)
            for field_name in ordering_fields:
                number = batch[field_name][row].as_py()
                if number is not None:
                    document.add_unsigned(field_name, number)
            if not document.is_empty:
                document.add_integer("doc_id", rows_seen)
                writer.add_document(document)
            rows_seen += 1
    # commit changes
    writer.commit()
    return rows_seen
|
||||
|
||||
|
||||
def resolve_path(schema, field_name: str) -> pa.Field:
    """
    Resolve a dotted field path to the field it designates.

    The first path segment is looked up on ``schema``; every following
    segment is looked up on the struct type of the field found so far.

    Parameters
    ----------
    schema
        Object exposing ``field(name)``; callers in this module pass a
        table schema
    field_name : str
        The field name to resolve, with nesting levels separated by "."

    Returns
    -------
    pa.Field
        The field found at the end of the path

    Raises
    ------
    KeyError
        If a segment has to be looked up inside a field that is not a
        struct. Errors raised by the lookups themselves are not caught here.
    """
    segments = field_name.split(".")
    field = schema.field(segments[0])
    for segment in segments[1:]:
        # Only struct fields have children to descend into.
        if not pa.types.is_struct(field.type):
            raise KeyError(f"field {field_name} not found in schema {schema}")
        field = field.type.field(segment)
    return field
|
||||
|
||||
|
||||
def search_index(
    index: tantivy.Index, query: str, limit: int = 10, ordering_field=None
) -> Tuple[Tuple[int, ...], Tuple[float, ...]]:
    """
    Search an index for a query

    Parameters
    ----------
    index : tantivy.Index
        The index object
    query : str
        The query string
    limit : int
        The maximum number of results to return
    ordering_field : optional
        When truthy, passed to the searcher as ``order_by_field``

    Returns
    -------
    ids_and_score: tuple[tuple[int, ...], tuple[float, ...]]
        A tuple of two tuples, the first containing the document ids
        and the second containing the scores. Both are empty when the
        search reports no results.
    """
    searcher = index.searcher()
    # Keep the parsed query under its own name instead of rebinding `query`.
    parsed_query = index.parse_query(query)
    # get top results
    if ordering_field:
        results = searcher.search(parsed_query, limit, order_by_field=ordering_field)
    else:
        results = searcher.search(parsed_query, limit)
    if results.count == 0:
        return tuple(), tuple()
    # Each hit is (score, doc_address); zip(*) transposes the (id, score)
    # pairs into one tuple of ids and one tuple of scores.
    return tuple(
        zip(
            *[
                (searcher.doc(doc_address)["doc_id"][0], score)
                for score, doc_address in results.hits
            ]
        )
    )
|
||||
@@ -7,7 +7,6 @@ from typing import Literal, Optional
|
||||
from ._lancedb import (
|
||||
IndexConfig,
|
||||
)
|
||||
from .types import BaseTokenizerType
|
||||
|
||||
lang_mapping = {
|
||||
"ar": "Arabic",
|
||||
@@ -112,12 +111,8 @@ class FTS:
|
||||
- "simple": Splits text by whitespace and punctuation.
|
||||
- "whitespace": Split text by whitespace, but not punctuation.
|
||||
- "raw": No tokenization. The entire text is treated as a single token.
|
||||
- "ngram": N-gram tokenizer for substring-style matching.
|
||||
- "jieba/*": Jieba tokenizer loaded from Lance's language model home.
|
||||
- "lindera/*": Lindera tokenizer loaded from Lance's language model home.
|
||||
language : str, default "English"
|
||||
The language to use for stemming and stop-word removal. This is not the
|
||||
primary way to enable CJK tokenization.
|
||||
The language to use for tokenization.
|
||||
max_token_length : int, default 40
|
||||
The maximum token length to index. Tokens longer than this length will be
|
||||
ignored.
|
||||
@@ -132,17 +127,10 @@ class FTS:
|
||||
ascii_folding : bool, default True
|
||||
Whether to fold ASCII characters. This converts accented characters to
|
||||
their ASCII equivalent. For example, "café" would be converted to "cafe".
|
||||
|
||||
Notes
|
||||
-----
|
||||
Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
|
||||
require tokenizer models in Lance's language model home. Set
|
||||
``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
|
||||
directory under ``lance/language_models``.
|
||||
"""
|
||||
|
||||
with_position: bool = False
|
||||
base_tokenizer: BaseTokenizerType = "simple"
|
||||
base_tokenizer: Literal["simple", "raw", "whitespace"] = "simple"
|
||||
language: str = "English"
|
||||
max_token_length: Optional[int] = 40
|
||||
lower_case: bool = True
|
||||
|
||||
@@ -2,3 +2,70 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""I/O utilities and interfaces for LanceDB."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class StorageOptionsProvider(ABC):
    """Interface for objects that supply storage options to LanceDB tables.

    A provider lets LanceDB refresh cloud-storage credentials (e.g., AWS S3,
    Azure Blob Storage, GCS) automatically. When the credentials carry an
    expiration time, fetch_storage_options() is called periodically so fresh
    credentials are available before the old ones expire.

    Example
    -------
    >>> class MyProvider(StorageOptionsProvider):
    ...     def fetch_storage_options(self) -> Dict[str, str]:
    ...         # Fetch fresh credentials from your credential manager
    ...         return {
    ...             "aws_access_key_id": "...",
    ...             "aws_secret_access_key": "...",
    ...             "expires_at_millis": "1234567890000"  # Optional
    ...         }
    """

    @abstractmethod
    def fetch_storage_options(self) -> Dict[str, str]:
        """Return a fresh set of storage credentials.

        LanceDB calls this method whenever credentials need to be refreshed.
        When the returned dictionary has an "expires_at_millis" key holding a
        Unix timestamp in milliseconds, LanceDB refreshes the credentials
        automatically before that time. Without that key the credentials are
        assumed to not expire.

        Returns
        -------
        Dict[str, str]
            Dictionary containing cloud storage credentials and optionally an
            expiration time:
            - "expires_at_millis" (optional): Unix timestamp in milliseconds when
              credentials expire
            - Provider-specific credential keys (e.g., aws_access_key_id,
              aws_secret_access_key, etc.)

        Raises
        ------
        RuntimeError
            If credentials cannot be fetched or are invalid
        """
        return None

    def provider_id(self) -> str:
        """Return a human-readable unique identifier for this provider instance.

        The identifier serves caching and equality comparison: two providers
        with the same ID will share the same cached object store connection.

        By default it is built from the class name and the instance's string
        representation. Override this method for custom identification logic.

        Returns
        -------
        str
            A unique identifier for this provider instance
        """
        class_name = self.__class__.__name__
        text = str(self)
        return f"{class_name} {{ repr: {text!r} }}"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,10 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import copy
|
||||
import json
|
||||
|
||||
from deprecation import deprecated
|
||||
from lancedb import AsyncConnection, DBConnection
|
||||
import pyarrow as pa
|
||||
import json
|
||||
|
||||
from ._lancedb import async_permutation_builder, PermutationReader
|
||||
from .table import LanceTable
|
||||
@@ -37,7 +36,10 @@ class PermutationBuilder:
|
||||
be referenced by name in the future. If names are not provided then they can only
|
||||
be referenced by their ordinal index. There is no requirement to name every split.
|
||||
|
||||
The permutation is stored in memory and will be lost when the program exits.
|
||||
By default, the permutation will be stored in memory and will be lost when the
|
||||
program exits. To persist the permutation (for very large datasets or to share
|
||||
the permutation across multiple workers) use the [persist](#persist) method to
|
||||
create a permanent table.
|
||||
"""
|
||||
|
||||
def __init__(self, table: LanceTable):
|
||||
@@ -49,6 +51,15 @@ class PermutationBuilder:
|
||||
"""
|
||||
self._async = async_permutation_builder(table)
|
||||
|
||||
def persist(
|
||||
self, database: Union[DBConnection, AsyncConnection], table_name: str
|
||||
) -> "PermutationBuilder":
|
||||
"""
|
||||
Persist the permutation to the given database.
|
||||
"""
|
||||
self._async.persist(database, table_name)
|
||||
return self
|
||||
|
||||
def split_random(
|
||||
self,
|
||||
*,
|
||||
@@ -273,8 +284,9 @@ class Permutations:
|
||||
self.permutation_table = permutation_table
|
||||
|
||||
if permutation_table.schema.metadata is not None:
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
if split_names is not None:
|
||||
self.split_names = json.loads(split_names)
|
||||
self.split_dict = {
|
||||
@@ -369,44 +381,20 @@ class Permutation:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_table: LanceTable,
|
||||
permutation_table: Optional[LanceTable],
|
||||
split: int,
|
||||
reader: PermutationReader,
|
||||
selection: dict[str, str],
|
||||
batch_size: int,
|
||||
transform_fn: Callable[pa.RecordBatch, Any],
|
||||
offset: Optional[int] = None,
|
||||
limit: Optional[int] = None,
|
||||
connection_factory: Optional[Callable[[str], LanceTable]] = None,
|
||||
_reader: Optional[PermutationReader] = None,
|
||||
):
|
||||
"""
|
||||
Internal constructor. Use [from_tables](#from_tables) instead.
|
||||
"""
|
||||
assert base_table is not None, "base_table is required"
|
||||
assert reader is not None, "reader is required"
|
||||
assert selection is not None, "selection is required"
|
||||
self.base_table = base_table
|
||||
self.permutation_table = permutation_table
|
||||
self.split = split
|
||||
self.reader = reader
|
||||
self.selection = selection
|
||||
self.transform_fn = transform_fn
|
||||
self.batch_size = batch_size
|
||||
self.offset = offset
|
||||
self.limit = limit
|
||||
self.connection_factory = connection_factory
|
||||
if _reader is None:
|
||||
_reader = LOOP.run(self._build_reader())
|
||||
self.reader: PermutationReader = _reader
|
||||
|
||||
async def _build_reader(self) -> PermutationReader:
|
||||
reader = await PermutationReader.from_tables(
|
||||
self.base_table, self.permutation_table, self.split
|
||||
)
|
||||
if self.offset is not None:
|
||||
reader = await reader.with_offset(self.offset)
|
||||
if self.limit is not None:
|
||||
reader = await reader.with_limit(self.limit)
|
||||
return reader
|
||||
|
||||
def _with_selection(self, selection: dict[str, str]) -> "Permutation":
|
||||
"""
|
||||
@@ -415,97 +403,21 @@ class Permutation:
|
||||
Does not validation of the selection and it replaces it entirely. This is not
|
||||
intended for public use.
|
||||
"""
|
||||
new = copy.copy(self)
|
||||
new.selection = selection
|
||||
return new
|
||||
return Permutation(self.reader, selection, self.batch_size, self.transform_fn)
|
||||
|
||||
def _with_reader(self, reader: PermutationReader) -> "Permutation":
|
||||
"""
|
||||
Creates a new permutation with the given reader
|
||||
|
||||
This is an internal method and should not be used directly.
|
||||
"""
|
||||
return Permutation(reader, self.selection, self.batch_size, self.transform_fn)
|
||||
|
||||
def with_batch_size(self, batch_size: int) -> "Permutation":
|
||||
"""
|
||||
Creates a new permutation with the given batch size
|
||||
"""
|
||||
new = copy.copy(self)
|
||||
new.batch_size = batch_size
|
||||
return new
|
||||
|
||||
def with_connection_factory(
|
||||
self, connection_factory: Callable[[str], LanceTable]
|
||||
) -> "Permutation":
|
||||
"""
|
||||
Creates a new permutation that will use ``connection_factory`` to reopen
|
||||
the base table when this permutation is unpickled in a worker process.
|
||||
|
||||
The factory is a callable that takes a single argument — the base table
|
||||
name — and returns a [LanceTable]. It must be picklable; the worker
|
||||
will pickle it via standard ``pickle`` and call it to recover the base
|
||||
table. Picklable callables in practice means top-level (module-level)
|
||||
functions, ``functools.partial`` of such functions, or instances of
|
||||
picklable classes implementing ``__call__``. Lambdas and closures over
|
||||
local variables don't pickle with the default protocol.
|
||||
|
||||
Setting a factory is necessary when the URI alone is not enough to
|
||||
re-open the connection — most importantly for LanceDB Cloud (``db://``)
|
||||
connections, where ``api_key`` and ``region`` aren't recoverable from
|
||||
the connection object after construction.
|
||||
|
||||
For local file or cloud-storage paths the factory is optional: if not
|
||||
set, ``__getstate__`` falls back to capturing
|
||||
``(uri, storage_options, namespace_path)`` and re-opening via
|
||||
``lancedb.connect(uri, storage_options=...)``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Basic native (file-system path), parameterized via ``functools.partial``::
|
||||
|
||||
import functools, lancedb
|
||||
from lancedb.permutation import Permutation
|
||||
|
||||
def open_native_table(uri: str, table_name: str):
|
||||
return lancedb.connect(uri).open_table(table_name)
|
||||
|
||||
factory = functools.partial(open_native_table, "/data/lance_db")
|
||||
permutation = Permutation.identity(
|
||||
factory("training")
|
||||
).with_connection_factory(factory)
|
||||
|
||||
Native via :func:`lancedb.connect_namespace` (e.g. a directory- or
|
||||
REST-backed namespace client). The factory takes the
|
||||
implementation name and properties dict as partial-bound args so
|
||||
the worker can rebuild the same namespace connection::
|
||||
|
||||
def open_via_namespace(
|
||||
impl: str, properties: dict[str, str], table_name: str,
|
||||
):
|
||||
return lancedb.connect_namespace(impl, properties).open_table(
|
||||
table_name,
|
||||
)
|
||||
|
||||
factory = functools.partial(
|
||||
open_via_namespace,
|
||||
"dir",
|
||||
{"root": "/data/lance_db"},
|
||||
)
|
||||
|
||||
LanceDB Cloud, reading credentials from env vars at worker startup
|
||||
so secrets aren't pickled into the dataset::
|
||||
|
||||
import os, lancedb
|
||||
|
||||
def open_remote_table(table_name: str):
|
||||
db = lancedb.connect(
|
||||
"db://my-database",
|
||||
api_key=os.environ["LANCEDB_API_KEY"],
|
||||
region=os.environ.get("LANCEDB_REGION", "us-east-1"),
|
||||
)
|
||||
return db.open_table(table_name)
|
||||
|
||||
permutation = Permutation.identity(
|
||||
open_remote_table("training")
|
||||
).with_connection_factory(open_remote_table)
|
||||
"""
|
||||
assert connection_factory is not None, "connection_factory is required"
|
||||
new = copy.copy(self)
|
||||
new.connection_factory = connection_factory
|
||||
return new
|
||||
return Permutation(self.reader, self.selection, batch_size, self.transform_fn)
|
||||
|
||||
@classmethod
|
||||
def identity(cls, table: LanceTable) -> "Permutation":
|
||||
@@ -548,8 +460,9 @@ class Permutation:
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
" because no split names are defined in the permutation table"
|
||||
)
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
if split_names is None:
|
||||
raise ValueError(
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
@@ -578,126 +491,11 @@ class Permutation:
|
||||
schema = await reader.output_schema(None)
|
||||
initial_selection = {name: name for name in schema.names}
|
||||
return cls(
|
||||
base_table,
|
||||
permutation_table,
|
||||
split,
|
||||
initial_selection,
|
||||
DEFAULT_BATCH_SIZE,
|
||||
Transforms.arrow2python,
|
||||
_reader=reader,
|
||||
reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python
|
||||
)
|
||||
|
||||
return LOOP.run(do_from_tables())
|
||||
|
||||
def __getstate__(self) -> dict[str, Any]:
|
||||
"""Build a picklable state dict for this permutation.
|
||||
|
||||
The base table is captured either via a user-supplied
|
||||
``connection_factory`` (see [with_connection_factory]) or, as a
|
||||
fallback, by introspecting ``(uri, storage_options, namespace_path)``
|
||||
on the connection. The permutation table — always an in-memory
|
||||
LanceDB table — is captured as a pyarrow Table (which pickles via
|
||||
Arrow IPC natively). The reader is dropped from the wire format;
|
||||
``__setstate__`` rebuilds it from the restored tables.
|
||||
"""
|
||||
permutation_data: Optional[pa.Table] = None
|
||||
if self.permutation_table is not None:
|
||||
permutation_data = self.permutation_table.to_arrow()
|
||||
|
||||
common = {
|
||||
"base_table_name": self.base_table.name,
|
||||
"permutation_data": permutation_data,
|
||||
"split": self.split,
|
||||
"selection": self.selection,
|
||||
"batch_size": self.batch_size,
|
||||
"transform_fn": self.transform_fn,
|
||||
"offset": self.offset,
|
||||
"limit": self.limit,
|
||||
"connection_factory": self.connection_factory,
|
||||
}
|
||||
|
||||
if self.connection_factory is not None:
|
||||
# The factory carries enough state to recover the base table on
|
||||
# its own; we don't need to capture the URI / storage options /
|
||||
# namespace from the existing connection.
|
||||
return common
|
||||
|
||||
# URI-introspection fallback: only viable for native (OSS) connections
|
||||
# where (uri, storage_options) is enough to reopen. Remote / cloud
|
||||
# connections don't expose recoverable api_key / region — those users
|
||||
# must call with_connection_factory().
|
||||
try:
|
||||
base_uri = self.base_table._conn.uri
|
||||
storage_options = self.base_table._conn.storage_options
|
||||
except AttributeError as e:
|
||||
raise ValueError(
|
||||
"Cannot pickle this Permutation: the base table's connection "
|
||||
"does not expose a uri/storage_options, which usually means it "
|
||||
"is a remote (LanceDB Cloud) connection. Call "
|
||||
"Permutation.with_connection_factory(...) first to provide a "
|
||||
"picklable callable that re-opens the base table from a worker "
|
||||
"process."
|
||||
) from e
|
||||
|
||||
if base_uri.startswith("memory://"):
|
||||
# In-memory base tables don't exist in any worker process by
|
||||
# default, so dump the entire base table into the pickle. This
|
||||
# can be expensive for large datasets — users with large
|
||||
# in-memory base tables should either persist them or set a
|
||||
# connection_factory.
|
||||
return {
|
||||
**common,
|
||||
"base_table_data": self.base_table.to_arrow(),
|
||||
}
|
||||
|
||||
return {
|
||||
**common,
|
||||
"base_table_uri": base_uri,
|
||||
"base_table_namespace": self.base_table._namespace_path,
|
||||
"base_table_storage_options": storage_options,
|
||||
}
|
||||
|
||||
def __setstate__(self, state: dict[str, Any]) -> None:
|
||||
from . import connect
|
||||
|
||||
connection_factory = state["connection_factory"]
|
||||
if connection_factory is not None:
|
||||
base_table = connection_factory(state["base_table_name"])
|
||||
elif "base_table_data" in state:
|
||||
# In-memory base table inlined into the pickle; rebuild the same
|
||||
# way we rebuild the in-memory permutation table.
|
||||
mem_db = connect("memory://")
|
||||
base_table = mem_db.create_table(
|
||||
state["base_table_name"], state["base_table_data"]
|
||||
)
|
||||
else:
|
||||
base_db = connect(
|
||||
state["base_table_uri"],
|
||||
storage_options=state["base_table_storage_options"],
|
||||
)
|
||||
base_table = base_db.open_table(
|
||||
state["base_table_name"],
|
||||
namespace_path=state["base_table_namespace"] or None,
|
||||
)
|
||||
|
||||
permutation_table: Optional[LanceTable] = None
|
||||
if state["permutation_data"] is not None:
|
||||
mem_db = connect("memory://")
|
||||
permutation_table = mem_db.create_table(
|
||||
"permutation", state["permutation_data"]
|
||||
)
|
||||
|
||||
self.base_table = base_table
|
||||
self.permutation_table = permutation_table
|
||||
self.split = state["split"]
|
||||
self.selection = state["selection"]
|
||||
self.batch_size = state["batch_size"]
|
||||
self.transform_fn = state["transform_fn"]
|
||||
self.offset = state["offset"]
|
||||
self.limit = state["limit"]
|
||||
self.connection_factory = connection_factory
|
||||
self.reader = LOOP.run(self._build_reader())
|
||||
|
||||
@property
|
||||
def schema(self) -> pa.Schema:
|
||||
async def do_output_schema():
|
||||
@@ -964,9 +762,7 @@ class Permutation:
|
||||
for expensive operations such as image decoding.
|
||||
"""
|
||||
assert transform is not None, "transform is required"
|
||||
new = copy.copy(self)
|
||||
new.transform_fn = transform
|
||||
return new
|
||||
return Permutation(self.reader, self.selection, self.batch_size, transform)
|
||||
|
||||
def __getitem__(self, index: int) -> Any:
|
||||
"""
|
||||
@@ -1001,10 +797,12 @@ class Permutation:
|
||||
"""
|
||||
Skip the first `skip` rows of the permutation
|
||||
"""
|
||||
new = copy.copy(self)
|
||||
new.offset = skip
|
||||
new.reader = LOOP.run(new._build_reader())
|
||||
return new
|
||||
|
||||
async def do_with_skip():
|
||||
reader = await self.reader.with_offset(skip)
|
||||
return self._with_reader(reader)
|
||||
|
||||
return LOOP.run(do_with_skip())
|
||||
|
||||
@deprecated(details="Use with_take instead")
|
||||
def take(self, limit: int) -> "Permutation":
|
||||
@@ -1022,10 +820,12 @@ class Permutation:
|
||||
"""
|
||||
Limit the permutation to `limit` rows (following any `skip`)
|
||||
"""
|
||||
new = copy.copy(self)
|
||||
new.limit = limit
|
||||
new.reader = LOOP.run(new._build_reader())
|
||||
return new
|
||||
|
||||
async def do_with_take():
|
||||
reader = await self.reader.with_limit(limit)
|
||||
return self._with_reader(reader)
|
||||
|
||||
return LOOP.run(do_with_take())
|
||||
|
||||
@deprecated(details="Use with_repeat instead")
|
||||
def repeat(self, times: int) -> "Permutation":
|
||||
|
||||
@@ -10,7 +10,6 @@ import sys
|
||||
import types
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
@@ -315,19 +314,6 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
|
||||
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
|
||||
# For regular Vector
|
||||
return pa.list_(tp.value_arrow_type(), tp.dim())
|
||||
if _safe_issubclass(tp, Enum):
|
||||
# Map Enum to the Arrow type of its value.
|
||||
# For string-valued enums, use dictionary encoding for efficiency.
|
||||
# For integer enums, use the native type.
|
||||
# Fall back to utf8 for mixed-type or empty enums.
|
||||
value_types = {type(m.value) for m in tp}
|
||||
if len(value_types) == 1:
|
||||
value_type = value_types.pop()
|
||||
if value_type is str:
|
||||
# Use dictionary encoding for string enums
|
||||
return pa.dictionary(pa.int32(), pa.utf8())
|
||||
return _py_type_to_arrow_type(value_type, field)
|
||||
return pa.utf8()
|
||||
return _py_type_to_arrow_type(tp, field)
|
||||
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ import deprecation
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pyarrow.compute as pc
|
||||
import pyarrow.fs as pa_fs
|
||||
import pydantic
|
||||
|
||||
from lancedb.pydantic import PYDANTIC_VERSION
|
||||
@@ -37,7 +38,6 @@ from .rerankers.base import Reranker
|
||||
from .rerankers.rrf import RRFReranker
|
||||
from .rerankers.util import check_reranker_result
|
||||
from .util import flatten_columns
|
||||
from .expr import Expr
|
||||
from lancedb._lancedb import fts_query_to_json
|
||||
from typing_extensions import Annotated
|
||||
|
||||
@@ -70,7 +70,7 @@ def ensure_vector_query(
|
||||
) -> Union[List[float], List[List[float]], pa.Array, List[pa.Array]]:
|
||||
if isinstance(val, list):
|
||||
if len(val) == 0:
|
||||
raise ValueError("Vector query must be a non-empty list")
|
||||
return ValueError("Vector query must be a non-empty list")
|
||||
sample = val[0]
|
||||
else:
|
||||
if isinstance(val, float):
|
||||
@@ -83,7 +83,7 @@ def ensure_vector_query(
|
||||
return val
|
||||
if isinstance(sample, list):
|
||||
if len(sample) == 0:
|
||||
raise ValueError("Vector query must be a non-empty list")
|
||||
return ValueError("Vector query must be a non-empty list")
|
||||
if isinstance(sample[0], float):
|
||||
# val is list of list of floats
|
||||
return val
|
||||
@@ -449,8 +449,8 @@ class Query(pydantic.BaseModel):
|
||||
ensure_vector_query,
|
||||
] = None
|
||||
|
||||
# sql filter or type-safe Expr to refine the query with
|
||||
filter: Optional[Union[str, Expr]] = None
|
||||
# sql filter to refine the query with
|
||||
filter: Optional[str] = None
|
||||
|
||||
# if True then apply the filter after vector search
|
||||
postfilter: Optional[bool] = None
|
||||
@@ -464,8 +464,8 @@ class Query(pydantic.BaseModel):
|
||||
# distance type to use for vector search
|
||||
distance_type: Optional[str] = None
|
||||
|
||||
# which columns to return in the results (dict values may be str or Expr)
|
||||
columns: Optional[Union[List[str], Dict[str, Union[str, Expr]]]] = None
|
||||
# which columns to return in the results
|
||||
columns: Optional[Union[List[str], Dict[str, str]]] = None
|
||||
|
||||
# minimum number of IVF partitions to search
|
||||
#
|
||||
@@ -606,7 +606,6 @@ class LanceQueryBuilder(ABC):
|
||||
query,
|
||||
ordering_field_name=ordering_field_name,
|
||||
fts_columns=fts_columns,
|
||||
fast_search=fast_search,
|
||||
)
|
||||
|
||||
if isinstance(query, list):
|
||||
@@ -856,15 +855,14 @@ class LanceQueryBuilder(ABC):
|
||||
self._offset = offset
|
||||
return self
|
||||
|
||||
def select(self, columns: Union[list[str], dict[str, Union[str, Expr]]]) -> Self:
|
||||
def select(self, columns: Union[list[str], dict[str, str]]) -> Self:
|
||||
"""Set the columns to return.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, or dict of str to str or Expr
|
||||
columns: list of str, or dict of str to str default None
|
||||
List of column names to be fetched.
|
||||
Or a dictionary of column names to SQL expressions or
|
||||
:class:`~lancedb.expr.Expr` objects.
|
||||
Or a dictionary of column names to SQL expressions.
|
||||
All columns are fetched if None or unspecified.
|
||||
|
||||
Returns
|
||||
@@ -878,15 +876,15 @@ class LanceQueryBuilder(ABC):
|
||||
raise ValueError("columns must be a list or a dictionary")
|
||||
return self
|
||||
|
||||
def where(self, where: Union[str, Expr], prefilter: bool = True) -> Self:
|
||||
def where(self, where: str, prefilter: bool = True) -> Self:
|
||||
"""Set the where clause.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
where: str or :class:`~lancedb.expr.Expr`
|
||||
The filter condition. Can be a SQL string or a type-safe
|
||||
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
|
||||
and :func:`~lancedb.expr.lit`.
|
||||
where: str
|
||||
The where clause which is a valid SQL where clause. See
|
||||
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
|
||||
for valid SQL expressions.
|
||||
prefilter: bool, default True
|
||||
If True, apply the filter before vector search, otherwise the
|
||||
filter is applied on the result of vector search.
|
||||
@@ -1356,17 +1354,15 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
return result_set
|
||||
|
||||
def where(
|
||||
self, where: Union[str, Expr], prefilter: bool = None
|
||||
) -> LanceVectorQueryBuilder:
|
||||
def where(self, where: str, prefilter: bool = None) -> LanceVectorQueryBuilder:
|
||||
"""Set the where clause.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
where: str or :class:`~lancedb.expr.Expr`
|
||||
The filter condition. Can be a SQL string or a type-safe
|
||||
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
|
||||
and :func:`~lancedb.expr.lit`.
|
||||
where: str
|
||||
The where clause which is a valid SQL where clause. See
|
||||
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
|
||||
for valid SQL expressions.
|
||||
prefilter: bool, default True
|
||||
If True, apply the filter before vector search, otherwise the
|
||||
filter is applied on the result of vector search.
|
||||
@@ -1460,14 +1456,12 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
query: str | FullTextQuery,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||
fast_search: bool = None,
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
self._phrase_query = False
|
||||
self.ordering_field_name = ordering_field_name
|
||||
self._reranker = None
|
||||
self._fast_search = fast_search
|
||||
if isinstance(fts_columns, str):
|
||||
fts_columns = [fts_columns]
|
||||
self._fts_columns = fts_columns
|
||||
@@ -1489,19 +1483,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
self._phrase_query = phrase_query
|
||||
return self
|
||||
|
||||
def fast_search(self) -> LanceFtsQueryBuilder:
|
||||
"""
|
||||
Skip a flat search of unindexed data. This will improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceFtsQueryBuilder
|
||||
The LanceFtsQueryBuilder object.
|
||||
"""
|
||||
self._fast_search = True
|
||||
return self
|
||||
|
||||
def to_query_object(self) -> Query:
|
||||
return Query(
|
||||
columns=self._columns,
|
||||
@@ -1513,7 +1494,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
query=self._query, columns=self._fts_columns
|
||||
),
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
)
|
||||
|
||||
def output_schema(self) -> pa.Schema:
|
||||
@@ -1525,7 +1505,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
return self._table._output_schema(self.to_query_object())
|
||||
|
||||
def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
|
||||
self._table._ensure_no_legacy_fts_index()
|
||||
path, fs, exist = self._table._get_fts_index_path()
|
||||
if exist:
|
||||
return self.tantivy_to_arrow()
|
||||
|
||||
query = self._query
|
||||
if self._phrase_query:
|
||||
@@ -1549,6 +1531,90 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
):
|
||||
raise NotImplementedError("to_batches on an FTS query")
|
||||
|
||||
def tantivy_to_arrow(self) -> pa.Table:
|
||||
try:
|
||||
import tantivy
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
|
||||
)
|
||||
|
||||
from .fts import search_index
|
||||
|
||||
# get the index path
|
||||
path, fs, exist = self._table._get_fts_index_path()
|
||||
|
||||
# check if the index exist
|
||||
if not exist:
|
||||
raise FileNotFoundError(
|
||||
"Fts index does not exist. "
|
||||
"Please first call table.create_fts_index(['<field_names>']) to "
|
||||
"create the fts index."
|
||||
)
|
||||
|
||||
# Check that we are on local filesystem
|
||||
if not isinstance(fs, pa_fs.LocalFileSystem):
|
||||
raise NotImplementedError(
|
||||
"Tantivy-based full text search "
|
||||
"is only supported on the local filesystem"
|
||||
)
|
||||
# open the index
|
||||
index = tantivy.Index.open(path)
|
||||
# get the scores and doc ids
|
||||
query = self._query
|
||||
if self._phrase_query:
|
||||
query = query.replace('"', "'")
|
||||
query = f'"{query}"'
|
||||
limit = self._limit if self._limit is not None else 10
|
||||
row_ids, scores = search_index(
|
||||
index, query, limit, ordering_field=self.ordering_field_name
|
||||
)
|
||||
if len(row_ids) == 0:
|
||||
empty_schema = pa.schema([pa.field("_score", pa.float32())])
|
||||
return pa.Table.from_batches([], schema=empty_schema)
|
||||
scores = pa.array(scores)
|
||||
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
|
||||
output_tbl = output_tbl.append_column("_score", scores)
|
||||
# this needs to match vector search results which are uint64
|
||||
row_ids = pa.array(row_ids, type=pa.uint64())
|
||||
|
||||
if self._where is not None:
|
||||
tmp_name = "__lancedb__duckdb__indexer__"
|
||||
output_tbl = output_tbl.append_column(
|
||||
tmp_name, pa.array(range(len(output_tbl)))
|
||||
)
|
||||
try:
|
||||
# TODO would be great to have Substrait generate pyarrow compute
|
||||
# expressions or conversely have pyarrow support SQL expressions
|
||||
# using Substrait
|
||||
import duckdb
|
||||
|
||||
indexer = duckdb.sql(
|
||||
f"SELECT {tmp_name} FROM output_tbl WHERE {self._where}"
|
||||
).to_arrow_table()[tmp_name]
|
||||
output_tbl = output_tbl.take(indexer).drop([tmp_name])
|
||||
row_ids = row_ids.take(indexer)
|
||||
|
||||
except ImportError:
|
||||
import tempfile
|
||||
|
||||
import lance
|
||||
|
||||
# TODO Use "memory://" instead once that's supported
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
ds = lance.write_dataset(output_tbl, tmp)
|
||||
output_tbl = ds.to_table(filter=self._where)
|
||||
indexer = output_tbl[tmp_name]
|
||||
row_ids = row_ids.take(indexer)
|
||||
output_tbl = output_tbl.drop([tmp_name])
|
||||
|
||||
if self._with_row_id:
|
||||
output_tbl = output_tbl.append_column("_rowid", row_ids)
|
||||
|
||||
if self._reranker is not None:
|
||||
output_tbl = self._reranker.rerank_fts(self._query, output_tbl)
|
||||
return output_tbl
|
||||
|
||||
def rerank(self, reranker: Reranker) -> LanceFtsQueryBuilder:
|
||||
"""Rerank the results using the specified reranker.
|
||||
|
||||
@@ -1643,7 +1709,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
def _validate_query(self, query, vector=None, text=None):
|
||||
if query is not None and (vector is not None or text is not None):
|
||||
raise ValueError(
|
||||
"You can either provide a string query in search() method "
|
||||
"You can either provide a string query in search() method"
|
||||
"or set `vector()` and `text()` explicitly for hybrid search."
|
||||
"But not both."
|
||||
)
|
||||
@@ -2122,8 +2188,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
self._vector_query.select(self._columns)
|
||||
self._fts_query.select(self._columns)
|
||||
if self._where:
|
||||
self._vector_query.where(self._where, not self._postfilter)
|
||||
self._fts_query.where(self._where, not self._postfilter)
|
||||
self._vector_query.where(self._where, self._postfilter)
|
||||
self._fts_query.where(self._where, self._postfilter)
|
||||
if self._with_row_id:
|
||||
self._vector_query.with_row_id(True)
|
||||
self._fts_query.with_row_id(True)
|
||||
@@ -2203,20 +2269,10 @@ class AsyncQueryBase(object):
|
||||
"""
|
||||
if isinstance(columns, list) and all(isinstance(c, str) for c in columns):
|
||||
self._inner.select_columns(columns)
|
||||
elif isinstance(columns, dict) and all(isinstance(k, str) for k in columns):
|
||||
if any(isinstance(v, Expr) for v in columns.values()):
|
||||
# At least one value is an Expr — use the type-safe path.
|
||||
from .expr import _coerce
|
||||
|
||||
pairs = [(k, _coerce(v)._inner) for k, v in columns.items()]
|
||||
self._inner.select_expr(pairs)
|
||||
elif all(isinstance(v, str) for v in columns.values()):
|
||||
self._inner.select(list(columns.items()))
|
||||
else:
|
||||
raise TypeError(
|
||||
"dict values must be str or Expr, got "
|
||||
+ str({k: type(v) for k, v in columns.items()})
|
||||
)
|
||||
elif isinstance(columns, dict) and all(
|
||||
isinstance(k, str) and isinstance(v, str) for k, v in columns.items()
|
||||
):
|
||||
self._inner.select(list(columns.items()))
|
||||
else:
|
||||
raise TypeError("columns must be a list of column names or a dict")
|
||||
return self
|
||||
@@ -2456,13 +2512,11 @@ class AsyncStandardQuery(AsyncQueryBase):
|
||||
"""
|
||||
super().__init__(inner)
|
||||
|
||||
def where(self, predicate: Union[str, Expr]) -> Self:
|
||||
def where(self, predicate: str) -> Self:
|
||||
"""
|
||||
Only return rows matching the given predicate
|
||||
|
||||
The predicate can be a SQL string or a type-safe
|
||||
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
|
||||
and :func:`~lancedb.expr.lit`.
|
||||
The predicate should be supplied as an SQL query string.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -2474,10 +2528,7 @@ class AsyncStandardQuery(AsyncQueryBase):
|
||||
Filtering performance can often be improved by creating a scalar index
|
||||
on the filter column(s).
|
||||
"""
|
||||
if isinstance(predicate, Expr):
|
||||
self._inner.where_expr(predicate._inner)
|
||||
else:
|
||||
self._inner.where(predicate)
|
||||
self._inner.where(predicate)
|
||||
return self
|
||||
|
||||
def limit(self, limit: int) -> Self:
|
||||
|
||||
@@ -145,33 +145,6 @@ class TlsConfig:
|
||||
|
||||
@dataclass
|
||||
class ClientConfig:
|
||||
"""Configuration for the LanceDB Cloud HTTP client.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
user_agent: str
|
||||
User agent string sent with requests.
|
||||
retry_config: RetryConfig
|
||||
Configuration for retrying failed requests.
|
||||
timeout_config: Optional[TimeoutConfig]
|
||||
Configuration for request timeouts.
|
||||
extra_headers: Optional[dict]
|
||||
Additional headers to include in requests.
|
||||
id_delimiter: Optional[str]
|
||||
The delimiter to use when constructing object identifiers.
|
||||
tls_config: Optional[TlsConfig]
|
||||
TLS/mTLS configuration for secure connections.
|
||||
header_provider: Optional[HeaderProvider]
|
||||
Provider for dynamic headers to be added to each request.
|
||||
user_id: Optional[str]
|
||||
User identifier for tracking purposes. This is sent as the
|
||||
`x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
|
||||
This can also be set via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
|
||||
environment variable that contains the user ID value.
|
||||
"""
|
||||
|
||||
user_agent: str = f"LanceDB-Python-Client/{__version__}"
|
||||
retry_config: RetryConfig = field(default_factory=RetryConfig)
|
||||
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
|
||||
@@ -179,7 +152,6 @@ class ClientConfig:
|
||||
id_delimiter: Optional[str] = None
|
||||
tls_config: Optional[TlsConfig] = None
|
||||
header_provider: Optional["HeaderProvider"] = None
|
||||
user_id: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if isinstance(self.retry_config, dict):
|
||||
|
||||
@@ -24,7 +24,6 @@ from ..common import DATA
|
||||
from ..db import DBConnection, LOOP
|
||||
from ..embeddings import EmbeddingFunctionConfig
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
@@ -112,7 +111,7 @@ class RemoteDBConnection(DBConnection):
|
||||
@override
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
@@ -120,7 +119,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str], optional
|
||||
namespace: List[str], optional
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -134,18 +133,18 @@ class RemoteDBConnection(DBConnection):
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace_path: List[str],
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
@@ -153,7 +152,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
@@ -168,14 +167,14 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace_path=namespace_path, mode=mode, properties=properties
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace_path: List[str],
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
@@ -183,7 +182,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
@@ -197,20 +196,16 @@ class RemoteDBConnection(DBConnection):
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(
|
||||
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||
)
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
@@ -218,12 +213,12 @@ class RemoteDBConnection(DBConnection):
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
@@ -231,7 +226,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str], optional
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -245,11 +240,11 @@ class RemoteDBConnection(DBConnection):
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@@ -259,7 +254,7 @@ class RemoteDBConnection(DBConnection):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""List the names of all tables in the database.
|
||||
|
||||
@@ -268,7 +263,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str], default []
|
||||
namespace: List[str], default []
|
||||
The namespace to list tables in.
|
||||
Empty list represents root namespace.
|
||||
page_token: str
|
||||
@@ -287,11 +282,11 @@ class RemoteDBConnection(DBConnection):
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.table_names(
|
||||
namespace_path=namespace_path, start_after=page_token, limit=limit
|
||||
namespace=namespace, start_after=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@@ -300,7 +295,7 @@ class RemoteDBConnection(DBConnection):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> Table:
|
||||
@@ -310,7 +305,7 @@ class RemoteDBConnection(DBConnection):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace_path: List[str], optional
|
||||
namespace: List[str], optional
|
||||
The namespace to open the table from.
|
||||
None or empty list represents root namespace.
|
||||
|
||||
@@ -320,15 +315,15 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if index_cache_size is not None:
|
||||
logging.info(
|
||||
"index_cache_size is ignored in LanceDb Cloud"
|
||||
" (there is no local cache to configure)"
|
||||
)
|
||||
|
||||
table = LOOP.run(self._conn.open_table(name, namespace_path=namespace_path))
|
||||
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
||||
return RemoteTable(table, self.db_name)
|
||||
|
||||
def clone_table(
|
||||
@@ -336,7 +331,7 @@ class RemoteDBConnection(DBConnection):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace_path: Optional[List[str]] = None,
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -349,7 +344,7 @@ class RemoteDBConnection(DBConnection):
|
||||
The name of the target table to create.
|
||||
source_uri: str
|
||||
The URI of the source table to clone from.
|
||||
target_namespace_path: List[str], optional
|
||||
target_namespace: List[str], optional
|
||||
The namespace for the target table.
|
||||
None or empty list represents root namespace.
|
||||
source_version: int, optional
|
||||
@@ -366,13 +361,13 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
if target_namespace_path is None:
|
||||
target_namespace_path = []
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
table = LOOP.run(
|
||||
self._conn.clone_table(
|
||||
target_table_name,
|
||||
source_uri,
|
||||
target_namespace_path=target_namespace_path,
|
||||
target_namespace=target_namespace,
|
||||
source_version=source_version,
|
||||
source_tag=source_tag,
|
||||
is_shallow=is_shallow,
|
||||
@@ -392,7 +387,7 @@ class RemoteDBConnection(DBConnection):
|
||||
exist_ok: bool = False,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Table:
|
||||
"""Create a [Table][lancedb.table.Table] in the database.
|
||||
|
||||
@@ -400,7 +395,7 @@ class RemoteDBConnection(DBConnection):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace_path: List[str], optional
|
||||
namespace: List[str], optional
|
||||
The namespace to create the table in.
|
||||
None or empty list represents root namespace.
|
||||
data: The data to initialize the table, *optional*
|
||||
@@ -500,8 +495,8 @@ class RemoteDBConnection(DBConnection):
|
||||
mode = "exist_ok"
|
||||
elif not mode:
|
||||
mode = "exist_ok"
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
validate_table_name(name)
|
||||
if embedding_functions is not None:
|
||||
logging.warning(
|
||||
@@ -516,7 +511,7 @@ class RemoteDBConnection(DBConnection):
|
||||
self._conn.create_table(
|
||||
name,
|
||||
data,
|
||||
namespace_path=namespace_path,
|
||||
namespace=namespace,
|
||||
mode=mode,
|
||||
schema=schema,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
@@ -526,28 +521,28 @@ class RemoteDBConnection(DBConnection):
|
||||
return RemoteTable(table, self.db_name)
|
||||
|
||||
@override
|
||||
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
"""Drop a table from the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace_path: List[str], optional
|
||||
namespace: List[str], optional
|
||||
The namespace to drop the table from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
LOOP.run(self._conn.drop_table(name, namespace_path=namespace_path))
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
LOOP.run(self._conn.drop_table(name, namespace=namespace))
|
||||
|
||||
@override
|
||||
def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -558,32 +553,19 @@ class RemoteDBConnection(DBConnection):
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
"""
|
||||
if cur_namespace_path is None:
|
||||
cur_namespace_path = []
|
||||
if new_namespace_path is None:
|
||||
new_namespace_path = []
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
LOOP.run(
|
||||
self._conn.rename_table(
|
||||
cur_name,
|
||||
new_name,
|
||||
cur_namespace_path=cur_namespace_path,
|
||||
new_namespace_path=new_namespace_path,
|
||||
cur_namespace=cur_namespace,
|
||||
new_namespace=new_namespace,
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
Returns a RestNamespace with the same URI and authentication headers.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
return LOOP.run(self._conn.namespace_client())
|
||||
|
||||
async def close(self):
|
||||
"""Close the connection to the database."""
|
||||
self._conn.close()
|
||||
self._client.close()
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
from functools import cached_property
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Union, Literal
|
||||
from typing import Dict, Iterable, List, Optional, Union, Literal
|
||||
import warnings
|
||||
|
||||
from lancedb._lancedb import (
|
||||
@@ -35,11 +35,9 @@ import pyarrow as pa
|
||||
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from lancedb.merge import LanceMergeInsertBuilder
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
from lancedb.table import _normalize_progress
|
||||
|
||||
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
|
||||
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
|
||||
from ..types import BaseTokenizerType
|
||||
|
||||
|
||||
class RemoteTable(Table):
|
||||
@@ -168,7 +166,7 @@ class RemoteTable(Table):
|
||||
wait_timeout: Optional[timedelta] = None,
|
||||
with_position: bool = False,
|
||||
# tokenizer configs:
|
||||
base_tokenizer: BaseTokenizerType = "simple",
|
||||
base_tokenizer: str = "simple",
|
||||
language: str = "English",
|
||||
max_token_length: Optional[int] = 40,
|
||||
lower_case: bool = True,
|
||||
@@ -220,6 +218,8 @@ class RemoteTable(Table):
|
||||
train: bool = True,
|
||||
):
|
||||
"""Create an index on the table.
|
||||
Currently, the only parameters that matter are
|
||||
the metric and the vector column name.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -250,6 +250,11 @@ class RemoteTable(Table):
|
||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if num_sub_vectors is not None:
|
||||
logging.warning(
|
||||
"num_sub_vectors is not supported on LanceDB cloud."
|
||||
"This parameter will be tuned automatically."
|
||||
)
|
||||
if accelerator is not None:
|
||||
logging.warning(
|
||||
"GPU accelerator is not yet supported on LanceDB cloud."
|
||||
@@ -310,7 +315,6 @@ class RemoteTable(Table):
|
||||
mode: str = "append",
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
progress: Optional[Union[bool, Callable, Any]] = None,
|
||||
) -> AddResult:
|
||||
"""Add more data to the [Table](Table). It has the same API signature as
|
||||
the OSS version.
|
||||
@@ -333,29 +337,17 @@ class RemoteTable(Table):
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float, default 0.
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
progress: bool, callable, or tqdm-like, optional
|
||||
A callback or tqdm-compatible progress bar. See
|
||||
:meth:`Table.add` for details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AddResult
|
||||
An object containing the new version number of the table after adding data.
|
||||
"""
|
||||
progress, owns = _normalize_progress(progress)
|
||||
try:
|
||||
return LOOP.run(
|
||||
self._table.add(
|
||||
data,
|
||||
mode=mode,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
progress=progress,
|
||||
)
|
||||
return LOOP.run(
|
||||
self._table.add(
|
||||
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||
)
|
||||
finally:
|
||||
if owns:
|
||||
progress.close()
|
||||
)
|
||||
|
||||
def search(
|
||||
self,
|
||||
@@ -655,45 +647,6 @@ class RemoteTable(Table):
|
||||
def drop_index(self, index_name: str):
|
||||
return LOOP.run(self._table.drop_index(index_name))
|
||||
|
||||
def prewarm_index(self, name: str) -> None:
|
||||
"""Prewarm an index in the table.
|
||||
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_index(name))
|
||||
|
||||
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_data(columns))
|
||||
|
||||
def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -40,5 +40,4 @@ IndexType = Literal[
|
||||
]
|
||||
|
||||
# Tokenizer literals
|
||||
BuiltinTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
|
||||
BaseTokenizerType = BuiltinTokenizerType | str
|
||||
BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user