mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 21:02:58 +00:00
Compare commits
1 Commits
main
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aa0da4f0f3 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.23.1"
|
||||
current_version = "0.22.4-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
@@ -72,9 +72,3 @@ search = "\nversion = \"{current_version}\""
|
||||
filename = "nodejs/Cargo.toml"
|
||||
replace = "\nversion = \"{new_version}\""
|
||||
search = "\nversion = \"{current_version}\""
|
||||
|
||||
# Java documentation
|
||||
[[tool.bumpversion.files]]
|
||||
filename = "docs/src/java/java.md"
|
||||
replace = "<version>{new_version}</version>"
|
||||
search = "<version>{current_version}</version>"
|
||||
|
||||
@@ -19,7 +19,7 @@ rustflags = [
|
||||
"-Wclippy::string_add_assign",
|
||||
"-Wclippy::string_add",
|
||||
"-Wclippy::string_lit_as_bytes",
|
||||
"-Wclippy::implicit_clone",
|
||||
"-Wclippy::string_to_string",
|
||||
"-Wclippy::use_self",
|
||||
"-Dclippy::cargo",
|
||||
"-Dclippy::dbg_macro",
|
||||
|
||||
@@ -31,7 +31,7 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
target: x86_64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
@@ -46,7 +46,7 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
target: aarch64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
|
||||
2
.github/workflows/build_mac_wheel/action.yml
vendored
2
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -22,5 +22,5 @@ runs:
|
||||
command: build
|
||||
# TODO: pass through interpreter
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
working-directory: python
|
||||
|
||||
@@ -26,7 +26,7 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
working-directory: python
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
||||
@@ -98,30 +98,3 @@ jobs:
|
||||
|
||||
printenv OPENAI_API_KEY | codex login --with-api-key
|
||||
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
||||
|
||||
- name: Trigger sophon dependency update
|
||||
env:
|
||||
TAG: ${{ inputs.tag }}
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
VERSION="${TAG#refs/tags/}"
|
||||
VERSION="${VERSION#v}"
|
||||
LANCEDB_BRANCH="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||
|
||||
echo "Triggering sophon workflow with:"
|
||||
echo " lance_ref: ${TAG#refs/tags/}"
|
||||
echo " lancedb_ref: ${LANCEDB_BRANCH}"
|
||||
|
||||
gh workflow run codex-bump-lancedb-lance.yml \
|
||||
--repo lancedb/sophon \
|
||||
-f lance_ref="${TAG#refs/tags/}" \
|
||||
-f lancedb_ref="${LANCEDB_BRANCH}"
|
||||
|
||||
- name: Show latest sophon workflow run
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "Latest sophon workflow run:"
|
||||
gh run list --repo lancedb/sophon --workflow codex-bump-lancedb-lance.yml --limit 1 --json databaseId,url,displayTitle
|
||||
|
||||
6
.github/workflows/docs.yml
vendored
6
.github/workflows/docs.yml
vendored
@@ -24,7 +24,7 @@ env:
|
||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
||||
# CI builds are faster with incremental disabled.
|
||||
CARGO_INCREMENTAL: "0"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
|
||||
jobs:
|
||||
# Single deploy job since we're just deploying
|
||||
@@ -50,8 +50,8 @@ jobs:
|
||||
- name: Build Python
|
||||
working-directory: python
|
||||
run: |
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
|
||||
107
.github/workflows/java-publish.yml
vendored
107
.github/workflows/java-publish.yml
vendored
@@ -1,35 +1,76 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Build and publish Java packages
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
release:
|
||||
types: [released]
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/java-publish.yml
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
name: Build and Publish
|
||||
runs-on: ubuntu-24.04
|
||||
macos-arm64:
|
||||
name: Build on MacOS Arm64
|
||||
runs-on: macos-14
|
||||
timeout-minutes: 45
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install protobuf
|
||||
- name: Build release
|
||||
run: |
|
||||
cargo build --release
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: liblancedb_jni_darwin_aarch64.zip
|
||||
path: target/release/liblancedb_jni.dylib
|
||||
retention-days: 1
|
||||
if-no-files-found: error
|
||||
linux-arm64:
|
||||
name: Build on Linux Arm64
|
||||
runs-on: warp-ubuntu-2204-arm64-8x
|
||||
timeout-minutes: 45
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
cache-workspaces: "./java/core/lancedb-jni"
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
rustflags: "-C debuginfo=1"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt -y -qq update
|
||||
sudo apt install -y protobuf-compiler libssl-dev pkg-config
|
||||
- name: Build release
|
||||
run: |
|
||||
cargo build --release
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: liblancedb_jni_linux_aarch64.zip
|
||||
path: target/release/liblancedb_jni.so
|
||||
retention-days: 1
|
||||
if-no-files-found: error
|
||||
linux-x86:
|
||||
runs-on: warp-ubuntu-2204-x64-8x
|
||||
timeout-minutes: 30
|
||||
needs: [macos-arm64, linux-arm64]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Set up Java 8
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
@@ -41,30 +82,40 @@ jobs:
|
||||
server-password: SONATYPE_TOKEN
|
||||
gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
|
||||
- name: Set git config
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
git config --global user.email "dev+gha@lancedb.com"
|
||||
git config --global user.name "LanceDB Github Runner"
|
||||
sudo apt -y -qq update
|
||||
sudo apt install -y protobuf-compiler libssl-dev pkg-config
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@v4
|
||||
- name: Copy native libs
|
||||
run: |
|
||||
mkdir -p ./core/target/classes/nativelib/darwin-aarch64 ./core/target/classes/nativelib/linux-aarch64
|
||||
cp ../liblancedb_jni_darwin_aarch64.zip/liblancedb_jni.dylib ./core/target/classes/nativelib/darwin-aarch64/liblancedb_jni.dylib
|
||||
cp ../liblancedb_jni_linux_aarch64.zip/liblancedb_jni.so ./core/target/classes/nativelib/linux-aarch64/liblancedb_jni.so
|
||||
- name: Dry run
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
./mvnw --batch-mode -DskipTests package -pl lancedb-core -am
|
||||
- name: Publish
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
mvn --batch-mode -DskipTests -Drust.release.build=true package
|
||||
- name: Set github
|
||||
run: |
|
||||
git config --global user.email "LanceDB Github Runner"
|
||||
git config --global user.name "dev+gha@lancedb.com"
|
||||
- name: Publish with Java 8
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
echo "use-agent" >> ~/.gnupg/gpg.conf
|
||||
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
|
||||
export GPG_TTY=$(tty)
|
||||
./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
|
||||
mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
|
||||
env:
|
||||
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
|
||||
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
|
||||
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [publish]
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
|
||||
needs: [linux-arm64, linux-x86, macos-arm64]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
118
.github/workflows/java.yml
vendored
118
.github/workflows/java.yml
vendored
@@ -1,46 +1,118 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Build Java LanceDB Core
|
||||
|
||||
name: Build and Run Java JNI Tests
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- java/**
|
||||
- .github/workflows/java.yml
|
||||
pull_request:
|
||||
paths:
|
||||
- java/**
|
||||
- rust/**
|
||||
- .github/workflows/java.yml
|
||||
|
||||
env:
|
||||
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
||||
# key, so we set it to make sure it is always consistent.
|
||||
CARGO_TERM_COLOR: always
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
||||
# CI builds are faster with incremental disabled.
|
||||
CARGO_INCREMENTAL: "0"
|
||||
CARGO_BUILD_JOBS: "1"
|
||||
jobs:
|
||||
build-java:
|
||||
runs-on: ubuntu-24.04
|
||||
name: Build
|
||||
linux-build-java-11:
|
||||
runs-on: ubuntu-22.04
|
||||
name: ubuntu-22.04 + Java 11
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Java 17
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: java/core/lancedb-jni
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: Run cargo fmt
|
||||
run: cargo fmt --check
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Install Java 11
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: temurin
|
||||
java-version: 11
|
||||
cache: "maven"
|
||||
- name: Java Style Check
|
||||
run: mvn checkstyle:check
|
||||
# Disable because of issues in lancedb rust core code
|
||||
# - name: Rust Clippy
|
||||
# working-directory: java/core/lancedb-jni
|
||||
# run: cargo clippy --all-targets -- -D warnings
|
||||
- name: Running tests with Java 11
|
||||
run: mvn clean test
|
||||
linux-build-java-17:
|
||||
runs-on: ubuntu-22.04
|
||||
name: ubuntu-22.04 + Java 17
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: java/core/lancedb-jni
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: Run cargo fmt
|
||||
run: cargo fmt --check
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Install Java 17
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: temurin
|
||||
java-version: 17
|
||||
cache: "maven"
|
||||
- run: echo "JAVA_17=$JAVA_HOME" >> $GITHUB_ENV
|
||||
- name: Java Style Check
|
||||
run: ./mvnw checkstyle:check
|
||||
- name: Build and install
|
||||
run: ./mvnw clean install
|
||||
run: mvn checkstyle:check
|
||||
# Disable because of issues in lancedb rust core code
|
||||
# - name: Rust Clippy
|
||||
# working-directory: java/core/lancedb-jni
|
||||
# run: cargo clippy --all-targets -- -D warnings
|
||||
- name: Running tests with Java 17
|
||||
run: |
|
||||
export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \
|
||||
-XX:+IgnoreUnrecognizedVMOptions \
|
||||
--add-opens=java.base/java.lang=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.io=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.net=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.nio=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.util=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
|
||||
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.security.action=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
|
||||
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
|
||||
-Djdk.reflect.useDirectMethodHandle=false \
|
||||
-Dio.netty.tryReflectionSetAccessible=true"
|
||||
JAVA_HOME=$JAVA_17 mvn clean test
|
||||
|
||||
2
.github/workflows/lance-release-timer.yml
vendored
2
.github/workflows/lance-release-timer.yml
vendored
@@ -59,4 +59,4 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,url,displayTitle
|
||||
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,htmlUrl,displayTitle
|
||||
|
||||
4
.github/workflows/nodejs.yml
vendored
4
.github/workflows/nodejs.yml
vendored
@@ -88,7 +88,7 @@ jobs:
|
||||
npm install -g @napi-rs/cli
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci --include=optional
|
||||
npm ci
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
- name: Setup localstack
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
npm install -g @napi-rs/cli
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci --include=optional
|
||||
npm ci
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
- name: Test
|
||||
|
||||
6
.github/workflows/npm-publish.yml
vendored
6
.github/workflows/npm-publish.yml
vendored
@@ -97,6 +97,12 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
settings:
|
||||
- target: x86_64-apple-darwin
|
||||
host: macos-latest
|
||||
features: ","
|
||||
pre_build: |-
|
||||
brew install protobuf
|
||||
rustup target add x86_64-apple-darwin
|
||||
- target: aarch64-apple-darwin
|
||||
host: macos-latest
|
||||
features: fp16kernels
|
||||
|
||||
4
.github/workflows/pypi-publish.yml
vendored
4
.github/workflows/pypi-publish.yml
vendored
@@ -11,7 +11,7 @@ on:
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
@@ -64,6 +64,8 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- target: x86_64-apple-darwin
|
||||
runner: macos-13
|
||||
- target: aarch64-apple-darwin
|
||||
runner: warp-macos-14-arm64-6x
|
||||
env:
|
||||
|
||||
25
.github/workflows/python.yml
vendored
25
.github/workflows/python.yml
vendored
@@ -18,7 +18,7 @@ env:
|
||||
# Color output for pytest is off by default.
|
||||
PYTEST_ADDOPTS: "--color=yes"
|
||||
FORCE_COLOR: "1"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
@@ -49,8 +49,8 @@ jobs:
|
||||
|
||||
type-check:
|
||||
name: "Type Check"
|
||||
timeout-minutes: 60
|
||||
runs-on: ubuntu-2404-8x-x64
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-22.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -78,8 +78,8 @@ jobs:
|
||||
|
||||
doctest:
|
||||
name: "Doctest"
|
||||
timeout-minutes: 60
|
||||
runs-on: ubuntu-2404-8x-x64
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
sudo apt install -y protobuf-compiler
|
||||
- name: Install
|
||||
run: |
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install tantivy
|
||||
pip install mlx
|
||||
- name: Doctest
|
||||
@@ -143,9 +143,16 @@ jobs:
|
||||
- name: Delete wheels
|
||||
run: rm -rf target/wheels
|
||||
platform:
|
||||
name: "Mac"
|
||||
name: "Mac: ${{ matrix.config.name }}"
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-14
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- name: x86
|
||||
runner: macos-13
|
||||
- name: Arm
|
||||
runner: macos-14
|
||||
runs-on: "${{ matrix.config.runner }}"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -219,7 +226,7 @@ jobs:
|
||||
run: |
|
||||
pip install "pydantic<2"
|
||||
pip install pyarrow==16
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install tantivy
|
||||
- name: Run tests
|
||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||
|
||||
2
.github/workflows/run_tests/action.yml
vendored
2
.github/workflows/run_tests/action.yml
vendored
@@ -15,7 +15,7 @@ runs:
|
||||
- name: Install lancedb
|
||||
shell: bash
|
||||
run: |
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
- name: Setup localstack for integration tests
|
||||
if: ${{ inputs.integration == 'true' }}
|
||||
shell: bash
|
||||
|
||||
2
.github/workflows/rust.yml
vendored
2
.github/workflows/rust.yml
vendored
@@ -122,7 +122,7 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
mac-runner: ["macos-14", "macos-15"]
|
||||
mac-runner: ["macos-13", "macos-14"]
|
||||
runs-on: "${{ matrix.mac-runner }}"
|
||||
defaults:
|
||||
run:
|
||||
|
||||
997
Cargo.lock
generated
997
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
30
Cargo.toml
30
Cargo.toml
@@ -1,5 +1,5 @@
|
||||
[workspace]
|
||||
members = ["rust/lancedb", "nodejs", "python"]
|
||||
members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
|
||||
# Python package needs to be built by maturin.
|
||||
exclude = ["python"]
|
||||
resolver = "2"
|
||||
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=1.0.1", default-features = false }
|
||||
lance-core = "=1.0.1"
|
||||
lance-datagen = "=1.0.1"
|
||||
lance-file = "=1.0.1"
|
||||
lance-io = { "version" = "=1.0.1", default-features = false }
|
||||
lance-index = "=1.0.1"
|
||||
lance-linalg = "=1.0.1"
|
||||
lance-namespace = "=1.0.1"
|
||||
lance-namespace-impls = { "version" = "=1.0.1", default-features = false }
|
||||
lance-table = "=1.0.1"
|
||||
lance-testing = "=1.0.1"
|
||||
lance-datafusion = "=1.0.1"
|
||||
lance-encoding = "=1.0.1"
|
||||
lance-arrow = "=1.0.1"
|
||||
lance = { "version" = "=1.0.0-beta.6", default-features = false, "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=1.0.0-beta.6", default-features = false, "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.0.0-beta.6", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=1.0.0-beta.6", "tag" = "v1.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "56.2", optional = false }
|
||||
|
||||
@@ -16,7 +16,7 @@ check_command_exists() {
|
||||
}
|
||||
|
||||
if [[ ! -e ./lancedb ]]; then
|
||||
if [[ x${SOPHON_READ_TOKEN} != "x" ]]; then
|
||||
if [[ -v SOPHON_READ_TOKEN ]]; then
|
||||
INPUT="lancedb-linux-x64"
|
||||
gh release \
|
||||
--repo lancedb/lancedb \
|
||||
|
||||
@@ -229,29 +229,6 @@ def set_local_version():
|
||||
update_cargo_toml(line_updater)
|
||||
|
||||
|
||||
def update_lockfiles(version: str, fallback_to_git: bool = False):
|
||||
"""
|
||||
Update Cargo metadata and optionally fall back to using the git tag if the
|
||||
requested crates.io version is unavailable.
|
||||
"""
|
||||
try:
|
||||
print("Updating lockfiles...", file=sys.stderr, end="")
|
||||
run_command("cargo metadata > /dev/null")
|
||||
print(" done.", file=sys.stderr)
|
||||
except Exception as e:
|
||||
if fallback_to_git and "failed to select a version" in str(e):
|
||||
print(
|
||||
f" failed for crates.io v{version}, retrying with git tag...",
|
||||
file=sys.stderr,
|
||||
)
|
||||
set_preview_version(version)
|
||||
print("Updating lockfiles...", file=sys.stderr, end="")
|
||||
run_command("cargo metadata > /dev/null")
|
||||
print(" done.", file=sys.stderr)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
|
||||
parser.add_argument(
|
||||
"version",
|
||||
@@ -267,7 +244,6 @@ if args.version == "stable":
|
||||
file=sys.stderr,
|
||||
)
|
||||
set_stable_version(latest_stable_version)
|
||||
update_lockfiles(latest_stable_version)
|
||||
elif args.version == "preview":
|
||||
latest_preview_version = get_latest_preview_version()
|
||||
print(
|
||||
@@ -275,10 +251,8 @@ elif args.version == "preview":
|
||||
file=sys.stderr,
|
||||
)
|
||||
set_preview_version(latest_preview_version)
|
||||
update_lockfiles(latest_preview_version)
|
||||
elif args.version == "local":
|
||||
set_local_version()
|
||||
update_lockfiles("local")
|
||||
else:
|
||||
# Parse the version number.
|
||||
version = args.version
|
||||
@@ -288,7 +262,9 @@ else:
|
||||
|
||||
if "beta" in version:
|
||||
set_preview_version(version)
|
||||
update_lockfiles(version)
|
||||
else:
|
||||
set_stable_version(version)
|
||||
update_lockfiles(version, fallback_to_git=True)
|
||||
|
||||
print("Updating lockfiles...", file=sys.stderr, end="")
|
||||
run_command("cargo metadata > /dev/null")
|
||||
print(" done.", file=sys.stderr)
|
||||
|
||||
@@ -11,7 +11,7 @@ watch:
|
||||
theme:
|
||||
name: "material"
|
||||
logo: assets/logo.png
|
||||
favicon: assets/favicon.ico
|
||||
favicon: assets/logo.png
|
||||
palette:
|
||||
# Palette toggle for light mode
|
||||
- scheme: lancedb
|
||||
@@ -32,6 +32,8 @@ theme:
|
||||
- content.tooltips
|
||||
- toc.follow
|
||||
- navigation.top
|
||||
- navigation.tabs
|
||||
- navigation.tabs.sticky
|
||||
- navigation.footer
|
||||
- navigation.tracking
|
||||
- navigation.instant
|
||||
@@ -113,16 +115,14 @@ markdown_extensions:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- markdown.extensions.toc:
|
||||
toc_depth: 3
|
||||
permalink: true
|
||||
permalink_title: Anchor link to this section
|
||||
baselevel: 1
|
||||
permalink: ""
|
||||
|
||||
nav:
|
||||
- Documentation:
|
||||
- SDK Reference: index.md
|
||||
- API reference:
|
||||
- Overview: index.md
|
||||
- Python: python/python.md
|
||||
- Javascript/TypeScript: js/globals.md
|
||||
- Java: java/java.md
|
||||
- Rust: https://docs.rs/lancedb/latest/lancedb/index.html
|
||||
|
||||
extra_css:
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 15 KiB |
@@ -1,111 +0,0 @@
|
||||
# VoyageAI Embeddings : Multimodal
|
||||
|
||||
VoyageAI embeddings can also be used to embed both text and image data, only some of the models support image data and you can check the list
|
||||
under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)
|
||||
|
||||
Supported multimodal models:
|
||||
|
||||
- `voyage-multimodal-3` - 1024 dimensions (text + images)
|
||||
- `voyage-multimodal-3.5` - Flexible dimensions (256, 512, 1024 default, 2048). Supports text, images, and video.
|
||||
|
||||
### Video Support (voyage-multimodal-3.5)
|
||||
|
||||
The `voyage-multimodal-3.5` model supports video input through:
|
||||
- Video URLs (`.mp4`, `.webm`, `.mov`, `.avi`, `.mkv`, `.m4v`, `.gif`)
|
||||
- Video file paths
|
||||
|
||||
Constraints: Max 20MB video size.
|
||||
|
||||
Supported parameters (to be passed in `create` method) are:
|
||||
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---|---|-------------------------|-------------------------------------------|
|
||||
| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
|
||||
| `output_dimension` | `int` | `None` | Output dimension for voyage-multimodal-3.5. Valid: 256, 512, 1024, 2048 |
|
||||
|
||||
Usage Example:
|
||||
|
||||
```python
|
||||
import base64
|
||||
import os
|
||||
from io import BytesIO
|
||||
|
||||
import requests
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
import pandas as pd
|
||||
|
||||
os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
|
||||
|
||||
db = lancedb.connect(".lancedb")
|
||||
func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
|
||||
|
||||
|
||||
def image_to_base64(image_bytes: bytes):
|
||||
buffered = BytesIO(image_bytes)
|
||||
img_str = base64.b64encode(buffered.getvalue())
|
||||
return img_str.decode("utf-8")
|
||||
|
||||
|
||||
class Images(LanceModel):
|
||||
label: str
|
||||
image_uri: str = func.SourceField() # image uri as the source
|
||||
image_bytes: str = func.SourceField() # image bytes base64 encoded as the source
|
||||
vector: Vector(func.ndims()) = func.VectorField() # vector column
|
||||
vec_from_bytes: Vector(func.ndims()) = func.VectorField() # Another vector column
|
||||
|
||||
|
||||
if "images" in db.table_names():
|
||||
db.drop_table("images")
|
||||
table = db.create_table("images", schema=Images)
|
||||
labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
|
||||
uris = [
|
||||
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
|
||||
"http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
|
||||
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
|
||||
"http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
|
||||
"http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
|
||||
"http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
|
||||
]
|
||||
# get each uri as bytes
|
||||
images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
|
||||
table.add(
|
||||
pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
|
||||
)
|
||||
```
|
||||
Now we can search using text from both the default vector column and the custom vector column
|
||||
```python
|
||||
|
||||
# text search
|
||||
actual = table.search("man's best friend", "vec_from_bytes").limit(1).to_pydantic(Images)[0]
|
||||
print(actual.label) # prints "dog"
|
||||
|
||||
frombytes = (
|
||||
table.search("man's best friend", vector_column_name="vec_from_bytes")
|
||||
.limit(1)
|
||||
.to_pydantic(Images)[0]
|
||||
)
|
||||
print(frombytes.label)
|
||||
|
||||
```
|
||||
|
||||
Because we're using a multi-modal embedding function, we can also search using images
|
||||
|
||||
```python
|
||||
# image search
|
||||
query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
|
||||
image_bytes = requests.get(query_image_uri).content
|
||||
query_image = Image.open(BytesIO(image_bytes))
|
||||
actual = table.search(query_image, "vec_from_bytes").limit(1).to_pydantic(Images)[0]
|
||||
print(actual.label == "dog")
|
||||
|
||||
# image search using a custom vector column
|
||||
other = (
|
||||
table.search(query_image, vector_column_name="vec_from_bytes")
|
||||
.limit(1)
|
||||
.to_pydantic(Images)[0]
|
||||
)
|
||||
print(actual.label)
|
||||
|
||||
```
|
||||
@@ -1,12 +1,7 @@
|
||||
# SDK Reference
|
||||
# API Reference
|
||||
|
||||
This site contains the API reference for the client SDKs supported by [LanceDB](https://lancedb.com).
|
||||
This page contains the API reference for the SDKs supported by the LanceDB team.
|
||||
|
||||
- [Python](python/python.md)
|
||||
- [JavaScript/TypeScript](js/globals.md)
|
||||
- [Java](java/java.md)
|
||||
- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
|
||||
|
||||
!!! info "LanceDB Documentation"
|
||||
|
||||
If you're looking for the full documentation of LanceDB, visit [docs.lancedb.com](https://docs.lancedb.com).
|
||||
- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
|
||||
@@ -1,499 +0,0 @@
|
||||
# Java SDK
|
||||
|
||||
The LanceDB Java SDK provides a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API.
|
||||
|
||||
!!! note
|
||||
The Java SDK currently only works for LanceDB remote database that connects to LanceDB Cloud and Enterprise.
|
||||
Local database support is a work in progress. Check [LANCEDB-2848](https://github.com/lancedb/lancedb/issues/2848) for the latest progress.
|
||||
|
||||
## Installation
|
||||
|
||||
Add the following dependency to your `pom.xml`:
|
||||
|
||||
```xml
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.23.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Connecting to LanceDB Cloud
|
||||
|
||||
```java
|
||||
import com.lancedb.LanceDbNamespaceClientBuilder;
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
|
||||
// If your DB url is db://example-db, then your database here is example-db
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
.apiKey("your_lancedb_cloud_api_key")
|
||||
.database("your_database_name")
|
||||
.build();
|
||||
```
|
||||
|
||||
### Connecting to LanceDB Enterprise
|
||||
|
||||
For LanceDB Enterprise deployments with a custom endpoint:
|
||||
|
||||
```java
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
.apiKey("your_lancedb_enterprise_api_key")
|
||||
.database("your_database_name")
|
||||
.endpoint("<your_enterprise_endpoint>")
|
||||
.build();
|
||||
```
|
||||
|
||||
### Configuration Options
|
||||
|
||||
| Method | Description | Required |
|
||||
|--------|-------------|----------|
|
||||
| `apiKey(String)` | LanceDB API key | Yes |
|
||||
| `database(String)` | Database name | Yes |
|
||||
| `endpoint(String)` | Custom endpoint URL for Enterprise deployments | No |
|
||||
| `region(String)` | AWS region (default: "us-east-1") | No |
|
||||
| `config(String, String)` | Additional configuration parameters | No |
|
||||
|
||||
## Metadata Operations
|
||||
|
||||
### Creating a Namespace
|
||||
|
||||
Namespaces organize tables hierarchically. Create a namespace before creating tables within it:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.CreateNamespaceRequest;
|
||||
import org.lance.namespace.model.CreateNamespaceResponse;
|
||||
|
||||
// Create a child namespace
|
||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||
```
|
||||
|
||||
You can also create nested namespaces:
|
||||
|
||||
```java
|
||||
// Create a nested namespace: parent/child
|
||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
|
||||
|
||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||
```
|
||||
|
||||
### Describing a Namespace
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DescribeNamespaceRequest;
|
||||
import org.lance.namespace.model.DescribeNamespaceResponse;
|
||||
|
||||
DescribeNamespaceRequest request = new DescribeNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
|
||||
System.out.println("Namespace properties: " + response.getProperties());
|
||||
```
|
||||
|
||||
### Listing Namespaces
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.ListNamespacesRequest;
|
||||
import org.lance.namespace.model.ListNamespacesResponse;
|
||||
|
||||
// List all namespaces at root level
|
||||
ListNamespacesRequest request = new ListNamespacesRequest();
|
||||
request.setId(Arrays.asList()); // Empty for root
|
||||
|
||||
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
|
||||
for (String ns : response.getNamespaces()) {
|
||||
System.out.println("Namespace: " + ns);
|
||||
}
|
||||
|
||||
// List child namespaces under a parent
|
||||
ListNamespacesRequest childRequest = new ListNamespacesRequest();
|
||||
childRequest.setId(Arrays.asList("parent_namespace"));
|
||||
|
||||
ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childRequest);
|
||||
```
|
||||
|
||||
### Listing Tables
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.ListTablesRequest;
|
||||
import org.lance.namespace.model.ListTablesResponse;
|
||||
|
||||
// List tables in a namespace
|
||||
ListTablesRequest request = new ListTablesRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
ListTablesResponse response = namespaceClient.listTables(request);
|
||||
for (String table : response.getTables()) {
|
||||
System.out.println("Table: " + table);
|
||||
}
|
||||
```
|
||||
|
||||
### Dropping a Namespace
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DropNamespaceRequest;
|
||||
import org.lance.namespace.model.DropNamespaceResponse;
|
||||
|
||||
DropNamespaceRequest request = new DropNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
DropNamespaceResponse response = namespaceClient.dropNamespace(request);
|
||||
```
|
||||
|
||||
### Describing a Table
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DescribeTableRequest;
|
||||
import org.lance.namespace.model.DescribeTableResponse;
|
||||
|
||||
DescribeTableRequest request = new DescribeTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
DescribeTableResponse response = namespaceClient.describeTable(request);
|
||||
System.out.println("Table version: " + response.getVersion());
|
||||
System.out.println("Schema fields: " + response.getSchema().getFields());
|
||||
```
|
||||
|
||||
### Dropping a Table
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DropTableRequest;
|
||||
import org.lance.namespace.model.DropTableResponse;
|
||||
|
||||
DropTableRequest request = new DropTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
DropTableResponse response = namespaceClient.dropTable(request);
|
||||
```
|
||||
|
||||
## Writing Data
|
||||
|
||||
### Creating a Table
|
||||
|
||||
Tables are created within a namespace by providing data in Apache Arrow IPC format:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
import org.lance.namespace.model.CreateTableRequest;
|
||||
import org.lance.namespace.model.CreateTableResponse;
|
||||
import org.apache.arrow.memory.BufferAllocator;
|
||||
import org.apache.arrow.memory.RootAllocator;
|
||||
import org.apache.arrow.vector.IntVector;
|
||||
import org.apache.arrow.vector.VarCharVector;
|
||||
import org.apache.arrow.vector.VectorSchemaRoot;
|
||||
import org.apache.arrow.vector.complex.FixedSizeListVector;
|
||||
import org.apache.arrow.vector.Float4Vector;
|
||||
import org.apache.arrow.vector.ipc.ArrowStreamWriter;
|
||||
import org.apache.arrow.vector.types.FloatingPointPrecision;
|
||||
import org.apache.arrow.vector.types.pojo.ArrowType;
|
||||
import org.apache.arrow.vector.types.pojo.Field;
|
||||
import org.apache.arrow.vector.types.pojo.FieldType;
|
||||
import org.apache.arrow.vector.types.pojo.Schema;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.channels.Channels;
|
||||
import java.util.Arrays;
|
||||
|
||||
// Create schema with id, name, and embedding fields
|
||||
Schema schema = new Schema(Arrays.asList(
|
||||
new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null),
|
||||
new Field("name", FieldType.nullable(new ArrowType.Utf8()), null),
|
||||
new Field("embedding",
|
||||
FieldType.nullable(new ArrowType.FixedSizeList(128)),
|
||||
Arrays.asList(new Field("item",
|
||||
FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
|
||||
null)))
|
||||
));
|
||||
|
||||
try (BufferAllocator allocator = new RootAllocator();
|
||||
VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
|
||||
|
||||
// Populate data
|
||||
root.setRowCount(3);
|
||||
IntVector idVector = (IntVector) root.getVector("id");
|
||||
VarCharVector nameVector = (VarCharVector) root.getVector("name");
|
||||
FixedSizeListVector embeddingVector = (FixedSizeListVector) root.getVector("embedding");
|
||||
Float4Vector embeddingData = (Float4Vector) embeddingVector.getDataVector();
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
idVector.setSafe(i, i + 1);
|
||||
nameVector.setSafe(i, ("item_" + i).getBytes());
|
||||
embeddingVector.setNotNull(i);
|
||||
for (int j = 0; j < 128; j++) {
|
||||
embeddingData.setSafe(i * 128 + j, (float) i);
|
||||
}
|
||||
}
|
||||
idVector.setValueCount(3);
|
||||
nameVector.setValueCount(3);
|
||||
embeddingData.setValueCount(3 * 128);
|
||||
embeddingVector.setValueCount(3);
|
||||
|
||||
// Serialize to Arrow IPC format
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(out))) {
|
||||
writer.start();
|
||||
writer.writeBatch();
|
||||
writer.end();
|
||||
}
|
||||
byte[] tableData = out.toByteArray();
|
||||
|
||||
// Create table in a namespace
|
||||
CreateTableRequest request = new CreateTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
CreateTableResponse response = namespaceClient.createTable(request, tableData);
|
||||
}
|
||||
```
|
||||
|
||||
### Insert
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.InsertIntoTableRequest;
|
||||
import org.lance.namespace.model.InsertIntoTableResponse;
|
||||
|
||||
// Prepare data in Arrow IPC format (similar to create table example)
|
||||
byte[] insertData = prepareArrowData();
|
||||
|
||||
InsertIntoTableRequest request = new InsertIntoTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
request.setMode(InsertIntoTableRequest.ModeEnum.APPEND);
|
||||
|
||||
InsertIntoTableResponse response = namespaceClient.insertIntoTable(request, insertData);
|
||||
System.out.println("New version: " + response.getVersion());
|
||||
```
|
||||
|
||||
### Update
|
||||
|
||||
Update rows matching a predicate condition:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.UpdateTableRequest;
|
||||
import org.lance.namespace.model.UpdateTableResponse;
|
||||
|
||||
UpdateTableRequest request = new UpdateTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
// Predicate to select rows to update
|
||||
request.setPredicate("id = 1");
|
||||
|
||||
// Set new values using SQL expressions as [column_name, expression] pairs
|
||||
request.setUpdates(Arrays.asList(
|
||||
Arrays.asList("name", "'updated_name'")
|
||||
));
|
||||
|
||||
UpdateTableResponse response = namespaceClient.updateTable(request);
|
||||
System.out.println("Updated rows: " + response.getUpdatedRows());
|
||||
```
|
||||
|
||||
### Delete
|
||||
|
||||
Delete rows matching a predicate condition:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DeleteFromTableRequest;
|
||||
import org.lance.namespace.model.DeleteFromTableResponse;
|
||||
|
||||
DeleteFromTableRequest request = new DeleteFromTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
// Predicate to select rows to delete
|
||||
request.setPredicate("id > 100");
|
||||
|
||||
DeleteFromTableResponse response = namespaceClient.deleteFromTable(request);
|
||||
System.out.println("New version: " + response.getVersion());
|
||||
```
|
||||
|
||||
### Merge Insert (Upsert)
|
||||
|
||||
Merge insert allows you to update existing rows and insert new rows in a single operation based on a key column:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.MergeInsertIntoTableRequest;
|
||||
import org.lance.namespace.model.MergeInsertIntoTableResponse;
|
||||
|
||||
// Prepare data with rows to update (id=2,3) and new rows (id=4)
|
||||
byte[] mergeData = prepareArrowData(); // Contains rows with id=2,3,4
|
||||
|
||||
MergeInsertIntoTableRequest request = new MergeInsertIntoTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
// Match on the "id" column
|
||||
request.setOn("id");
|
||||
|
||||
// Update all columns when a matching row is found
|
||||
request.setWhenMatchedUpdateAll(true);
|
||||
|
||||
// Insert new rows when no match is found
|
||||
request.setWhenNotMatchedInsertAll(true);
|
||||
|
||||
MergeInsertIntoTableResponse response = namespaceClient.mergeInsertIntoTable(request, mergeData);
|
||||
|
||||
System.out.println("Updated rows: " + response.getNumUpdatedRows());
|
||||
System.out.println("Inserted rows: " + response.getNumInsertedRows());
|
||||
```
|
||||
|
||||
## Querying Data
|
||||
|
||||
### Counting Rows
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.CountTableRowsRequest;
|
||||
|
||||
CountTableRowsRequest request = new CountTableRowsRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
Long rowCount = namespaceClient.countTableRows(request);
|
||||
System.out.println("Row count: " + rowCount);
|
||||
```
|
||||
|
||||
### Vector Search
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.QueryTableRequest;
|
||||
import org.lance.namespace.model.QueryTableRequestVector;
|
||||
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(10); // Return top 10 results
|
||||
|
||||
// Set the query vector
|
||||
List<Float> queryVector = new ArrayList<>();
|
||||
for (int i = 0; i < 128; i++) {
|
||||
queryVector.add(1.0f);
|
||||
}
|
||||
QueryTableRequestVector vector = new QueryTableRequestVector();
|
||||
vector.setSingleVector(queryVector);
|
||||
query.setVector(vector);
|
||||
|
||||
// Specify columns to return
|
||||
query.setColumns(Arrays.asList("id", "name", "embedding"));
|
||||
|
||||
// Execute query - returns Arrow IPC format
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Full Text Search
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.QueryTableRequest;
|
||||
import org.lance.namespace.model.QueryTableRequestFullTextQuery;
|
||||
import org.lance.namespace.model.StringFtsQuery;
|
||||
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(10);
|
||||
|
||||
// Set full text search query
|
||||
StringFtsQuery stringQuery = new StringFtsQuery();
|
||||
stringQuery.setQuery("search terms");
|
||||
stringQuery.setColumns(Arrays.asList("text_column"));
|
||||
|
||||
QueryTableRequestFullTextQuery fts = new QueryTableRequestFullTextQuery();
|
||||
fts.setStringQuery(stringQuery);
|
||||
query.setFullTextQuery(fts);
|
||||
|
||||
// Specify columns to return
|
||||
query.setColumns(Arrays.asList("id", "text_column"));
|
||||
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Query with Filter
|
||||
|
||||
```java
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(10);
|
||||
query.setFilter("id > 50");
|
||||
query.setColumns(Arrays.asList("id", "name"));
|
||||
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Query with Prefilter
|
||||
|
||||
```java
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(5);
|
||||
query.setPrefilter(true); // Apply filter before vector search
|
||||
query.setFilter("category = 'electronics'");
|
||||
|
||||
// Set query vector
|
||||
QueryTableRequestVector vector = new QueryTableRequestVector();
|
||||
vector.setSingleVector(queryVector);
|
||||
query.setVector(vector);
|
||||
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Reading Query Results
|
||||
|
||||
Query results are returned in Apache Arrow IPC file format. Here's how to read them:
|
||||
|
||||
```java
|
||||
import org.apache.arrow.vector.ipc.ArrowFileReader;
|
||||
import org.apache.arrow.vector.VectorSchemaRoot;
|
||||
import org.apache.arrow.memory.BufferAllocator;
|
||||
import org.apache.arrow.memory.RootAllocator;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.SeekableByteChannel;
|
||||
|
||||
// Helper class to read Arrow data from byte array
|
||||
class ByteArraySeekableByteChannel implements SeekableByteChannel {
|
||||
private final byte[] data;
|
||||
private long position = 0;
|
||||
private boolean isOpen = true;
|
||||
|
||||
public ByteArraySeekableByteChannel(byte[] data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(ByteBuffer dst) {
|
||||
int remaining = dst.remaining();
|
||||
int available = (int) (data.length - position);
|
||||
if (available <= 0) return -1;
|
||||
int toRead = Math.min(remaining, available);
|
||||
dst.put(data, (int) position, toRead);
|
||||
position += toRead;
|
||||
return toRead;
|
||||
}
|
||||
|
||||
@Override public long position() { return position; }
|
||||
@Override public SeekableByteChannel position(long newPosition) { position = newPosition; return this; }
|
||||
@Override public long size() { return data.length; }
|
||||
@Override public boolean isOpen() { return isOpen; }
|
||||
@Override public void close() { isOpen = false; }
|
||||
@Override public int write(ByteBuffer src) { throw new UnsupportedOperationException(); }
|
||||
@Override public SeekableByteChannel truncate(long size) { throw new UnsupportedOperationException(); }
|
||||
}
|
||||
|
||||
// Read query results
|
||||
byte[] queryResult = namespaceClient.queryTable(query);
|
||||
|
||||
try (BufferAllocator allocator = new RootAllocator();
|
||||
ArrowFileReader reader = new ArrowFileReader(
|
||||
new ByteArraySeekableByteChannel(queryResult), allocator)) {
|
||||
|
||||
for (int i = 0; i < reader.getRecordBlocks().size(); i++) {
|
||||
reader.loadRecordBatch(reader.getRecordBlocks().get(i));
|
||||
VectorSchemaRoot root = reader.getVectorSchemaRoot();
|
||||
|
||||
// Access data
|
||||
IntVector idVector = (IntVector) root.getVector("id");
|
||||
VarCharVector nameVector = (VarCharVector) root.getVector("name");
|
||||
|
||||
for (int row = 0; row < root.getRowCount(); row++) {
|
||||
int id = idVector.get(row);
|
||||
String name = new String(nameVector.get(row));
|
||||
System.out.println("Row " + row + ": id=" + id + ", name=" + name);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains a more complete example.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -147,7 +147,7 @@ A new PermutationBuilder instance
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
builder.splitCalculated({ calculation: "user_id % 3" });
|
||||
builder.splitCalculated("user_id % 3");
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
@@ -85,26 +85,17 @@
|
||||
|
||||
/* Header gradient (only header area) */
|
||||
.md-header {
|
||||
background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
box-shadow: inset 0 1px 0 rgba(255,255,255,0.08), 0 1px 0 rgba(0,0,0,0.08);
|
||||
}
|
||||
|
||||
/* Improve brand title contrast on the lavender side */
|
||||
.md-header__title,
|
||||
.md-header__topic,
|
||||
.md-header__title .md-ellipsis,
|
||||
.md-header__topic .md-ellipsis {
|
||||
color: #2b1b3a;
|
||||
text-shadow: 0 1px 0 rgba(255, 255, 255, 0.25);
|
||||
}
|
||||
|
||||
/* Same colors as header for tabs (that hold the text) */
|
||||
.md-tabs {
|
||||
background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
}
|
||||
|
||||
/* Dark scheme variant */
|
||||
[data-md-color-scheme="slate"] .md-header,
|
||||
[data-md-color-scheme="slate"] .md-tabs {
|
||||
background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
}
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
.PHONY: build-lancedb
|
||||
build-lancedb:
|
||||
./mvnw spotless:apply -pl lancedb-core -am
|
||||
./mvnw install -pl lancedb-core -am
|
||||
|
||||
.PHONY: test-lancedb
|
||||
test-lancedb:
|
||||
# Requires LANCEDB_DB and LANCEDB_API_KEY environment variables
|
||||
./mvnw test -pl lancedb-core -P integration-tests
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
./mvnw clean
|
||||
|
||||
.PHONY: build
|
||||
build: build-lancedb
|
||||
@@ -7,11 +7,10 @@
|
||||
For LanceDB Cloud, use the simplified builder API:
|
||||
|
||||
```java
|
||||
import com.lancedb.LanceDbNamespaceClientBuilder;
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
import com.lancedb.lance.namespace.LanceRestNamespace;
|
||||
|
||||
// If your DB url is db://example-db, then your database here is example-db
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
|
||||
.apiKey("your_lancedb_cloud_api_key")
|
||||
.database("your_database_name")
|
||||
.build();
|
||||
@@ -19,13 +18,13 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
|
||||
### LanceDB Enterprise
|
||||
|
||||
For Enterprise deployments, use your custom endpoint:
|
||||
For Enterprise deployments, use your VPC endpoint:
|
||||
|
||||
```java
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
|
||||
.apiKey("your_lancedb_enterprise_api_key")
|
||||
.database("your_database_name")
|
||||
.endpoint("<your_enterprise_endpoint>")
|
||||
.database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
|
||||
.hostOverride("http://<vpc_endpoint_dns_name>:80")
|
||||
.build();
|
||||
```
|
||||
|
||||
@@ -34,11 +33,5 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
Build:
|
||||
|
||||
```shell
|
||||
./mvnw install -pl lancedb-core -am
|
||||
```
|
||||
|
||||
Run tests:
|
||||
|
||||
```shell
|
||||
./mvnw test -pl lancedb-core
|
||||
```
|
||||
./mvnw install
|
||||
```
|
||||
30
java/core/lancedb-jni/Cargo.toml
Normal file
30
java/core/lancedb-jni/Cargo.toml
Normal file
@@ -0,0 +1,30 @@
|
||||
[package]
|
||||
name = "lancedb-jni"
|
||||
description = "JNI bindings for LanceDB"
|
||||
# TODO modify lancedb/Cargo.toml for version and dependencies
|
||||
version = "0.10.0"
|
||||
edition.workspace = true
|
||||
repository.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
keywords.workspace = true
|
||||
categories.workspace = true
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
lancedb = { path = "../../../rust/lancedb", default-features = false }
|
||||
lance = { workspace = true }
|
||||
arrow = { workspace = true, features = ["ffi"] }
|
||||
arrow-schema.workspace = true
|
||||
tokio = "1.46"
|
||||
jni = "0.21.1"
|
||||
snafu.workspace = true
|
||||
lazy_static.workspace = true
|
||||
serde = { version = "^1" }
|
||||
serde_json = { version = "1" }
|
||||
|
||||
[features]
|
||||
default = ["lancedb/default"]
|
||||
133
java/core/lancedb-jni/src/connection.rs
Normal file
133
java/core/lancedb-jni/src/connection.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use crate::ffi::JNIEnvExt;
|
||||
use crate::traits::IntoJava;
|
||||
use crate::{Error, RT};
|
||||
use jni::objects::{JObject, JString, JValue};
|
||||
use jni::JNIEnv;
|
||||
pub const NATIVE_CONNECTION: &str = "nativeConnectionHandle";
|
||||
use crate::Result;
|
||||
use lancedb::connection::{connect, Connection};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BlockingConnection {
|
||||
pub(crate) inner: Connection,
|
||||
}
|
||||
|
||||
impl BlockingConnection {
|
||||
pub fn create(dataset_uri: &str) -> Result<Self> {
|
||||
let inner = RT.block_on(connect(dataset_uri).execute())?;
|
||||
Ok(Self { inner })
|
||||
}
|
||||
|
||||
pub fn table_names(
|
||||
&self,
|
||||
start_after: Option<String>,
|
||||
limit: Option<i32>,
|
||||
) -> Result<Vec<String>> {
|
||||
let mut op = self.inner.table_names();
|
||||
if let Some(start_after) = start_after {
|
||||
op = op.start_after(start_after);
|
||||
}
|
||||
if let Some(limit) = limit {
|
||||
op = op.limit(limit as u32);
|
||||
}
|
||||
Ok(RT.block_on(op.execute())?)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoJava for BlockingConnection {
|
||||
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a> {
|
||||
attach_native_connection(env, self)
|
||||
}
|
||||
}
|
||||
|
||||
fn attach_native_connection<'local>(
|
||||
env: &mut JNIEnv<'local>,
|
||||
connection: BlockingConnection,
|
||||
) -> JObject<'local> {
|
||||
let j_connection = create_java_connection_object(env);
|
||||
// This block sets a native Rust object (Connection) as a field in the Java object (j_Connection).
|
||||
// Caution: This creates a potential for memory leaks. The Rust object (Connection) is not
|
||||
// automatically garbage-collected by Java, and its memory will not be freed unless
|
||||
// explicitly handled.
|
||||
//
|
||||
// To prevent memory leaks, ensure the following:
|
||||
// 1. The Java object (`j_Connection`) should implement the `java.io.Closeable` interface.
|
||||
// 2. Users of this Java object should be instructed to always use it within a try-with-resources
|
||||
// statement (or manually call the `close()` method) to ensure that `self.close()` is invoked.
|
||||
match unsafe { env.set_rust_field(&j_connection, NATIVE_CONNECTION, connection) } {
|
||||
Ok(_) => j_connection,
|
||||
Err(err) => {
|
||||
env.throw_new(
|
||||
"java/lang/RuntimeException",
|
||||
format!("Failed to set native handle for Connection: {}", err),
|
||||
)
|
||||
.expect("Error throwing exception");
|
||||
JObject::null()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn create_java_connection_object<'a>(env: &mut JNIEnv<'a>) -> JObject<'a> {
|
||||
env.new_object("com/lancedb/lancedb/Connection", "()V", &[])
|
||||
.expect("Failed to create Java Lance Connection instance")
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lancedb_Connection_releaseNativeConnection(
|
||||
mut env: JNIEnv,
|
||||
j_connection: JObject,
|
||||
) {
|
||||
let _: BlockingConnection = unsafe {
|
||||
env.take_rust_field(j_connection, NATIVE_CONNECTION)
|
||||
.expect("Failed to take native Connection handle")
|
||||
};
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lancedb_Connection_connect<'local>(
|
||||
mut env: JNIEnv<'local>,
|
||||
_obj: JObject,
|
||||
dataset_uri_object: JString,
|
||||
) -> JObject<'local> {
|
||||
let dataset_uri: String = ok_or_throw!(env, env.get_string(&dataset_uri_object)).into();
|
||||
let blocking_connection = ok_or_throw!(env, BlockingConnection::create(&dataset_uri));
|
||||
blocking_connection.into_java(&mut env)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lancedb_Connection_tableNames<'local>(
|
||||
mut env: JNIEnv<'local>,
|
||||
j_connection: JObject,
|
||||
start_after_obj: JObject, // Optional<String>
|
||||
limit_obj: JObject, // Optional<Integer>
|
||||
) -> JObject<'local> {
|
||||
ok_or_throw!(
|
||||
env,
|
||||
inner_table_names(&mut env, j_connection, start_after_obj, limit_obj)
|
||||
)
|
||||
}
|
||||
|
||||
fn inner_table_names<'local>(
|
||||
env: &mut JNIEnv<'local>,
|
||||
j_connection: JObject,
|
||||
start_after_obj: JObject, // Optional<String>
|
||||
limit_obj: JObject, // Optional<Integer>
|
||||
) -> Result<JObject<'local>> {
|
||||
let start_after = env.get_string_opt(&start_after_obj)?;
|
||||
let limit = env.get_int_opt(&limit_obj)?;
|
||||
let conn =
|
||||
unsafe { env.get_rust_field::<_, _, BlockingConnection>(j_connection, NATIVE_CONNECTION) }?;
|
||||
let table_names = conn.table_names(start_after, limit)?;
|
||||
drop(conn);
|
||||
let j_names = env.new_object("java/util/ArrayList", "()V", &[])?;
|
||||
for item in table_names {
|
||||
let jstr_item = env.new_string(item)?;
|
||||
let item_jobj = JObject::from(jstr_item);
|
||||
let item_gen = JValue::Object(&item_jobj);
|
||||
env.call_method(&j_names, "add", "(Ljava/lang/Object;)Z", &[item_gen])?;
|
||||
}
|
||||
Ok(j_names)
|
||||
}
|
||||
217
java/core/lancedb-jni/src/error.rs
Normal file
217
java/core/lancedb-jni/src/error.rs
Normal file
@@ -0,0 +1,217 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::str::Utf8Error;
|
||||
|
||||
use arrow_schema::ArrowError;
|
||||
use jni::errors::Error as JniError;
|
||||
use serde_json::Error as JsonError;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
type BoxedError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
/// Java Exception types
|
||||
pub enum JavaException {
|
||||
IllegalArgumentException,
|
||||
IOException,
|
||||
RuntimeException,
|
||||
}
|
||||
|
||||
impl JavaException {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Self::IllegalArgumentException => "java/lang/IllegalArgumentException",
|
||||
Self::IOException => "java/io/IOException",
|
||||
Self::RuntimeException => "java/lang/RuntimeException",
|
||||
}
|
||||
}
|
||||
}
|
||||
/// TODO(lu) change to lancedb-jni
/// Unified error type for the JNI layer.
///
/// Most variants carry a human-readable message plus the `snafu::Location`
/// where the error was constructed, so thrown Java exceptions can point back
/// at the originating Rust code.
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("JNI error: {message}, {location}"))]
    Jni { message: String, location: Location },
    #[snafu(display("Invalid argument: {message}, {location}"))]
    InvalidArgument { message: String, location: Location },
    #[snafu(display("IO error: {source}, {location}"))]
    IO {
        source: BoxedError,
        location: Location,
    },
    #[snafu(display("Arrow error: {message}, {location}"))]
    Arrow { message: String, location: Location },
    #[snafu(display("Index error: {message}, {location}"))]
    Index { message: String, location: Location },
    #[snafu(display("JSON error: {message}, {location}"))]
    JSON { message: String, location: Location },
    #[snafu(display("Dataset at path {path} was not found, {location}"))]
    DatasetNotFound { path: String, location: Location },
    #[snafu(display("Dataset already exists: {uri}, {location}"))]
    DatasetAlreadyExists { uri: String, location: Location },
    #[snafu(display("Table '{name}' already exists"))]
    TableAlreadyExists { name: String },
    #[snafu(display("Table '{name}' was not found: {source}"))]
    TableNotFound {
        name: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[snafu(display("Invalid table name '{name}': {reason}"))]
    InvalidTableName { name: String, reason: String },
    #[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))]
    EmbeddingFunctionNotFound {
        name: String,
        reason: String,
        location: Location,
    },
    // Catch-all for lance errors not mapped to a dedicated variant above.
    #[snafu(display("Other Lance error: {message}, {location}"))]
    OtherLance { message: String, location: Location },
    // Catch-all for lancedb errors not mapped to a dedicated variant above.
    #[snafu(display("Other LanceDB error: {message}, {location}"))]
    OtherLanceDB { message: String, location: Location },
}
|
||||
|
||||
impl Error {
    /// Throw this error into the JVM as the Java exception type that best
    /// fits the variant: argument/name/lookup problems map to
    /// `IllegalArgumentException`, I/O and index failures to `IOException`,
    /// everything else to `RuntimeException`.
    pub fn throw(&self, env: &mut jni::JNIEnv) {
        match self {
            Self::InvalidArgument { .. }
            | Self::DatasetNotFound { .. }
            | Self::DatasetAlreadyExists { .. }
            | Self::TableAlreadyExists { .. }
            | Self::TableNotFound { .. }
            | Self::InvalidTableName { .. }
            | Self::EmbeddingFunctionNotFound { .. } => {
                self.throw_as(env, JavaException::IllegalArgumentException)
            }
            Self::IO { .. } | Self::Index { .. } => self.throw_as(env, JavaException::IOException),
            Self::Arrow { .. }
            | Self::JSON { .. }
            | Self::OtherLance { .. }
            | Self::OtherLanceDB { .. }
            | Self::Jni { .. } => self.throw_as(env, JavaException::RuntimeException),
        }
    }

    /// Throw this error as a concrete Java exception type.
    ///
    /// Panics (via `expect`) if the JVM rejects the throw itself, e.g. if the
    /// exception class cannot be resolved.
    pub fn throw_as(&self, env: &mut jni::JNIEnv, exception: JavaException) {
        let message = &format!(
            "Error when throwing Java exception: {}:{}",
            exception.as_str(),
            self
        );
        env.throw_new(exception.as_str(), self.to_string())
            .expect(message);
    }
}
|
||||
|
||||
/// Crate-wide result alias using the JNI [`Error`] type.
pub type Result<T> = std::result::Result<T, Error>;

/// Bridge from `std::panic::Location` (captured via `#[track_caller]` in the
/// `From` impls below) to the `snafu::Location` stored in [`Error`] variants.
trait ToSnafuLocation {
    fn to_snafu_location(&'static self) -> snafu::Location;
}

impl ToSnafuLocation for std::panic::Location<'static> {
    fn to_snafu_location(&'static self) -> snafu::Location {
        snafu::Location::new(self.file(), self.line(), self.column())
    }
}
|
||||
|
||||
// The `#[track_caller]` attribute on each conversion makes
// `std::panic::Location::caller()` report the call site of the conversion
// (usually a `?`), not this file.

/// Raw JNI failures become [`Error::Jni`].
impl From<JniError> for Error {
    #[track_caller]
    fn from(source: JniError) -> Self {
        Self::Jni {
            message: source.to_string(),
            location: std::panic::Location::caller().to_snafu_location(),
        }
    }
}

/// Invalid UTF-8 from Java strings becomes [`Error::InvalidArgument`].
impl From<Utf8Error> for Error {
    #[track_caller]
    fn from(source: Utf8Error) -> Self {
        Self::InvalidArgument {
            message: source.to_string(),
            location: std::panic::Location::caller().to_snafu_location(),
        }
    }
}

/// Arrow failures become [`Error::Arrow`].
impl From<ArrowError> for Error {
    #[track_caller]
    fn from(source: ArrowError) -> Self {
        Self::Arrow {
            message: source.to_string(),
            location: std::panic::Location::caller().to_snafu_location(),
        }
    }
}

/// serde_json failures become [`Error::JSON`].
impl From<JsonError> for Error {
    #[track_caller]
    fn from(source: JsonError) -> Self {
        Self::JSON {
            message: source.to_string(),
            location: std::panic::Location::caller().to_snafu_location(),
        }
    }
}
|
||||
|
||||
/// Map `lance` errors onto the JNI error variants, preserving the original
/// `location` where the lance variant carries one; unmapped variants collapse
/// into [`Error::OtherLance`].
impl From<lance::Error> for Error {
    #[track_caller]
    fn from(source: lance::Error) -> Self {
        match source {
            lance::Error::DatasetNotFound {
                path,
                source: _, // inner cause dropped; the path is the useful part
                location,
            } => Self::DatasetNotFound { path, location },
            lance::Error::DatasetAlreadyExists { uri, location } => {
                Self::DatasetAlreadyExists { uri, location }
            }
            lance::Error::IO { source, location } => Self::IO { source, location },
            lance::Error::Arrow { message, location } => Self::Arrow { message, location },
            lance::Error::Index { message, location } => Self::Index { message, location },
            lance::Error::InvalidInput { source, location } => Self::InvalidArgument {
                message: source.to_string(),
                location,
            },
            _ => Self::OtherLance {
                message: source.to_string(),
                location: std::panic::Location::caller().to_snafu_location(),
            },
        }
    }
}
|
||||
|
||||
/// Map `lancedb` errors onto the JNI error variants. Wrapped lance errors are
/// delegated to `From<lance::Error>`; unmapped variants collapse into
/// [`Error::OtherLanceDB`].
impl From<lancedb::Error> for Error {
    #[track_caller]
    fn from(source: lancedb::Error) -> Self {
        match source {
            lancedb::Error::InvalidTableName { name, reason } => {
                Self::InvalidTableName { name, reason }
            }
            lancedb::Error::InvalidInput { message } => Self::InvalidArgument {
                message,
                location: std::panic::Location::caller().to_snafu_location(),
            },
            lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source },
            lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name },
            lancedb::Error::EmbeddingFunctionNotFound { name, reason } => {
                Self::EmbeddingFunctionNotFound {
                    name,
                    reason,
                    location: std::panic::Location::caller().to_snafu_location(),
                }
            }
            lancedb::Error::Arrow { source } => Self::Arrow {
                message: source.to_string(),
                location: std::panic::Location::caller().to_snafu_location(),
            },
            // Unwrap the inner lance error and reuse its dedicated mapping.
            lancedb::Error::Lance { source } => Self::from(source),
            _ => Self::OtherLanceDB {
                message: source.to_string(),
                location: std::panic::Location::caller().to_snafu_location(),
            },
        }
    }
}
|
||||
194
java/core/lancedb-jni/src/ffi.rs
Normal file
194
java/core/lancedb-jni/src/ffi.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use core::slice;
|
||||
|
||||
use jni::objects::{JByteBuffer, JObjectArray, JString};
|
||||
use jni::sys::jobjectArray;
|
||||
use jni::{objects::JObject, JNIEnv};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
|
||||
/// TODO(lu) import from lance-jni without duplicate
/// Extend JNIEnv with helper functions for converting common Java wrapper
/// objects (`Optional`, `List`, boxed primitives) into Rust values.
pub trait JNIEnvExt {
    /// Get integers from Java List<Integer> object.
    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;

    /// Get strings from Java List<String> object.
    #[allow(dead_code)]
    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;

    /// Get strings from Java String[] object.
    /// Note that get Option<Vec<String>> from Java Optional<String[]> just doesn't work.
    #[allow(unused)]
    fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>>;

    /// Get Option<String> from Java Optional<String>.
    fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>>;

    /// Get Option<Vec<String>> from Java Optional<List<String>>.
    #[allow(unused)]
    fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>>;

    /// Get Option<i32> from Java Optional<Integer>.
    fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>>;

    /// Get Option<Vec<i32>> from Java Optional<List<Integer>>.
    fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>>;

    /// Get Option<i64> from Java Optional<Long>.
    #[allow(unused)]
    fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>>;

    /// Get Option<u64> from Java Optional<Long>.
    #[allow(unused)]
    fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>>;

    /// Get Option<&[u8]> from Java Optional<ByteBuffer>.
    #[allow(unused)]
    fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>>;

    /// Run `f` on the value inside a Java `Optional`, returning `None` when
    /// the object is null or the `Optional` is empty.
    fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
    where
        F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>;
}
|
||||
|
||||
impl JNIEnvExt for JNIEnv<'_> {
    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>> {
        let list = self.get_list(obj)?;
        let mut iter = list.iter(self)?;
        let mut results = Vec::with_capacity(list.size(self)? as usize);
        while let Some(elem) = iter.next(self)? {
            // Unbox java.lang.Integer -> jint.
            let int_obj = self.call_method(elem, "intValue", "()I", &[])?;
            let int_value = int_obj.i()?;
            results.push(int_value);
        }
        Ok(results)
    }

    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>> {
        let list = self.get_list(obj)?;
        let mut iter = list.iter(self)?;
        let mut results = Vec::with_capacity(list.size(self)? as usize);
        while let Some(elem) = iter.next(self)? {
            let jstr = JString::from(elem);
            let val = self.get_string(&jstr)?;
            // to_str() fails on invalid UTF-8 -> Error::InvalidArgument.
            results.push(val.to_str()?.to_string())
        }
        Ok(results)
    }

    fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>> {
        // SAFETY: caller must pass a valid jobjectArray of java.lang.String.
        let jobject_array = unsafe { JObjectArray::from_raw(obj) };
        let array_len = self.get_array_length(&jobject_array)?;
        let mut res: Vec<String> = Vec::new();
        for i in 0..array_len {
            let item: JString = self.get_object_array_element(&jobject_array, i)?.into();
            res.push(self.get_string(&item)?.into());
        }
        Ok(res)
    }

    fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>> {
        self.get_optional(obj, |env, inner_obj| {
            // Optional.get() is safe here: get_optional already checked isPresent.
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_string_obj = java_obj_gen.l()?;
            let jstr = JString::from(java_string_obj);
            let val = env.get_string(&jstr)?;
            Ok(val.to_str()?.to_string())
        })
    }

    fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>> {
        self.get_optional(obj, |env, inner_obj| {
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_list_obj = java_obj_gen.l()?;
            env.get_strings(&java_list_obj)
        })
    }

    fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>> {
        self.get_optional(obj, |env, inner_obj| {
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_int_obj = java_obj_gen.l()?;
            let int_obj = env.call_method(java_int_obj, "intValue", "()I", &[])?;
            let int_value = int_obj.i()?;
            Ok(int_value)
        })
    }

    fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>> {
        self.get_optional(obj, |env, inner_obj| {
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_list_obj = java_obj_gen.l()?;
            env.get_integers(&java_list_obj)
        })
    }

    fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>> {
        self.get_optional(obj, |env, inner_obj| {
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_long_obj = java_obj_gen.l()?;
            let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
            let long_value = long_obj.j()?;
            Ok(long_value)
        })
    }

    fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>> {
        self.get_optional(obj, |env, inner_obj| {
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_long_obj = java_obj_gen.l()?;
            let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
            let long_value = long_obj.j()?;
            // NOTE(review): negative Java longs wrap to large u64 values here
            // rather than erroring — confirm callers never pass negatives.
            Ok(long_value as u64)
        })
    }

    fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>> {
        self.get_optional(obj, |env, inner_obj| {
            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
            let java_byte_buffer_obj = java_obj_gen.l()?;
            let j_byte_buffer = JByteBuffer::from(java_byte_buffer_obj);
            let raw_data = env.get_direct_buffer_address(&j_byte_buffer)?;
            let capacity = env.get_direct_buffer_capacity(&j_byte_buffer)?;
            // SAFETY: the returned slice borrows JVM-owned direct-buffer
            // memory; it is only valid while the Java ByteBuffer stays alive.
            // NOTE(review): callers must not retain the slice past that.
            let data = unsafe { slice::from_raw_parts(raw_data, capacity) };
            Ok(data)
        })
    }

    fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
    where
        F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>,
    {
        // A null reference is treated the same as an empty Optional.
        if obj.is_null() {
            return Ok(None);
        }
        let is_present = self.call_method(obj, "isPresent", "()Z", &[])?;
        if !is_present.z()? {
            // TODO(lu): put get java object into here cuz can only get java Object
            Ok(None)
        } else {
            f(self, obj).map(Some)
        }
    }
}
|
||||
|
||||
// JNI entry points used only by the Java-side JniTestHelper tests to
// exercise the argument-conversion helpers; they throw on failure and
// return nothing on success.

#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseInts(
    mut env: JNIEnv,
    _obj: JObject,
    list_obj: JObject, // List<Integer>
) {
    ok_or_throw_without_return!(env, env.get_integers(&list_obj));
}

#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseIntsOpt(
    mut env: JNIEnv,
    _obj: JObject,
    list_obj: JObject, // Optional<List<Integer>>
) {
    ok_or_throw_without_return!(env, env.get_ints_opt(&list_obj));
}
|
||||
57
java/core/lancedb-jni/src/lib.rs
Normal file
57
java/core/lancedb-jni/src/lib.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
// TODO import from lance-jni without duplicate
// Unwrap a Result inside a JNI function returning a JObject: on error, throw
// the corresponding Java exception and return JObject::null().
// NOTE(review): these macros expect `Error` (and `JObject` for ok_or_throw)
// to be in scope at the call site — they are not hygienic about those names.
#[macro_export]
macro_rules! ok_or_throw {
    ($env:expr, $result:expr) => {
        match $result {
            Ok(value) => value,
            Err(err) => {
                Error::from(err).throw(&mut $env);
                return JObject::null();
            }
        }
    };
}

// Same as ok_or_throw!, for JNI functions that return void.
macro_rules! ok_or_throw_without_return {
    ($env:expr, $result:expr) => {
        match $result {
            Ok(value) => value,
            Err(err) => {
                Error::from(err).throw(&mut $env);
                return;
            }
        }
    };
}

// Same as ok_or_throw!, but returning a caller-supplied fallback value.
#[macro_export]
macro_rules! ok_or_throw_with_return {
    ($env:expr, $result:expr, $ret:expr) => {
        match $result {
            Ok(value) => value,
            Err(err) => {
                Error::from(err).throw(&mut $env);
                return $ret;
            }
        }
    };
}

mod connection;
pub mod error;
mod ffi;
mod traits;

pub use error::{Error, Result};

lazy_static! {
    // Process-wide multi-threaded tokio runtime; presumably used by the JNI
    // wrappers to block on async lancedb calls — confirm in the connection
    // module.
    static ref RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .expect("Failed to create tokio runtime");
}
|
||||
114
java/core/lancedb-jni/src/traits.rs
Normal file
114
java/core/lancedb-jni/src/traits.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use jni::objects::{JMap, JObject, JString, JValue};
|
||||
use jni::JNIEnv;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
/// Extract a Rust primitive of type `T` from a Java object reference.
#[allow(dead_code)]
pub trait FromJObject<T> {
    fn extract(&self) -> Result<T>;
}

/// Convert a Rust type into a Java Object.
pub trait IntoJava {
    fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a>;
}

impl FromJObject<i32> for JObject<'_> {
    fn extract(&self) -> Result<i32> {
        Ok(JValue::from(self).i()?)
    }
}

impl FromJObject<i64> for JObject<'_> {
    fn extract(&self) -> Result<i64> {
        Ok(JValue::from(self).j()?)
    }
}

impl FromJObject<f32> for JObject<'_> {
    fn extract(&self) -> Result<f32> {
        Ok(JValue::from(self).f()?)
    }
}

impl FromJObject<f64> for JObject<'_> {
    fn extract(&self) -> Result<f64> {
        Ok(JValue::from(self).d()?)
    }
}

/// Extract a Rust `String` from a Java string reference.
#[allow(dead_code)]
pub trait FromJString {
    fn extract(&self, env: &mut JNIEnv) -> Result<String>;
}

impl FromJString for JString<'_> {
    fn extract(&self, env: &mut JNIEnv) -> Result<String> {
        Ok(env.get_string(self)?.into())
    }
}
|
||||
|
||||
/// Typed lookups on a Java `Map` keyed by Rust `&str`; each getter returns
/// `Ok(None)` when the key is absent (or, for primitives, maps to null).
pub trait JMapExt {
    #[allow(dead_code)]
    fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>>;

    #[allow(dead_code)]
    fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>>;

    #[allow(dead_code)]
    fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>>;

    #[allow(dead_code)]
    fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>>;

    #[allow(dead_code)]
    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
}

/// Shared implementation for the primitive getters: look up `key` in the map
/// and extract `T` from the value via `FromJObject`.
#[allow(dead_code)]
fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
where
    for<'a> JObject<'a>: FromJObject<T>,
{
    let key_obj: JObject = env.new_string(key)?.into();
    if let Some(value) = map.get(env, &key_obj)? {
        // A present key mapped to Java null is reported as absent.
        if value.is_null() {
            Ok(None)
        } else {
            Ok(Some(value.extract()?))
        }
    } else {
        Ok(None)
    }
}

impl JMapExt for JMap<'_, '_, '_> {
    fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>> {
        let key_obj: JObject = env.new_string(key)?.into();
        if let Some(value) = self.get(env, &key_obj)? {
            let value_str: JString = value.into();
            Ok(Some(value_str.extract(env)?))
        } else {
            Ok(None)
        }
    }

    fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>> {
        get_map_value(env, self, key)
    }

    fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>> {
        get_map_value(env, self, key)
    }

    fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>> {
        get_map_value(env, self, key)
    }

    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>> {
        get_map_value(env, self, key)
    }
}
|
||||
103
java/core/pom.xml
Normal file
103
java/core/pom.xml
Normal file
@@ -0,0 +1,103 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven module for lancedb-core: Java API plus the Rust JNI bridge,
     built via rust-maven-plugin. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
    <version>0.22.4-beta.2</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <artifactId>lancedb-core</artifactId>
  <name>${project.artifactId}</name>
  <description>LanceDB Core</description>
  <packaging>jar</packaging>
  <properties>
    <!-- Debug cargo builds by default; release builds opt in via -Drust.release.build=true -->
    <rust.release.build>false</rust.release.build>
  </properties>

  <dependencies>
    <dependency>
      <groupId>com.lancedb</groupId>
      <artifactId>lance-namespace-core</artifactId>
      <version>0.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-vector</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-memory-netty</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-c-data</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-dataset</artifactId>
    </dependency>
    <dependency>
      <groupId>org.json</groupId>
      <artifactId>json</artifactId>
    </dependency>
    <dependency>
      <groupId>org.questdb</groupId>
      <artifactId>jar-jni</artifactId>
    </dependency>
    <dependency>
      <groupId>org.junit.jupiter</groupId>
      <artifactId>junit-jupiter</artifactId>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <profiles>
    <!-- Compiles the lancedb-jni Rust crate and bundles the native library
         into the jar; active by default. -->
    <profile>
      <id>build-jni</id>
      <activation>
        <activeByDefault>true</activeByDefault>
      </activation>
      <build>
        <plugins>
          <plugin>
            <groupId>org.questdb</groupId>
            <artifactId>rust-maven-plugin</artifactId>
            <version>1.1.1</version>
            <executions>
              <execution>
                <id>lancedb-jni</id>
                <goals>
                  <goal>build</goal>
                </goals>
                <configuration>
                  <path>lancedb-jni</path>
                  <release>${rust.release.build}</release>
                  <!-- Copy native libraries to target/classes for runtime access -->
                  <copyTo>${project.build.directory}/classes/nativelib</copyTo>
                  <copyWithPlatformDir>true</copyWithPlatformDir>
                </configuration>
              </execution>
              <execution>
                <id>lancedb-jni-test</id>
                <goals>
                  <goal>test</goal>
                </goals>
                <configuration>
                  <path>lancedb-jni</path>
                  <release>false</release>
                  <verbosity>-v</verbosity>
                </configuration>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>
</project>
|
||||
108
java/core/src/main/java/com/lancedb/lancedb/Connection.java
Normal file
108
java/core/src/main/java/com/lancedb/lancedb/Connection.java
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.lancedb.lancedb;
|
||||
|
||||
import io.questdb.jar.jni.JarJniLoader;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * Represents a LanceDB database connection backed by a native (Rust) handle.
 *
 * <p>Instances are obtained via {@link #connect(String)} and must be closed to
 * release the native resources.
 */
public class Connection implements Closeable {
  static {
    // Extract and load the platform-specific lancedb_jni library bundled
    // under /nativelib in the jar.
    JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
  }

  // Opaque pointer to the native connection; 0 means released/closed.
  private long nativeConnectionHandle;

  /**
   * Connect to a LanceDB instance.
   *
   * @param uri the database URI
   * @return a new open connection
   */
  public static native Connection connect(String uri);

  /**
   * Get the names of all tables in the database. The names are sorted in ascending order.
   *
   * @return the table names
   */
  public List<String> tableNames() {
    return tableNames(Optional.empty(), Optional.empty());
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param limit The number of results to return.
   * @return the table names
   */
  public List<String> tableNames(int limit) {
    return tableNames(Optional.empty(), Optional.of(limit));
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param startAfter If present, only return names that come lexicographically after the supplied
   *     value. This can be combined with limit to implement pagination by setting this to the last
   *     table name from the previous page.
   * @return the table names
   */
  public List<String> tableNames(String startAfter) {
    return tableNames(Optional.of(startAfter), Optional.empty());
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param startAfter If present, only return names that come lexicographically after the supplied
   *     value. This can be combined with limit to implement pagination by setting this to the last
   *     table name from the previous page.
   * @param limit The number of results to return.
   * @return the table names
   */
  public List<String> tableNames(String startAfter, int limit) {
    return tableNames(Optional.of(startAfter), Optional.of(limit));
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param startAfter If present, only return names that come lexicographically after the supplied
   *     value. This can be combined with limit to implement pagination by setting this to the last
   *     table name from the previous page.
   * @param limit The number of results to return.
   * @return the table names
   */
  public native List<String> tableNames(Optional<String> startAfter, Optional<Integer> limit);

  /**
   * Closes this connection and releases any system resources associated with it. If the connection
   * is already closed, then invoking this method has no effect.
   */
  @Override
  public void close() {
    if (nativeConnectionHandle != 0) {
      releaseNativeConnection(nativeConnectionHandle);
      // Zero the handle so a second close() is a no-op.
      nativeConnectionHandle = 0;
    }
  }

  /**
   * Native method to release the Lance connection resources associated with the given handle.
   *
   * @param handle The native handle to the connection resource.
   */
  private native void releaseNativeConnection(long handle);

  // Instances are created only by the native connect() implementation.
  private Connection() {}
}
|
||||
135
java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
Normal file
135
java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.lancedb.lancedb;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
 * Tests for {@link Connection#tableNames} pagination against the bundled
 * {@code example_db} test fixture, which contains exactly the four tables in
 * {@code TABLE_NAMES} (in ascending order).
 */
public class ConnectionTest {
  // Expected fixture tables, pre-sorted ascending to match tableNames() order.
  private static final String[] TABLE_NAMES = {
    "dataset_version", "new_empty_dataset", "test", "write_stream"
  };

  @TempDir static Path tempDir; // Temporary directory for the tests
  private static URL lanceDbURL;

  @BeforeAll
  static void setUp() {
    // Resolve the example_db fixture from the test classpath.
    ClassLoader classLoader = ConnectionTest.class.getClassLoader();
    lanceDbURL = classLoader.getResource("example_db");
  }

  /** A freshly created database reports no tables. */
  @Test
  void emptyDB() {
    String databaseUri = tempDir.resolve("emptyDB").toString();
    try (Connection conn = Connection.connect(databaseUri)) {
      List<String> tableNames = conn.tableNames();
      assertTrue(tableNames.isEmpty());
    }
  }

  /** All fixture tables are returned in ascending order. */
  @Test
  void tableNames() {
    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
      List<String> tableNames = conn.tableNames();
      assertEquals(4, tableNames.size());
      for (int i = 0; i < TABLE_NAMES.length; i++) {
        assertEquals(TABLE_NAMES[i], tableNames.get(i));
      }
    }
  }

  /** startAfter filters lexicographically, including values not in the list. */
  @Test
  void tableNamesStartAfter() {
    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
      assertTableNamesStartAfter(
          conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
      assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
      assertTableNamesStartAfter(
          conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
      assertTableNamesStartAfter(conn, "z_dataset", 0);
    }
  }

  // Helper: assert tableNames(startAfter) returns exactly expectedNames.
  private void assertTableNamesStartAfter(
      Connection conn, String startAfter, int expectedSize, String... expectedNames) {
    List<String> tableNames = conn.tableNames(startAfter);
    assertEquals(expectedSize, tableNames.size());
    for (int i = 0; i < expectedNames.length; i++) {
      assertEquals(expectedNames[i], tableNames.get(i));
    }
  }

  /** limit caps the result count, returning the first N names. */
  @Test
  void tableNamesLimit() {
    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
      for (int i = 0; i <= TABLE_NAMES.length; i++) {
        List<String> tableNames = conn.tableNames(i);
        assertEquals(i, tableNames.size());
        for (int j = 0; j < i; j++) {
          assertEquals(TABLE_NAMES[j], tableNames.get(j));
        }
      }
    }
  }

  /** Combined startAfter + limit pagination, including boundary cases. */
  @Test
  void tableNamesStartAfterLimit() {
    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
      List<String> tableNames = conn.tableNames(TABLE_NAMES[0], 2);
      assertEquals(2, tableNames.size());
      assertEquals(TABLE_NAMES[1], tableNames.get(0));
      assertEquals(TABLE_NAMES[2], tableNames.get(1));
      tableNames = conn.tableNames(TABLE_NAMES[1], 1);
      assertEquals(1, tableNames.size());
      assertEquals(TABLE_NAMES[2], tableNames.get(0));
      tableNames = conn.tableNames(TABLE_NAMES[2], 2);
      assertEquals(1, tableNames.size());
      assertEquals(TABLE_NAMES[3], tableNames.get(0));
      tableNames = conn.tableNames(TABLE_NAMES[3], 2);
      assertEquals(0, tableNames.size());
      tableNames = conn.tableNames(TABLE_NAMES[0], 0);
      assertEquals(0, tableNames.size());

      // Limit larger than the number of remaining tables
      tableNames = conn.tableNames(TABLE_NAMES[0], 10);
      assertEquals(3, tableNames.size());
      assertEquals(TABLE_NAMES[1], tableNames.get(0));
      assertEquals(TABLE_NAMES[2], tableNames.get(1));
      assertEquals(TABLE_NAMES[3], tableNames.get(2));

      // Start after a value not in the list
      tableNames = conn.tableNames("non_existent_table", 2);
      assertEquals(2, tableNames.size());
      assertEquals(TABLE_NAMES[2], tableNames.get(0));
      assertEquals(TABLE_NAMES[3], tableNames.get(1));

      // Start after the last table with a limit
      tableNames = conn.tableNames(TABLE_NAMES[3], 1);
      assertEquals(0, tableNames.size());
    }
  }
}
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
$d51afd07-e3cd-4c76-9b9b-787e13fd55b0<62>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
$15648e72-076f-4ef1-8b90-10d305b95b3b<33>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
$a3689caf-4f6b-4afc-a3c7-97af75661843<34>oitem <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string8price <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*double80vector <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*fixed_size_list:float:28
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
26
java/lance-namespace/pom.xml
Normal file
26
java/lance-namespace/pom.xml
Normal file
@@ -0,0 +1,26 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.4-beta.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>lancedb-lance-namespace</artifactId>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java Integration with Lance Namespace</description>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lance-namespace-core</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -11,58 +11,35 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.lancedb;
|
||||
package com.lancedb.lancedb;
|
||||
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
import com.lancedb.lance.namespace.LanceRestNamespace;
|
||||
import com.lancedb.lance.namespace.client.apache.ApiClient;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* Util class to help construct a {@link LanceNamespace} for LanceDB.
|
||||
*
|
||||
* <p>For LanceDB Cloud, use the simplified builder API:
|
||||
*
|
||||
* <pre>{@code
|
||||
* import org.lance.namespace.LanceNamespace;
|
||||
*
|
||||
* // If your DB url is db://example-db, then your database here is example-db
|
||||
* LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
* .apiKey("your_lancedb_cloud_api_key")
|
||||
* .database("your_database_name")
|
||||
* .build();
|
||||
* }</pre>
|
||||
*
|
||||
* <p>For LanceDB Enterprise deployments, use your custom endpoint:
|
||||
*
|
||||
* <pre>{@code
|
||||
* LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
* .apiKey("your_lancedb_enterprise_api_key")
|
||||
* .database("your_database_name")
|
||||
* .endpoint("<your_enterprise_endpoint>")
|
||||
* .build();
|
||||
* }</pre>
|
||||
*/
|
||||
public class LanceDbNamespaceClientBuilder {
|
||||
/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */
|
||||
public class LanceDbRestNamespaces {
|
||||
private static final String DEFAULT_REGION = "us-east-1";
|
||||
private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
|
||||
|
||||
private String apiKey;
|
||||
private String database;
|
||||
private Optional<String> endpoint = Optional.empty();
|
||||
private Optional<String> hostOverride = Optional.empty();
|
||||
private Optional<String> region = Optional.empty();
|
||||
private Map<String, String> additionalConfig = new HashMap<>();
|
||||
|
||||
private LanceDbNamespaceClientBuilder() {}
|
||||
private LanceDbRestNamespaces() {}
|
||||
|
||||
/**
|
||||
* Create a new builder instance.
|
||||
*
|
||||
* @return A new LanceDbNamespaceClientBuilder
|
||||
* @return A new LanceRestNamespaceBuilder
|
||||
*/
|
||||
public static LanceDbNamespaceClientBuilder newBuilder() {
|
||||
return new LanceDbNamespaceClientBuilder();
|
||||
public static LanceDbRestNamespaces builder() {
|
||||
return new LanceDbRestNamespaces();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -71,7 +48,7 @@ public class LanceDbNamespaceClientBuilder {
|
||||
* @param apiKey The LanceDB API key
|
||||
* @return This builder
|
||||
*/
|
||||
public LanceDbNamespaceClientBuilder apiKey(String apiKey) {
|
||||
public LanceDbRestNamespaces apiKey(String apiKey) {
|
||||
if (apiKey == null || apiKey.trim().isEmpty()) {
|
||||
throw new IllegalArgumentException("API key cannot be null or empty");
|
||||
}
|
||||
@@ -85,7 +62,7 @@ public class LanceDbNamespaceClientBuilder {
|
||||
* @param database The database name
|
||||
* @return This builder
|
||||
*/
|
||||
public LanceDbNamespaceClientBuilder database(String database) {
|
||||
public LanceDbRestNamespaces database(String database) {
|
||||
if (database == null || database.trim().isEmpty()) {
|
||||
throw new IllegalArgumentException("Database cannot be null or empty");
|
||||
}
|
||||
@@ -94,25 +71,25 @@ public class LanceDbNamespaceClientBuilder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a custom endpoint URL (optional). When set, this overrides the default LanceDB Cloud URL
|
||||
* Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL
|
||||
* construction. Use this for LanceDB Enterprise deployments.
|
||||
*
|
||||
* @param endpoint The complete base URL for your LanceDB Enterprise deployment
|
||||
* @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
|
||||
* @return This builder
|
||||
*/
|
||||
public LanceDbNamespaceClientBuilder endpoint(String endpoint) {
|
||||
this.endpoint = Optional.ofNullable(endpoint);
|
||||
public LanceDbRestNamespaces hostOverride(String hostOverride) {
|
||||
this.hostOverride = Optional.ofNullable(hostOverride);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is
|
||||
* ignored when endpoint is set.
|
||||
* ignored when hostOverride is set.
|
||||
*
|
||||
* @param region The AWS region (e.g., "us-east-1", "eu-west-1")
|
||||
* @return This builder
|
||||
*/
|
||||
public LanceDbNamespaceClientBuilder region(String region) {
|
||||
public LanceDbRestNamespaces region(String region) {
|
||||
this.region = Optional.ofNullable(region);
|
||||
return this;
|
||||
}
|
||||
@@ -124,18 +101,18 @@ public class LanceDbNamespaceClientBuilder {
|
||||
* @param value The configuration value
|
||||
* @return This builder
|
||||
*/
|
||||
public LanceDbNamespaceClientBuilder config(String key, String value) {
|
||||
public LanceDbRestNamespaces config(String key, String value) {
|
||||
this.additionalConfig.put(key, value);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the LanceNamespace instance.
|
||||
* Build the LanceRestNamespace instance.
|
||||
*
|
||||
* @return A configured LanceNamespace
|
||||
* @return A configured LanceRestNamespace
|
||||
* @throws IllegalStateException if required parameters are missing
|
||||
*/
|
||||
public LanceNamespace build() {
|
||||
public LanceRestNamespace build() {
|
||||
// Validate required fields
|
||||
if (apiKey == null) {
|
||||
throw new IllegalStateException("API key is required");
|
||||
@@ -146,19 +123,24 @@ public class LanceDbNamespaceClientBuilder {
|
||||
|
||||
// Build configuration map
|
||||
Map<String, String> config = new HashMap<>(additionalConfig);
|
||||
config.put("header.x-lancedb-database", database);
|
||||
config.put("header.x-api-key", apiKey);
|
||||
config.put("headers.x-lancedb-database", database);
|
||||
config.put("headers.x-api-key", apiKey);
|
||||
|
||||
// Determine base URL
|
||||
String uri;
|
||||
if (endpoint.isPresent()) {
|
||||
uri = endpoint.get();
|
||||
String baseUrl;
|
||||
if (hostOverride.isPresent()) {
|
||||
baseUrl = hostOverride.get();
|
||||
config.put("host_override", hostOverride.get());
|
||||
} else {
|
||||
String effectiveRegion = region.orElse(DEFAULT_REGION);
|
||||
uri = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
|
||||
baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
|
||||
config.put("region", effectiveRegion);
|
||||
}
|
||||
config.put("uri", uri);
|
||||
|
||||
return LanceNamespace.connect("rest", config, null);
|
||||
// Create and configure ApiClient
|
||||
ApiClient apiClient = new ApiClient();
|
||||
apiClient.setBasePath(baseUrl);
|
||||
|
||||
return new LanceRestNamespace(apiClient, config);
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.23.1-final.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>Utilities to work with LanceDB Cloud and Enterprise via Lance REST Namespace</description>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.lance</groupId>
|
||||
<artifactId>lance-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
<artifactId>arrow-vector</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
<artifactId>arrow-memory-netty</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<version>5.18.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>2.0.16</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-slf4j2-impl</artifactId>
|
||||
<version>2.24.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>2.24.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>2.24.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
<goals>
|
||||
<goal>jar-no-fork</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -1,96 +0,0 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.lancedb;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/** Unit tests for LanceDbNamespaceClientBuilder. */
|
||||
public class LanceDbNamespaceClientBuilderTest {
|
||||
|
||||
@Test
|
||||
public void testBuilderRequiresApiKey() {
|
||||
LanceDbNamespaceClientBuilder builder =
|
||||
LanceDbNamespaceClientBuilder.newBuilder().database("test-db");
|
||||
|
||||
IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build);
|
||||
assertEquals("API key is required", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuilderRequiresDatabase() {
|
||||
LanceDbNamespaceClientBuilder builder =
|
||||
LanceDbNamespaceClientBuilder.newBuilder().apiKey("test-api-key");
|
||||
|
||||
IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build);
|
||||
assertEquals("Database is required", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApiKeyCannotBeNull() {
|
||||
IllegalArgumentException exception =
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> LanceDbNamespaceClientBuilder.newBuilder().apiKey(null));
|
||||
assertEquals("API key cannot be null or empty", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApiKeyCannotBeEmpty() {
|
||||
IllegalArgumentException exception =
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> LanceDbNamespaceClientBuilder.newBuilder().apiKey(" "));
|
||||
assertEquals("API key cannot be null or empty", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDatabaseCannotBeNull() {
|
||||
IllegalArgumentException exception =
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> LanceDbNamespaceClientBuilder.newBuilder().database(null));
|
||||
assertEquals("Database cannot be null or empty", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDatabaseCannotBeEmpty() {
|
||||
IllegalArgumentException exception =
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> LanceDbNamespaceClientBuilder.newBuilder().database(" "));
|
||||
assertEquals("Database cannot be null or empty", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuilderFluentApi() {
|
||||
// Verify the builder returns itself for chaining
|
||||
LanceDbNamespaceClientBuilder builder = LanceDbNamespaceClientBuilder.newBuilder();
|
||||
|
||||
assertSame(builder, builder.apiKey("test-key"));
|
||||
assertSame(builder, builder.database("test-db"));
|
||||
assertSame(builder, builder.endpoint("http://localhost:8080"));
|
||||
assertSame(builder, builder.region("eu-west-1"));
|
||||
assertSame(builder, builder.config("custom-key", "custom-value"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewBuilderCreatesNewInstance() {
|
||||
LanceDbNamespaceClientBuilder builder1 = LanceDbNamespaceClientBuilder.newBuilder();
|
||||
LanceDbNamespaceClientBuilder builder2 = LanceDbNamespaceClientBuilder.newBuilder();
|
||||
|
||||
assertNotSame(builder1, builder2);
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
<configuration monitorInterval="30">
|
||||
<appenders>
|
||||
<Console name='Console' target='SYSTEM_ERR'>
|
||||
<PatternLayout pattern='%d{HH:mm:ss.SSS} %p [%t] %C{1}.%M: %m%n'/>
|
||||
</Console>
|
||||
</appenders>
|
||||
<loggers>
|
||||
<logger name='com.lancedb' level='DEBUG' additivity='false'>
|
||||
<appender-ref ref='Console'/>
|
||||
</logger>
|
||||
<root level='INFO'>
|
||||
<appender-ref ref='Console'/>
|
||||
</root>
|
||||
</loggers>
|
||||
</configuration>
|
||||
23
java/pom.xml
23
java/pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.23.1-final.0</version>
|
||||
<version>0.22.4-beta.2</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>1.0.0-rc.2</lance-core.version>
|
||||
<lance-namespace.verison>0.0.1</lance-namespace.verison>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
@@ -51,7 +51,8 @@
|
||||
</properties>
|
||||
|
||||
<modules>
|
||||
<module>lancedb-core</module>
|
||||
<module>core</module>
|
||||
<module>lance-namespace</module>
|
||||
</modules>
|
||||
|
||||
<scm>
|
||||
@@ -63,9 +64,9 @@
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.lance</groupId>
|
||||
<artifactId>lance-core</artifactId>
|
||||
<version>${lance-core.version}</version>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lance-namespace-core</artifactId>
|
||||
<version>${lance-namespace.verison}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
@@ -87,11 +88,21 @@
|
||||
<artifactId>arrow-dataset</artifactId>
|
||||
<version>${arrow.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.questdb</groupId>
|
||||
<artifactId>jar-jni</artifactId>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<version>5.10.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
<version>20210307</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.23.1"
|
||||
version = "0.22.4-beta.2"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -118,7 +118,7 @@ export class PermutationBuilder {
|
||||
* @returns A new PermutationBuilder instance
|
||||
* @example
|
||||
* ```ts
|
||||
* builder.splitCalculated({ calculation: "user_id % 3" });
|
||||
* builder.splitCalculated("user_id % 3");
|
||||
* ```
|
||||
*/
|
||||
splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.23.1",
|
||||
"version": "0.22.4-beta.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.26.1"
|
||||
current_version = "0.25.4-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.26.1"
|
||||
version = "0.25.4-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -18,7 +18,6 @@ arrow = { version = "56.2", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/
|
||||
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lancedb/
|
||||
|
||||
help: ## Show this help.
|
||||
@sed -ne '/@sed/!s/## //p' $(MAKEFILE_LIST)
|
||||
|
||||
.PHONY: develop
|
||||
develop: ## Install the package in development mode.
|
||||
PIP_EXTRA_INDEX_URL="$(PIP_EXTRA_INDEX_URL)" maturin develop --extras tests,dev,embeddings
|
||||
PIP_EXTRA_INDEX_URL=$(PIP_EXTRA_INDEX_URL) maturin develop --extras tests,dev,embeddings
|
||||
|
||||
.PHONY: format
|
||||
format: ## Format the code.
|
||||
|
||||
@@ -10,7 +10,7 @@ dependencies = [
|
||||
"pyarrow>=16",
|
||||
"pydantic>=1.10",
|
||||
"tqdm>=4.27.0",
|
||||
"lance-namespace>=0.3.2"
|
||||
"lance-namespace>=0.0.21"
|
||||
]
|
||||
description = "lancedb"
|
||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
pylance = [
|
||||
"pylance>=1.0.0b14",
|
||||
"pylance>=0.25",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp",
|
||||
@@ -59,7 +59,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance>=1.0.0b14",
|
||||
"pylance>=1.0.0b4",
|
||||
"requests",
|
||||
"datafusion",
|
||||
]
|
||||
|
||||
@@ -13,7 +13,6 @@ __version__ = importlib.metadata.version("lancedb")
|
||||
|
||||
from ._lancedb import connect as lancedb_connect
|
||||
from .common import URI, sanitize_uri
|
||||
from urllib.parse import urlparse
|
||||
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
||||
from .io import StorageOptionsProvider
|
||||
from .remote import ClientConfig
|
||||
@@ -29,39 +28,6 @@ from .namespace import (
|
||||
)
|
||||
|
||||
|
||||
def _check_s3_bucket_with_dots(
|
||||
uri: str, storage_options: Optional[Dict[str, str]]
|
||||
) -> None:
|
||||
"""
|
||||
Check if an S3 URI has a bucket name containing dots and warn if no region
|
||||
is specified. S3 buckets with dots cannot use virtual-hosted-style URLs,
|
||||
which breaks automatic region detection.
|
||||
|
||||
See: https://github.com/lancedb/lancedb/issues/1898
|
||||
"""
|
||||
if not isinstance(uri, str) or not uri.startswith("s3://"):
|
||||
return
|
||||
|
||||
parsed = urlparse(uri)
|
||||
bucket = parsed.netloc
|
||||
|
||||
if "." not in bucket:
|
||||
return
|
||||
|
||||
# Check if region is provided in storage_options
|
||||
region_keys = {"region", "aws_region"}
|
||||
has_region = storage_options and any(k in storage_options for k in region_keys)
|
||||
|
||||
if not has_region:
|
||||
raise ValueError(
|
||||
f"S3 bucket name '{bucket}' contains dots, which prevents automatic "
|
||||
f"region detection. Please specify the region explicitly via "
|
||||
f"storage_options={{'region': '<your-region>'}} or "
|
||||
f"storage_options={{'aws_region': '<your-region>'}}. "
|
||||
f"See https://github.com/lancedb/lancedb/issues/1898 for details."
|
||||
)
|
||||
|
||||
|
||||
def connect(
|
||||
uri: URI,
|
||||
*,
|
||||
@@ -155,11 +121,9 @@ def connect(
|
||||
storage_options=storage_options,
|
||||
**kwargs,
|
||||
)
|
||||
_check_s3_bucket_with_dots(str(uri), storage_options)
|
||||
|
||||
if kwargs:
|
||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||
|
||||
return LanceDBConnection(
|
||||
uri,
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
@@ -247,8 +211,6 @@ async def connect_async(
|
||||
if isinstance(client_config, dict):
|
||||
client_config = ClientConfig(**client_config)
|
||||
|
||||
_check_s3_bucket_with_dots(str(uri), storage_options)
|
||||
|
||||
return AsyncConnection(
|
||||
await lancedb_connect(
|
||||
sanitize_uri(uri),
|
||||
|
||||
@@ -3,30 +3,10 @@ from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from .index import (
|
||||
BTree,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfSq,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
||||
from .io import StorageOptionsProvider
|
||||
from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
ListTablesResponse,
|
||||
)
|
||||
from .remote import ClientConfig
|
||||
|
||||
IvfHnswPq: type[HnswPq] = HnswPq
|
||||
IvfHnswSq: type[HnswSq] = HnswSq
|
||||
|
||||
class Session:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -46,38 +26,18 @@ class Connection(object):
|
||||
async def close(self): ...
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse: ...
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse: ...
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse: ...
|
||||
async def describe_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
) -> DescribeNamespaceResponse: ...
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse: ...
|
||||
namespace: Optional[List[str]],
|
||||
page_token: Optional[str],
|
||||
limit: Optional[int],
|
||||
) -> List[str]: ...
|
||||
async def create_namespace(self, namespace: List[str]) -> None: ...
|
||||
async def drop_namespace(self, namespace: List[str]) -> None: ...
|
||||
async def table_names(
|
||||
self,
|
||||
namespace: Optional[List[str]],
|
||||
start_after: Optional[str],
|
||||
limit: Optional[int],
|
||||
) -> list[str]: ... # Deprecated: Use list_tables instead
|
||||
) -> list[str]: ...
|
||||
async def create_table(
|
||||
self,
|
||||
name: str,
|
||||
@@ -144,17 +104,7 @@ class Table:
|
||||
async def create_index(
|
||||
self,
|
||||
column: str,
|
||||
index: Union[
|
||||
IvfFlat,
|
||||
IvfSq,
|
||||
IvfPq,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
BTree,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
FTS,
|
||||
],
|
||||
index: Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS],
|
||||
replace: Optional[bool],
|
||||
wait_timeout: Optional[object],
|
||||
*,
|
||||
|
||||
@@ -22,13 +22,6 @@ from lancedb.embeddings.registry import EmbeddingFunctionRegistry
|
||||
|
||||
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
|
||||
from lancedb.background_loop import LOOP
|
||||
from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
ListTablesResponse,
|
||||
)
|
||||
|
||||
from . import __version__
|
||||
from ._lancedb import connect as lancedb_connect # type: ignore
|
||||
@@ -55,12 +48,6 @@ if TYPE_CHECKING:
|
||||
from .io import StorageOptionsProvider
|
||||
from ._lancedb import Session
|
||||
|
||||
from .namespace_utils import (
|
||||
_normalize_create_namespace_mode,
|
||||
_normalize_drop_namespace_mode,
|
||||
_normalize_drop_namespace_behavior,
|
||||
)
|
||||
|
||||
|
||||
class DBConnection(EnforceOverrides):
|
||||
"""An active LanceDB connection interface."""
|
||||
@@ -69,8 +56,8 @@ class DBConnection(EnforceOverrides):
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -79,119 +66,43 @@ class DBConnection(EnforceOverrides):
|
||||
The parent namespace to list namespaces in.
|
||||
Empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
Iterable of str
|
||||
List of immediate child namespace names
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return ListNamespacesResponse(namespaces=[], page_token=None)
|
||||
return []
|
||||
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_tables is not supported for this connection type"
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def table_names(
|
||||
self,
|
||||
@@ -210,8 +121,10 @@ class DBConnection(EnforceOverrides):
|
||||
page_token: str, optional
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
Typically, this token is last table name from the previous page.
|
||||
Only supported by LanceDb Cloud.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
Only supported by LanceDb Cloud.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -281,10 +194,6 @@ class DBConnection(EnforceOverrides):
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
to `"true"` in storage_options when connecting to the database.
|
||||
data_storage_version: optional, str, default "stable"
|
||||
Deprecated. Set `storage_options` when connecting to the database and set
|
||||
`new_table_data_storage_version` in the options.
|
||||
@@ -644,8 +553,8 @@ class LanceDBConnection(DBConnection):
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -654,15 +563,14 @@ class LanceDBConnection(DBConnection):
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
Iterable of str
|
||||
List of immediate child namespace names
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
@@ -673,111 +581,26 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
LOOP.run(self._conn.create_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
return LOOP.run(self._conn.drop_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -789,9 +612,6 @@ class LanceDBConnection(DBConnection):
|
||||
) -> Iterable[str]:
|
||||
"""Get the names of all tables in the database. The names are sorted.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
@@ -806,13 +626,6 @@ class LanceDBConnection(DBConnection):
|
||||
Iterator of str.
|
||||
A list of table names.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
@@ -1127,8 +940,8 @@ class AsyncConnection(object):
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -1138,128 +951,39 @@ class AsyncConnection(object):
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional pagination token
|
||||
Iterable of str
|
||||
List of immediate child namespace names (not full paths)
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
result = await self._inner.list_namespaces(
|
||||
return await self._inner.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
return ListNamespacesResponse(**result)
|
||||
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
async def create_namespace(self, namespace: List[str]) -> None:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create", "exist_ok", or "overwrite". Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to associate with the namespace
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing namespace properties
|
||||
"""
|
||||
result = await self._inner.create_namespace(
|
||||
namespace,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
return CreateNamespaceResponse(**result)
|
||||
await self._inner.create_namespace(namespace)
|
||||
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
async def drop_namespace(self, namespace: List[str]) -> None:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
result = await self._inner.drop_namespace(
|
||||
namespace,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
return DropNamespaceResponse(**result)
|
||||
|
||||
async def describe_namespace(
|
||||
self, namespace: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
result = await self._inner.describe_namespace(namespace)
|
||||
return DescribeNamespaceResponse(**result)
|
||||
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
result = await self._inner.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
return ListTablesResponse(**result)
|
||||
await self._inner.drop_namespace(namespace)
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
@@ -1270,9 +994,6 @@ class AsyncConnection(object):
|
||||
) -> Iterable[str]:
|
||||
"""List all tables in this database, in sorted order
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
@@ -1291,13 +1012,6 @@ class AsyncConnection(object):
|
||||
-------
|
||||
Iterable of str
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return await self._inner.table_names(
|
||||
@@ -1365,10 +1079,6 @@ class AsyncConnection(object):
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
to `"true"` in storage_options when connecting to the database.
|
||||
|
||||
Returns
|
||||
-------
|
||||
AsyncTable
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import base64
|
||||
import os
|
||||
from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator, Optional
|
||||
from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator
|
||||
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
@@ -45,29 +45,11 @@ def is_valid_url(text):
|
||||
return False
|
||||
|
||||
|
||||
VIDEO_EXTENSIONS = {".mp4", ".webm", ".mov", ".avi", ".mkv", ".m4v", ".gif"}
|
||||
|
||||
|
||||
def is_video_url(url: str) -> bool:
|
||||
"""Check if URL points to a video file based on extension."""
|
||||
parsed = urlparse(url)
|
||||
path = parsed.path.lower()
|
||||
return any(path.endswith(ext) for ext in VIDEO_EXTENSIONS)
|
||||
|
||||
|
||||
def is_video_path(path: Path) -> bool:
|
||||
"""Check if file path is a video file based on extension."""
|
||||
return path.suffix.lower() in VIDEO_EXTENSIONS
|
||||
|
||||
|
||||
def transform_input(input_data: Union[str, bytes, Path]):
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
if isinstance(input_data, str):
|
||||
if is_valid_url(input_data):
|
||||
if is_video_url(input_data):
|
||||
content = {"type": "video_url", "video_url": input_data}
|
||||
else:
|
||||
content = {"type": "image_url", "image_url": input_data}
|
||||
content = {"type": "image_url", "image_url": input_data}
|
||||
else:
|
||||
content = {"type": "text", "text": input_data}
|
||||
elif isinstance(input_data, PIL.Image.Image):
|
||||
@@ -88,24 +70,14 @@ def transform_input(input_data: Union[str, bytes, Path]):
|
||||
"image_base64": "data:image/jpeg;base64," + img_str,
|
||||
}
|
||||
elif isinstance(input_data, Path):
|
||||
if is_video_path(input_data):
|
||||
# Read video file and encode as base64
|
||||
with open(input_data, "rb") as f:
|
||||
video_bytes = f.read()
|
||||
video_str = base64.b64encode(video_bytes).decode("utf-8")
|
||||
content = {
|
||||
"type": "video_base64",
|
||||
"video_base64": video_str,
|
||||
}
|
||||
else:
|
||||
img = PIL.Image.open(input_data)
|
||||
buffered = BytesIO()
|
||||
img.save(buffered, format="JPEG")
|
||||
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||
content = {
|
||||
"type": "image_base64",
|
||||
"image_base64": "data:image/jpeg;base64," + img_str,
|
||||
}
|
||||
img = PIL.Image.open(input_data)
|
||||
buffered = BytesIO()
|
||||
img.save(buffered, format="JPEG")
|
||||
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||
content = {
|
||||
"type": "image_base64",
|
||||
"image_base64": "data:image/jpeg;base64," + img_str,
|
||||
}
|
||||
else:
|
||||
raise ValueError("Each input should be either str, bytes, Path or Image.")
|
||||
|
||||
@@ -119,8 +91,6 @@ def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
if isinstance(inputs, (str, bytes, Path, PIL.Image.Image)):
|
||||
inputs = [inputs]
|
||||
elif isinstance(inputs, list):
|
||||
pass # Already a list, use as-is
|
||||
elif isinstance(inputs, pa.Array):
|
||||
inputs = inputs.to_pylist()
|
||||
elif isinstance(inputs, pa.ChunkedArray):
|
||||
@@ -173,16 +143,11 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
* voyage-3
|
||||
* voyage-3-lite
|
||||
* voyage-multimodal-3
|
||||
* voyage-multimodal-3.5
|
||||
* voyage-finance-2
|
||||
* voyage-multilingual-2
|
||||
* voyage-law-2
|
||||
* voyage-code-2
|
||||
|
||||
output_dimension: int, optional
|
||||
The output dimension for models that support flexible dimensions.
|
||||
Currently only voyage-multimodal-3.5 supports this feature.
|
||||
Valid options: 256, 512, 1024 (default), 2048.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -210,10 +175,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
"""
|
||||
|
||||
name: str
|
||||
output_dimension: Optional[int] = None
|
||||
client: ClassVar = None
|
||||
_FLEXIBLE_DIM_MODELS: ClassVar[list] = ["voyage-multimodal-3.5"]
|
||||
_VALID_DIMENSIONS: ClassVar[list] = [256, 512, 1024, 2048]
|
||||
text_embedding_models: list = [
|
||||
"voyage-3.5",
|
||||
"voyage-3.5-lite",
|
||||
@@ -224,7 +186,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
"voyage-law-2",
|
||||
"voyage-code-2",
|
||||
]
|
||||
multimodal_embedding_models: list = ["voyage-multimodal-3", "voyage-multimodal-3.5"]
|
||||
multimodal_embedding_models: list = ["voyage-multimodal-3"]
|
||||
contextual_embedding_models: list = ["voyage-context-3"]
|
||||
|
||||
def _is_multimodal_model(self, model_name: str):
|
||||
@@ -236,17 +198,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
return model_name in self.contextual_embedding_models or "context" in model_name
|
||||
|
||||
def ndims(self):
|
||||
# Handle flexible dimension models
|
||||
if self.name in self._FLEXIBLE_DIM_MODELS:
|
||||
if self.output_dimension is not None:
|
||||
if self.output_dimension not in self._VALID_DIMENSIONS:
|
||||
raise ValueError(
|
||||
f"Invalid output_dimension {self.output_dimension} "
|
||||
f"for {self.name}. Valid options: {self._VALID_DIMENSIONS}"
|
||||
)
|
||||
return self.output_dimension
|
||||
return 1024 # default dimension
|
||||
|
||||
if self.name == "voyage-3-lite":
|
||||
return 512
|
||||
elif self.name == "voyage-code-2":
|
||||
@@ -260,17 +211,12 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
"voyage-finance-2",
|
||||
"voyage-multilingual-2",
|
||||
"voyage-law-2",
|
||||
"voyage-multimodal-3",
|
||||
]:
|
||||
return 1024
|
||||
else:
|
||||
raise ValueError(f"Model {self.name} not supported")
|
||||
|
||||
def _get_multimodal_kwargs(self, **kwargs):
|
||||
"""Get kwargs for multimodal embed call, including output_dimension if set."""
|
||||
if self.name in self._FLEXIBLE_DIM_MODELS and self.output_dimension is not None:
|
||||
kwargs["output_dimension"] = self.output_dimension
|
||||
return kwargs
|
||||
|
||||
def compute_query_embeddings(
|
||||
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
|
||||
) -> List[np.ndarray]:
|
||||
@@ -288,7 +234,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
"""
|
||||
client = VoyageAIEmbeddingFunction._get_client()
|
||||
if self._is_multimodal_model(self.name):
|
||||
kwargs = self._get_multimodal_kwargs(**kwargs)
|
||||
result = client.multimodal_embed(
|
||||
inputs=[[query]], model=self.name, input_type="query", **kwargs
|
||||
)
|
||||
@@ -330,7 +275,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
)
|
||||
if has_images:
|
||||
# Use non-batched API for images
|
||||
kwargs = self._get_multimodal_kwargs(**kwargs)
|
||||
result = client.multimodal_embed(
|
||||
inputs=sanitized, model=self.name, input_type="document", **kwargs
|
||||
)
|
||||
@@ -413,7 +357,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
callable: A function that takes a batch of texts and returns embeddings.
|
||||
"""
|
||||
if self._is_multimodal_model(self.name):
|
||||
multimodal_kwargs = self._get_multimodal_kwargs(**kwargs)
|
||||
|
||||
def embed_batch(batch: List[str]) -> List[np.array]:
|
||||
batch_inputs = sanitize_multimodal_input(batch)
|
||||
@@ -421,7 +364,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
inputs=batch_inputs,
|
||||
model=self.name,
|
||||
input_type=input_type,
|
||||
**multimodal_kwargs,
|
||||
**kwargs,
|
||||
)
|
||||
return result.embeddings
|
||||
|
||||
|
||||
@@ -376,11 +376,6 @@ class HnswSq:
|
||||
target_partition_size: Optional[int] = None
|
||||
|
||||
|
||||
# Backwards-compatible aliases
|
||||
IvfHnswPq = HnswPq
|
||||
IvfHnswSq = HnswSq
|
||||
|
||||
|
||||
@dataclass
|
||||
class IvfFlat:
|
||||
"""Describes an IVF Flat Index
|
||||
@@ -480,36 +475,6 @@ class IvfFlat:
|
||||
target_partition_size: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class IvfSq:
|
||||
"""Describes an IVF Scalar Quantization (SQ) index.
|
||||
|
||||
This index applies scalar quantization to compress vectors and organizes the
|
||||
quantized vectors into IVF partitions. It offers a balance between search
|
||||
speed and storage efficiency while keeping good recall.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
distance_type: str, default "l2"
|
||||
The distance metric used to train and search the index. Supported values
|
||||
are "l2", "cosine", and "dot".
|
||||
num_partitions: int, default sqrt(num_rows)
|
||||
Number of IVF partitions to create.
|
||||
max_iterations: int, default 50
|
||||
Maximum iterations for kmeans during partition training.
|
||||
sample_rate: int, default 256
|
||||
Controls the number of training vectors: sample_rate * num_partitions.
|
||||
target_partition_size: int, optional
|
||||
Target size for each partition; adjusts the balance between speed and accuracy.
|
||||
"""
|
||||
|
||||
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
||||
num_partitions: Optional[int] = None
|
||||
max_iterations: int = 50
|
||||
sample_rate: int = 256
|
||||
target_partition_size: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class IvfPq:
|
||||
"""Describes an IVF PQ Index
|
||||
@@ -644,19 +609,9 @@ class IvfPq:
|
||||
class IvfRq:
|
||||
"""Describes an IVF RQ Index
|
||||
|
||||
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
|
||||
and organizes them into IVF partitions.
|
||||
|
||||
The compression scheme is called RabitQ quantization. Each dimension is
|
||||
quantized into a small number of bits. The parameters `num_bits` and
|
||||
`num_partitions` control this process, providing a tradeoff between
|
||||
index size (and thus search speed) and index accuracy.
|
||||
|
||||
The partitioning process is called IVF and the `num_partitions` parameter
|
||||
controls how many groups to create.
|
||||
|
||||
Note that training an IVF RQ index on a large dataset is a slow operation
|
||||
and currently is also a memory intensive operation.
|
||||
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
|
||||
residual quantization and organizes them into IVF partitions. Parameters
|
||||
largely mirror IVF-PQ for consistency.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
@@ -673,7 +628,7 @@ class IvfRq:
|
||||
Number of IVF partitions to create.
|
||||
|
||||
num_bits: int, default 1
|
||||
Number of bits to encode each dimension in the RabitQ codebook.
|
||||
Number of bits to encode each dimension.
|
||||
|
||||
max_iterations: int, default 50
|
||||
Max iterations to train kmeans when computing IVF partitions.
|
||||
@@ -696,9 +651,6 @@ class IvfRq:
|
||||
__all__ = [
|
||||
"BTree",
|
||||
"IvfPq",
|
||||
"IvfHnswPq",
|
||||
"IvfHnswSq",
|
||||
"IvfSq",
|
||||
"IvfRq",
|
||||
"IvfFlat",
|
||||
"HnswPq",
|
||||
|
||||
@@ -23,29 +23,7 @@ from datetime import timedelta
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb.db import DBConnection, LanceDBConnection
|
||||
from lancedb.namespace_utils import (
|
||||
_normalize_create_namespace_mode,
|
||||
_normalize_drop_namespace_mode,
|
||||
_normalize_drop_namespace_behavior,
|
||||
)
|
||||
from lancedb.io import StorageOptionsProvider
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
connect as namespace_connect,
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
ListNamespacesResponse,
|
||||
ListTablesResponse,
|
||||
ListTablesRequest,
|
||||
DescribeTableRequest,
|
||||
DescribeNamespaceRequest,
|
||||
DropTableRequest,
|
||||
ListNamespacesRequest,
|
||||
CreateNamespaceRequest,
|
||||
DropNamespaceRequest,
|
||||
CreateEmptyTableRequest,
|
||||
)
|
||||
from lancedb.table import AsyncTable, LanceTable, Table
|
||||
from lancedb.util import validate_table_name
|
||||
from lancedb.common import DATA
|
||||
@@ -53,9 +31,19 @@ from lancedb.pydantic import LanceModel
|
||||
from lancedb.embeddings import EmbeddingFunctionConfig
|
||||
from ._lancedb import Session
|
||||
|
||||
from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
|
||||
from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
|
||||
from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
|
||||
from lance_namespace import LanceNamespace, connect as namespace_connect
|
||||
from lance_namespace_urllib3_client.models import (
|
||||
ListTablesRequest,
|
||||
DescribeTableRequest,
|
||||
DropTableRequest,
|
||||
ListNamespacesRequest,
|
||||
CreateNamespaceRequest,
|
||||
DropNamespaceRequest,
|
||||
CreateEmptyTableRequest,
|
||||
JsonArrowSchema,
|
||||
JsonArrowField,
|
||||
JsonArrowDataType,
|
||||
)
|
||||
|
||||
|
||||
def _convert_pyarrow_type_to_json(arrow_type: pa.DataType) -> JsonArrowDataType:
|
||||
@@ -139,17 +127,13 @@ class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider):
|
||||
|
||||
Examples
|
||||
--------
|
||||
Create a provider and fetch storage options::
|
||||
|
||||
from lance_namespace import connect as namespace_connect
|
||||
|
||||
# Connect to namespace (requires a running namespace server)
|
||||
namespace = namespace_connect("rest", {"uri": "https://..."})
|
||||
provider = LanceNamespaceStorageOptionsProvider(
|
||||
namespace=namespace,
|
||||
table_id=["my_namespace", "my_table"]
|
||||
)
|
||||
options = provider.fetch_storage_options()
|
||||
>>> from lance_namespace import connect as namespace_connect
|
||||
>>> namespace = namespace_connect("rest", {"url": "https://..."})
|
||||
>>> provider = LanceNamespaceStorageOptionsProvider(
|
||||
... namespace=namespace,
|
||||
... table_id=["my_namespace", "my_table"]
|
||||
... )
|
||||
>>> options = provider.fetch_storage_options()
|
||||
"""
|
||||
|
||||
def __init__(self, namespace: LanceNamespace, table_id: List[str]):
|
||||
@@ -253,19 +237,6 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""
|
||||
List table names in the database.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
@@ -458,8 +429,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
"""
|
||||
List child namespaces under the given namespace.
|
||||
|
||||
@@ -469,15 +440,14 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
The parent namespace to list children from.
|
||||
If None, lists root-level namespaces.
|
||||
page_token : Optional[str]
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
Pagination token for listing results.
|
||||
limit : int
|
||||
Maximum number of namespaces to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
Iterable[str]
|
||||
Names of child namespaces.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
@@ -485,18 +455,10 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
id=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
response = self._ns.list_namespaces(request)
|
||||
return ListNamespacesResponse(
|
||||
namespaces=response.namespaces if response.namespaces else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
return response.namespaces if response.namespaces else []
|
||||
|
||||
@override
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
"""
|
||||
Create a new namespace.
|
||||
|
||||
@@ -504,34 +466,12 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to create.
|
||||
mode : str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties : Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
request = CreateNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
response = self._ns.create_namespace(request)
|
||||
return CreateNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
request = CreateNamespaceRequest(id=namespace)
|
||||
self._ns.create_namespace(request)
|
||||
|
||||
@override
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
"""
|
||||
Drop a namespace.
|
||||
|
||||
@@ -539,87 +479,9 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to drop.
|
||||
mode : str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior : str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
request = DropNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
response = self._ns.drop_namespace(request)
|
||||
return DropNamespaceResponse(
|
||||
properties=(
|
||||
response.properties if hasattr(response, "properties") else None
|
||||
),
|
||||
transaction_id=(
|
||||
response.transaction_id if hasattr(response, "transaction_id") else None
|
||||
),
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""
|
||||
Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
request = DescribeNamespaceRequest(id=namespace)
|
||||
response = self._ns.describe_namespace(request)
|
||||
return DescribeNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""
|
||||
List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token : str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
response = self._ns.list_tables(request)
|
||||
return ListTablesResponse(
|
||||
tables=response.tables if response.tables else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
request = DropNamespaceRequest(id=namespace)
|
||||
self._ns.drop_namespace(request)
|
||||
|
||||
def _lance_table_from_uri(
|
||||
self,
|
||||
@@ -697,19 +559,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""
|
||||
List table names in the namespace.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
"""List table names in the namespace."""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
@@ -917,8 +767,8 @@ class AsyncLanceNamespaceDBConnection:
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
"""
|
||||
List child namespaces under the given namespace.
|
||||
|
||||
@@ -928,15 +778,14 @@ class AsyncLanceNamespaceDBConnection:
|
||||
The parent namespace to list children from.
|
||||
If None, lists root-level namespaces.
|
||||
page_token : Optional[str]
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
Pagination token for listing results.
|
||||
limit : int
|
||||
Maximum number of namespaces to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
Iterable[str]
|
||||
Names of child namespaces.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
@@ -944,17 +793,9 @@ class AsyncLanceNamespaceDBConnection:
|
||||
id=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
response = self._ns.list_namespaces(request)
|
||||
return ListNamespacesResponse(
|
||||
namespaces=response.namespaces if response.namespaces else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
return response.namespaces if response.namespaces else []
|
||||
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
async def create_namespace(self, namespace: List[str]) -> None:
|
||||
"""
|
||||
Create a new namespace.
|
||||
|
||||
@@ -962,33 +803,11 @@ class AsyncLanceNamespaceDBConnection:
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to create.
|
||||
mode : str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties : Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
request = CreateNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
response = self._ns.create_namespace(request)
|
||||
return CreateNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
request = CreateNamespaceRequest(id=namespace)
|
||||
self._ns.create_namespace(request)
|
||||
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
async def drop_namespace(self, namespace: List[str]) -> None:
|
||||
"""
|
||||
Drop a namespace.
|
||||
|
||||
@@ -996,87 +815,9 @@ class AsyncLanceNamespaceDBConnection:
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to drop.
|
||||
mode : str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior : str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
request = DropNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
response = self._ns.drop_namespace(request)
|
||||
return DropNamespaceResponse(
|
||||
properties=(
|
||||
response.properties if hasattr(response, "properties") else None
|
||||
),
|
||||
transaction_id=(
|
||||
response.transaction_id if hasattr(response, "transaction_id") else None
|
||||
),
|
||||
)
|
||||
|
||||
async def describe_namespace(
|
||||
self, namespace: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""
|
||||
Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
request = DescribeNamespaceRequest(id=namespace)
|
||||
response = self._ns.describe_namespace(request)
|
||||
return DescribeNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""
|
||||
List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token : str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
response = self._ns.list_tables(request)
|
||||
return ListTablesResponse(
|
||||
tables=response.tables if response.tables else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
request = DropNamespaceRequest(id=namespace)
|
||||
self._ns.drop_namespace(request)
|
||||
|
||||
|
||||
def connect_namespace(
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Utility functions for namespace operations."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _normalize_create_namespace_mode(mode: Optional[str]) -> Optional[str]:
|
||||
"""Normalize create namespace mode to lowercase (API expects lowercase)."""
|
||||
if mode is None:
|
||||
return None
|
||||
return mode.lower()
|
||||
|
||||
|
||||
def _normalize_drop_namespace_mode(mode: Optional[str]) -> Optional[str]:
|
||||
"""Normalize drop namespace mode to uppercase (API expects uppercase)."""
|
||||
if mode is None:
|
||||
return None
|
||||
return mode.upper()
|
||||
|
||||
|
||||
def _normalize_drop_namespace_behavior(behavior: Optional[str]) -> Optional[str]:
|
||||
"""Normalize drop namespace behavior to uppercase (API expects uppercase)."""
|
||||
if behavior is None:
|
||||
return None
|
||||
return behavior.upper()
|
||||
@@ -2429,8 +2429,9 @@ class AsyncQueryBase(object):
|
||||
>>> from lancedb import connect_async
|
||||
>>> async def doctest_example():
|
||||
... conn = await connect_async("./.lancedb")
|
||||
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
|
||||
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
|
||||
... query = [100, 100]
|
||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||
@@ -2439,7 +2440,6 @@ class AsyncQueryBase(object):
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
<BLANKLINE>
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -3141,9 +3141,10 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
>>> from lancedb.index import FTS
|
||||
>>> async def doctest_example():
|
||||
... conn = await connect_async("./.lancedb")
|
||||
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0], "text": "hello world"}])
|
||||
... table = await conn.create_table("my_table", [{"vector": [99, 99], "text": "hello world"}])
|
||||
... await table.create_index("text", config=FTS(with_position=False))
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
|
||||
... query = [100, 100]
|
||||
... plan = await table.query().nearest_to([1, 2]).nearest_to_text("hello").explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
Vector Search Plan:
|
||||
@@ -3417,8 +3418,9 @@ class BaseQueryBuilder(object):
|
||||
>>> from lancedb import connect_async
|
||||
>>> async def doctest_example():
|
||||
... conn = await connect_async("./.lancedb")
|
||||
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
|
||||
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
|
||||
... query = [100, 100]
|
||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||
@@ -3427,7 +3429,6 @@ class BaseQueryBuilder(object):
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
<BLANKLINE>
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
@@ -23,13 +23,6 @@ import pyarrow as pa
|
||||
from ..common import DATA
|
||||
from ..db import DBConnection, LOOP
|
||||
from ..embeddings import EmbeddingFunctionConfig
|
||||
from lance_namespace import (
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
ListNamespacesResponse,
|
||||
ListTablesResponse,
|
||||
)
|
||||
from ..pydantic import LanceModel
|
||||
from ..table import Table
|
||||
from ..util import validate_table_name
|
||||
@@ -113,8 +106,8 @@ class RemoteDBConnection(DBConnection):
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -123,15 +116,14 @@ class RemoteDBConnection(DBConnection):
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
Iterable of str
|
||||
List of immediate child namespace names
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
@@ -142,111 +134,26 @@ class RemoteDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
LOOP.run(self._conn.create_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
return LOOP.run(self._conn.drop_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -258,9 +165,6 @@ class RemoteDBConnection(DBConnection):
|
||||
) -> Iterable[str]:
|
||||
"""List the names of all tables in the database.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], default []
|
||||
@@ -275,13 +179,6 @@ class RemoteDBConnection(DBConnection):
|
||||
-------
|
||||
An iterator of table names.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
@@ -384,7 +281,6 @@ class RemoteDBConnection(DBConnection):
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
mode: Optional[str] = None,
|
||||
exist_ok: bool = False,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
@@ -413,12 +309,6 @@ class RemoteDBConnection(DBConnection):
|
||||
- pyarrow.Schema
|
||||
|
||||
- [LanceModel][lancedb.pydantic.LanceModel]
|
||||
mode: str, default "create"
|
||||
The mode to use when creating the table.
|
||||
Can be either "create", "overwrite", or "exist_ok".
|
||||
exist_ok: bool, default False
|
||||
If exist_ok is True, and mode is None or "create", mode will be changed
|
||||
to "exist_ok".
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
@@ -490,11 +380,6 @@ class RemoteDBConnection(DBConnection):
|
||||
LanceTable(table4)
|
||||
|
||||
"""
|
||||
if exist_ok:
|
||||
if mode == "create":
|
||||
mode = "exist_ok"
|
||||
elif not mode:
|
||||
mode = "exist_ok"
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
validate_table_name(name)
|
||||
|
||||
@@ -18,17 +18,7 @@ from lancedb._lancedb import (
|
||||
UpdateResult,
|
||||
)
|
||||
from lancedb.embeddings.base import EmbeddingFunctionConfig
|
||||
from lancedb.index import (
|
||||
FTS,
|
||||
BTree,
|
||||
Bitmap,
|
||||
HnswSq,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfRq,
|
||||
IvfSq,
|
||||
LabelList,
|
||||
)
|
||||
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
|
||||
from lancedb.remote.db import LOOP
|
||||
import pyarrow as pa
|
||||
|
||||
@@ -275,14 +265,6 @@ class RemoteTable(Table):
|
||||
num_sub_vectors=num_sub_vectors,
|
||||
num_bits=num_bits,
|
||||
)
|
||||
elif index_type == "IVF_RQ":
|
||||
config = IvfRq(
|
||||
distance_type=metric,
|
||||
num_partitions=num_partitions,
|
||||
num_bits=num_bits,
|
||||
)
|
||||
elif index_type == "IVF_SQ":
|
||||
config = IvfSq(distance_type=metric, num_partitions=num_partitions)
|
||||
elif index_type == "IVF_HNSW_PQ":
|
||||
raise ValueError(
|
||||
"IVF_HNSW_PQ is not supported on LanceDB cloud."
|
||||
@@ -295,8 +277,7 @@ class RemoteTable(Table):
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unknown vector index type: {index_type}. Valid options are"
|
||||
" 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
|
||||
" 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
|
||||
" 'IVF_FLAT', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
|
||||
)
|
||||
|
||||
LOOP.run(
|
||||
|
||||
@@ -44,18 +44,7 @@ import numpy as np
|
||||
|
||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
||||
from .index import (
|
||||
BTree,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfSq,
|
||||
Bitmap,
|
||||
IvfRq,
|
||||
LabelList,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
|
||||
from .merge import LanceMergeInsertBuilder
|
||||
from .pydantic import LanceModel, model_to_dict
|
||||
from .query import (
|
||||
@@ -684,24 +673,6 @@ class Table(ABC):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def to_lance(self, **kwargs) -> lance.LanceDataset:
|
||||
"""Return the table as a lance.LanceDataset.
|
||||
|
||||
Returns
|
||||
-------
|
||||
lance.LanceDataset
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def to_polars(self, **kwargs) -> "pl.DataFrame":
|
||||
"""Return the table as a polars.DataFrame.
|
||||
|
||||
Returns
|
||||
-------
|
||||
polars.DataFrame
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="l2",
|
||||
@@ -1047,7 +1018,7 @@ class Table(ABC):
|
||||
... .when_not_matched_insert_all() \\
|
||||
... .execute(new_data)
|
||||
>>> res
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
||||
>>> # The order of new rows is non-deterministic since we use
|
||||
>>> # a hash-join as part of this operation and so we sort here
|
||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||
@@ -2083,7 +2054,7 @@ class LanceTable(Table):
|
||||
index_cache_size: Optional[int] = None,
|
||||
num_bits: int = 8,
|
||||
index_type: Literal[
|
||||
"IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
||||
"IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
||||
] = "IVF_PQ",
|
||||
max_iterations: int = 50,
|
||||
sample_rate: int = 256,
|
||||
@@ -2121,14 +2092,6 @@ class LanceTable(Table):
|
||||
sample_rate=sample_rate,
|
||||
target_partition_size=target_partition_size,
|
||||
)
|
||||
elif index_type == "IVF_SQ":
|
||||
config = IvfSq(
|
||||
distance_type=metric,
|
||||
num_partitions=num_partitions,
|
||||
max_iterations=max_iterations,
|
||||
sample_rate=sample_rate,
|
||||
target_partition_size=target_partition_size,
|
||||
)
|
||||
elif index_type == "IVF_PQ":
|
||||
config = IvfPq(
|
||||
distance_type=metric,
|
||||
@@ -3226,27 +3189,7 @@ def _infer_target_schema(
|
||||
if pa.types.is_floating(field.type.value_type):
|
||||
target_type = pa.list_(pa.float32(), dim)
|
||||
elif pa.types.is_integer(field.type.value_type):
|
||||
values = peeked.column(i)
|
||||
|
||||
if isinstance(values, pa.ChunkedArray):
|
||||
values = values.combine_chunks()
|
||||
|
||||
flattened = values.flatten()
|
||||
valid_count = pc.count(flattened, mode="only_valid").as_py()
|
||||
|
||||
if valid_count == 0:
|
||||
target_type = pa.list_(pa.uint8(), dim)
|
||||
else:
|
||||
min_max = pc.min_max(flattened)
|
||||
min_value = min_max["min"].as_py()
|
||||
max_value = min_max["max"].as_py()
|
||||
|
||||
if (min_value is not None and min_value < 0) or (
|
||||
max_value is not None and max_value > 255
|
||||
):
|
||||
target_type = pa.list_(pa.float32(), dim)
|
||||
else:
|
||||
target_type = pa.list_(pa.uint8(), dim)
|
||||
target_type = pa.list_(pa.uint8(), dim)
|
||||
else:
|
||||
continue # Skip non-numeric types
|
||||
|
||||
@@ -3513,22 +3456,11 @@ class AsyncTable:
|
||||
if config is not None:
|
||||
if not isinstance(
|
||||
config,
|
||||
(
|
||||
IvfFlat,
|
||||
IvfSq,
|
||||
IvfPq,
|
||||
IvfRq,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
BTree,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
FTS,
|
||||
),
|
||||
(IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
|
||||
):
|
||||
raise TypeError(
|
||||
"config must be an instance of IvfSq, IvfPq, IvfRq, HnswPq, HnswSq,"
|
||||
" BTree, Bitmap, LabelList, or FTS, but got " + str(type(config))
|
||||
"config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
|
||||
" Bitmap, LabelList, or FTS, but got " + str(type(config))
|
||||
)
|
||||
try:
|
||||
await self._inner.create_index(
|
||||
@@ -3702,7 +3634,7 @@ class AsyncTable:
|
||||
... .when_not_matched_insert_all() \\
|
||||
... .execute(new_data)
|
||||
>>> res
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
||||
>>> # The order of new rows is non-deterministic since we use
|
||||
>>> # a hash-join as part of this operation and so we sort here
|
||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||
|
||||
@@ -18,20 +18,12 @@ AddMode = Literal["append", "overwrite"]
|
||||
CreateMode = Literal["create", "overwrite"]
|
||||
|
||||
# Index type literals
|
||||
VectorIndexType = Literal[
|
||||
"IVF_FLAT",
|
||||
"IVF_SQ",
|
||||
"IVF_PQ",
|
||||
"IVF_HNSW_SQ",
|
||||
"IVF_HNSW_PQ",
|
||||
"IVF_RQ",
|
||||
]
|
||||
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
|
||||
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
|
||||
IndexType = Literal[
|
||||
"IVF_PQ",
|
||||
"IVF_HNSW_PQ",
|
||||
"IVF_HNSW_SQ",
|
||||
"IVF_SQ",
|
||||
"FTS",
|
||||
"BTREE",
|
||||
"BITMAP",
|
||||
|
||||
@@ -441,150 +441,6 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
|
||||
assert re.match(r"\d{20}\.manifest", manifest)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_table_stable_row_ids_via_storage_options(tmp_path):
|
||||
"""Test stable_row_ids via storage_options at connect time."""
|
||||
import lance
|
||||
|
||||
# Connect with stable row IDs enabled as default for new tables
|
||||
db_with = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
# Connect without stable row IDs (default)
|
||||
db_without = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
|
||||
)
|
||||
|
||||
# Create table using connection with stable row IDs enabled
|
||||
await db_with.create_table(
|
||||
"with_stable_via_opts",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_with = lance.dataset(tmp_path / "with_stable_via_opts.lance")
|
||||
fragments_with = lance_ds_with.get_fragments()
|
||||
assert len(fragments_with) > 0
|
||||
assert fragments_with[0].metadata.row_id_meta is not None
|
||||
|
||||
# Create table using connection without stable row IDs
|
||||
await db_without.create_table(
|
||||
"without_stable_via_opts",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_without = lance.dataset(tmp_path / "without_stable_via_opts.lance")
|
||||
fragments_without = lance_ds_without.get_fragments()
|
||||
assert len(fragments_without) > 0
|
||||
assert fragments_without[0].metadata.row_id_meta is None
|
||||
|
||||
|
||||
def test_create_table_stable_row_ids_via_storage_options_sync(tmp_path):
|
||||
"""Test that enable_stable_row_ids can be set via storage_options (sync API)."""
|
||||
# Connect with stable row IDs enabled as default for new tables
|
||||
db_with = lancedb.connect(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
# Connect without stable row IDs (default)
|
||||
db_without = lancedb.connect(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
|
||||
)
|
||||
|
||||
# Create table using connection with stable row IDs enabled
|
||||
tbl_with = db_with.create_table(
|
||||
"with_stable_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_with = tbl_with.to_lance()
|
||||
fragments_with = lance_ds_with.get_fragments()
|
||||
assert len(fragments_with) > 0
|
||||
assert fragments_with[0].metadata.row_id_meta is not None
|
||||
|
||||
# Create table using connection without stable row IDs
|
||||
tbl_without = db_without.create_table(
|
||||
"without_stable_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_without = tbl_without.to_lance()
|
||||
fragments_without = lance_ds_without.get_fragments()
|
||||
assert len(fragments_without) > 0
|
||||
assert fragments_without[0].metadata.row_id_meta is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_table_stable_row_ids_table_level_override(tmp_path):
|
||||
"""Test that stable_row_ids can be enabled/disabled at create_table level."""
|
||||
import lance
|
||||
|
||||
# Connect without any stable row ID setting
|
||||
db_default = await lancedb.connect_async(tmp_path)
|
||||
|
||||
# Connect with stable row IDs enabled at connection level
|
||||
db_with_stable = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
|
||||
# Case 1: No connection setting, enable at table level
|
||||
await db_default.create_table(
|
||||
"table_level_enabled",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "true"},
|
||||
)
|
||||
lance_ds = lance.dataset(tmp_path / "table_level_enabled.lance")
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is not None, (
|
||||
"Table should have stable row IDs when enabled at table level"
|
||||
)
|
||||
|
||||
# Case 2: Connection has stable row IDs, override with false at table level
|
||||
await db_with_stable.create_table(
|
||||
"table_level_disabled",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "false"},
|
||||
)
|
||||
lance_ds = lance.dataset(tmp_path / "table_level_disabled.lance")
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is None, (
|
||||
"Table should NOT have stable row IDs when disabled at table level"
|
||||
)
|
||||
|
||||
|
||||
def test_create_table_stable_row_ids_table_level_override_sync(tmp_path):
|
||||
"""Test that stable_row_ids can be enabled/disabled at create_table level (sync)."""
|
||||
# Connect without any stable row ID setting
|
||||
db_default = lancedb.connect(tmp_path)
|
||||
|
||||
# Connect with stable row IDs enabled at connection level
|
||||
db_with_stable = lancedb.connect(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
|
||||
# Case 1: No connection setting, enable at table level
|
||||
tbl = db_default.create_table(
|
||||
"table_level_enabled_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "true"},
|
||||
)
|
||||
lance_ds = tbl.to_lance()
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is not None, (
|
||||
"Table should have stable row IDs when enabled at table level"
|
||||
)
|
||||
|
||||
# Case 2: Connection has stable row IDs, override with false at table level
|
||||
tbl = db_with_stable.create_table(
|
||||
"table_level_disabled_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "false"},
|
||||
)
|
||||
lance_ds = tbl.to_lance()
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is None, (
|
||||
"Table should NOT have stable row IDs when disabled at table level"
|
||||
)
|
||||
|
||||
|
||||
def test_open_table_sync(tmp_db: lancedb.DBConnection):
|
||||
tmp_db.create_table("test", data=[{"id": 0}])
|
||||
assert tmp_db.open_table("test").count_rows() == 1
|
||||
@@ -892,7 +748,7 @@ def test_local_namespace_operations(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
# Test list_namespaces returns empty list for root namespace
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
namespaces = list(db.list_namespaces())
|
||||
assert namespaces == []
|
||||
|
||||
# Test list_namespaces with non-empty namespace raises NotImplementedError
|
||||
@@ -900,7 +756,7 @@ def test_local_namespace_operations(tmp_path):
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.list_namespaces(namespace=["test"])
|
||||
list(db.list_namespaces(namespace=["test"]))
|
||||
|
||||
|
||||
def test_local_create_namespace_not_supported(tmp_path):
|
||||
|
||||
@@ -613,133 +613,6 @@ def test_voyageai_multimodal_embedding_text_function():
|
||||
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
||||
)
|
||||
def test_voyageai_multimodal_35_embedding_function():
|
||||
"""Test voyage-multimodal-3.5 model with text input."""
|
||||
voyageai = (
|
||||
get_registry()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-multimodal-3.5", max_retries=0)
|
||||
)
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
|
||||
db = lancedb.connect("~/lancedb")
|
||||
tbl = db.create_table("test_multimodal_35", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(df)
|
||||
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
|
||||
assert voyageai.ndims() == 1024
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
||||
)
|
||||
def test_voyageai_multimodal_35_flexible_dimensions():
|
||||
"""Test voyage-multimodal-3.5 model with custom output dimension."""
|
||||
voyageai = (
|
||||
get_registry()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-multimodal-3.5", output_dimension=512, max_retries=0)
|
||||
)
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
assert voyageai.ndims() == 512
|
||||
|
||||
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
|
||||
db = lancedb.connect("~/lancedb")
|
||||
tbl = db.create_table("test_multimodal_35_dim", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(df)
|
||||
assert len(tbl.to_pandas()["vector"][0]) == 512
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
||||
)
|
||||
def test_voyageai_multimodal_35_image_embedding():
|
||||
"""Test voyage-multimodal-3.5 model with image input."""
|
||||
voyageai = (
|
||||
get_registry()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-multimodal-3.5", max_retries=0)
|
||||
)
|
||||
|
||||
class Images(LanceModel):
|
||||
label: str
|
||||
image_uri: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
db = lancedb.connect("~/lancedb")
|
||||
table = db.create_table(
|
||||
"test_multimodal_35_images", schema=Images, mode="overwrite"
|
||||
)
|
||||
labels = ["cat", "dog"]
|
||||
uris = [
|
||||
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
|
||||
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
|
||||
]
|
||||
table.add(pd.DataFrame({"label": labels, "image_uri": uris}))
|
||||
assert len(table.to_pandas()["vector"][0]) == voyageai.ndims()
|
||||
assert voyageai.ndims() == 1024
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
||||
)
|
||||
@pytest.mark.parametrize("dimension", [256, 512, 1024, 2048])
|
||||
def test_voyageai_multimodal_35_all_dimensions(dimension):
|
||||
"""Test voyage-multimodal-3.5 model with all valid output dimensions."""
|
||||
voyageai = (
|
||||
get_registry()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-multimodal-3.5", output_dimension=dimension, max_retries=0)
|
||||
)
|
||||
|
||||
assert voyageai.ndims() == dimension
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
df = pd.DataFrame({"text": ["hello world"]})
|
||||
db = lancedb.connect("~/lancedb")
|
||||
tbl = db.create_table(
|
||||
f"test_multimodal_35_dim_{dimension}", schema=TextModel, mode="overwrite"
|
||||
)
|
||||
|
||||
tbl.add(df)
|
||||
assert len(tbl.to_pandas()["vector"][0]) == dimension
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
||||
)
|
||||
def test_voyageai_multimodal_35_invalid_dimension():
|
||||
"""Test voyage-multimodal-3.5 model raises error for invalid output dimension."""
|
||||
with pytest.raises(ValueError, match="Invalid output_dimension"):
|
||||
voyageai = (
|
||||
get_registry()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-multimodal-3.5", output_dimension=999, max_retries=0)
|
||||
)
|
||||
# ndims() is where the validation happens
|
||||
voyageai.ndims()
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skipif(
|
||||
importlib.util.find_spec("colpali_engine") is None,
|
||||
|
||||
@@ -12,9 +12,6 @@ from lancedb.index import (
|
||||
BTree,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfSq,
|
||||
IvfHnswPq,
|
||||
IvfHnswSq,
|
||||
IvfRq,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
@@ -232,35 +229,6 @@ async def test_create_hnswsq_index(some_table: AsyncTable):
|
||||
assert len(indices) == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_hnswsq_alias_index(some_table: AsyncTable):
|
||||
await some_table.create_index("vector", config=IvfHnswSq(num_partitions=5))
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type in {"HnswSq", "IvfHnswSq"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_hnswpq_alias_index(some_table: AsyncTable):
|
||||
await some_table.create_index("vector", config=IvfHnswPq(num_partitions=5))
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type in {"HnswPq", "IvfHnswPq"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_ivfsq_index(some_table: AsyncTable):
|
||||
await some_table.create_index("vector", config=IvfSq(num_partitions=10))
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type == "IvfSq"
|
||||
stats = await some_table.index_stats(indices[0].name)
|
||||
assert stats.index_type == "IVF_SQ"
|
||||
assert stats.distance_type == "l2"
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_index_with_binary_vectors(binary_table: AsyncTable):
|
||||
await binary_table.create_index(
|
||||
|
||||
@@ -279,13 +279,13 @@ class TestNamespaceConnection:
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
|
||||
# Initially no namespaces
|
||||
assert len(db.list_namespaces().namespaces) == 0
|
||||
assert len(list(db.list_namespaces())) == 0
|
||||
|
||||
# Create a namespace
|
||||
db.create_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace exists
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
namespaces = list(db.list_namespaces())
|
||||
assert "test_namespace" in namespaces
|
||||
assert len(namespaces) == 1
|
||||
|
||||
@@ -322,7 +322,7 @@ class TestNamespaceConnection:
|
||||
db.drop_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace no longer exists
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
namespaces = list(db.list_namespaces())
|
||||
assert len(namespaces) == 0
|
||||
|
||||
def test_namespace_with_tables_cannot_be_dropped(self):
|
||||
@@ -570,13 +570,13 @@ class TestAsyncNamespaceConnection:
|
||||
|
||||
# Initially no namespaces
|
||||
namespaces = await db.list_namespaces()
|
||||
assert len(namespaces.namespaces) == 0
|
||||
assert len(list(namespaces)) == 0
|
||||
|
||||
# Create a namespace
|
||||
await db.create_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace exists
|
||||
namespaces = (await db.list_namespaces()).namespaces
|
||||
namespaces = list(await db.list_namespaces())
|
||||
assert "test_namespace" in namespaces
|
||||
assert len(namespaces) == 1
|
||||
|
||||
@@ -608,7 +608,7 @@ class TestAsyncNamespaceConnection:
|
||||
await db.drop_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace no longer exists
|
||||
namespaces = (await db.list_namespaces()).namespaces
|
||||
namespaces = list(await db.list_namespaces())
|
||||
assert len(namespaces) == 0
|
||||
|
||||
async def test_drop_all_tables_async(self):
|
||||
|
||||
@@ -168,42 +168,6 @@ def test_table_len_sync():
|
||||
assert len(table) == 1
|
||||
|
||||
|
||||
def test_create_table_exist_ok():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/create/?mode=exist_ok":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(b"{}")
|
||||
else:
|
||||
request.send_response(404)
|
||||
request.end_headers()
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
table = db.create_table("test", [{"id": 1}], exist_ok=True)
|
||||
assert table is not None
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
table = db.create_table("test", [{"id": 1}], mode="create", exist_ok=True)
|
||||
assert table is not None
|
||||
|
||||
|
||||
def test_create_table_exist_ok_with_mode_overwrite():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/create/?mode=overwrite":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(b"{}")
|
||||
else:
|
||||
request.send_response(404)
|
||||
request.end_headers()
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
table = db.create_table("test", [{"id": 1}], mode="overwrite", exist_ok=True)
|
||||
assert table is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_http_error():
|
||||
request_id_holder = {"request_id": None}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user