Compare commits

...

16 Commits

Author SHA1 Message Date
Lance Release
ba94e69d5d Bump version: 0.26.0 → 0.26.1-beta.0 2025-12-17 03:30:18 +00:00
Jack Ye
9e60fda0ec fix: use post for describe_namespace and allow access to underlying client (#2871)
Issues found during integration tests:
1. describe_namespace should use POST
2. service needs to access the underlying namespace to be able to do
operations like create_empty_table directly, or get credentials in
isolated paths like a remote take
2025-12-16 19:29:27 -08:00
LanceDB Robot
3e0d451e9b chore: update lance dependency to v1.0.1-beta.1 (#2872)
bump Lance crates to v1.0.1-beta.1

Triggering tag:
https://github.com/lance-format/lance/releases/tag/v1.0.1-beta.1
2025-12-16 17:44:32 -08:00
Lance Release
94bdffe13c Bump version: 0.23.0-beta.2 → 0.23.0 2025-12-16 16:58:35 +00:00
Lance Release
b93ea3a388 Bump version: 0.23.0-beta.1 → 0.23.0-beta.2 2025-12-16 16:57:55 +00:00
Lance Release
ff20d12f20 Bump version: 0.26.0-beta.2 → 0.26.0 2025-12-16 16:57:09 +00:00
Lance Release
5f3e133470 Bump version: 0.26.0-beta.1 → 0.26.0-beta.2 2025-12-16 16:57:07 +00:00
Jack Ye
332e722a64 feat: upgrade lance-namespace python to 0.3.2 (#2868)
Includes fix https://github.com/lance-format/lance-namespace/pull/281
2025-12-16 08:56:04 -08:00
LanceDB Robot
3f63c4f8d9 chore: update lance dependency to v1.0.0 (#2867)
## Summary
- update all lance crates to v1.0.0 using the helper script (falls back
to the v1.0.0 tag)
- refresh Cargo.lock to pull the new release
- add script fallback to retry with the git tag when a crates.io release
is unavailable

## Testing
- cargo clippy --workspace --tests --all-features -- -D warnings
- cargo fmt --all

Tag: https://github.com/lance-format/lance/releases/tag/v1.0.0

---------

Co-authored-by: Jack Ye <yezhaoqin@gmail.com>
2025-12-15 20:36:19 -08:00
BubbleCal
39a18baf59 feat: infer vector type to float32 if integers are out of uint8 range (#2856)
## Summary
- infer integer vector columns as float32 when any value exceeds uint8
range or is negative
- keep uint8 for integer vectors within range and nulls only
- add sync/async tests covering large integer vector inference

## Testing
- ./.venv/bin/pytest python/python/tests/test_table.py -k
"large_int_vectors"
2025-12-08 17:10:25 +08:00
Lance Release
0960e19559 Bump version: 0.23.0-beta.0 → 0.23.0-beta.1 2025-12-05 00:36:39 +00:00
Lance Release
e5321ba311 Bump version: 0.26.0-beta.0 → 0.26.0-beta.1 2025-12-05 00:35:17 +00:00
Jack Ye
f523191d21 feat: make java client builder generic (#2851)
In #2845 we ported the lancedb integration in lance-namespace to
lancedb. But that is too specific to RestNamespace. We can improve the
user entry point so that we can put local mode and a future version of the
Flight SQL-based LanceDB server all behind this single
`LanceDbNamespaceClientBuilder` API.

Also, I renamed `namespace` to `namespaceClient` to avoid confusion with
the namespace path.
2025-12-04 16:34:32 -08:00
Jack Ye
4c3790cde4 chore: remove java-jni from cargo workspace (#2849)
Fixes
https://github.com/lancedb/lancedb/actions/runs/19945349063/job/57193307680
2025-12-04 16:31:37 -08:00
Jack Ye
ff75f2467b feat: use rest namespace for lancedb java sdk (#2845)
After the refactoring on both client and server side, we should have the
ability to fully use lance REST namespace to call into LanceDB cloud and
enterprise. We can avoid having a JNI implementation (which today does
not really do anything except for vending a connection object), and just
use lance-core's RestNamespace.

For now, we provide a LanceDbRestNamespaceBuilder so that users can more
easily build a RestNamespace that talks to a LanceDB Cloud or Enterprise
endpoint.

In the future, we could extend this further to also support the local
mode through DirectoryNamespace. That will be a separate PR.
2025-12-04 13:53:47 -08:00
Lance Release
6f79770248 Bump version: 0.22.4-beta.3 → 0.23.0-beta.0 2025-12-04 19:33:37 +00:00
73 changed files with 1595 additions and 1594 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.4-beta.3"
current_version = "0.23.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
@@ -72,3 +72,9 @@ search = "\nversion = \"{current_version}\""
filename = "nodejs/Cargo.toml"
replace = "\nversion = \"{new_version}\""
search = "\nversion = \"{current_version}\""
# Java documentation
[[tool.bumpversion.files]]
filename = "docs/src/java/java.md"
replace = "<version>{new_version}</version>"
search = "<version>{current_version}</version>"

View File

@@ -1,76 +1,35 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Build and publish Java packages
on:
release:
types: [released]
push:
tags:
- "v*"
pull_request:
paths:
- .github/workflows/java-publish.yml
jobs:
macos-arm64:
name: Build on MacOS Arm64
runs-on: macos-14
timeout-minutes: 45
defaults:
run:
working-directory: ./java/core/lancedb-jni
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
brew install protobuf
- name: Build release
run: |
cargo build --release
- uses: actions/upload-artifact@v4
with:
name: liblancedb_jni_darwin_aarch64.zip
path: target/release/liblancedb_jni.dylib
retention-days: 1
if-no-files-found: error
linux-arm64:
name: Build on Linux Arm64
runs-on: warp-ubuntu-2204-arm64-8x
timeout-minutes: 45
defaults:
run:
working-directory: ./java/core/lancedb-jni
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
cache-workspaces: "./java/core/lancedb-jni"
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
rustflags: "-C debuginfo=1"
- name: Install dependencies
run: |
sudo apt -y -qq update
sudo apt install -y protobuf-compiler libssl-dev pkg-config
- name: Build release
run: |
cargo build --release
- uses: actions/upload-artifact@v4
with:
name: liblancedb_jni_linux_aarch64.zip
path: target/release/liblancedb_jni.so
retention-days: 1
if-no-files-found: error
linux-x86:
runs-on: warp-ubuntu-2204-x64-8x
publish:
name: Build and Publish
runs-on: ubuntu-24.04
timeout-minutes: 30
needs: [macos-arm64, linux-arm64]
defaults:
run:
working-directory: ./java
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
- name: Set up Java 8
uses: actions/setup-java@v4
with:
@@ -82,40 +41,30 @@ jobs:
server-password: SONATYPE_TOKEN
gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
- name: Install dependencies
- name: Set git config
run: |
sudo apt -y -qq update
sudo apt install -y protobuf-compiler libssl-dev pkg-config
- name: Download artifact
uses: actions/download-artifact@v4
- name: Copy native libs
run: |
mkdir -p ./core/target/classes/nativelib/darwin-aarch64 ./core/target/classes/nativelib/linux-aarch64
cp ../liblancedb_jni_darwin_aarch64.zip/liblancedb_jni.dylib ./core/target/classes/nativelib/darwin-aarch64/liblancedb_jni.dylib
cp ../liblancedb_jni_linux_aarch64.zip/liblancedb_jni.so ./core/target/classes/nativelib/linux-aarch64/liblancedb_jni.so
git config --global user.email "dev+gha@lancedb.com"
git config --global user.name "LanceDB Github Runner"
- name: Dry run
if: github.event_name == 'pull_request'
run: |
mvn --batch-mode -DskipTests -Drust.release.build=true package
- name: Set github
run: |
git config --global user.email "LanceDB Github Runner"
git config --global user.name "dev+gha@lancedb.com"
- name: Publish with Java 8
if: github.event_name == 'release'
./mvnw --batch-mode -DskipTests package -pl lancedb-core -am
- name: Publish
if: startsWith(github.ref, 'refs/tags/v')
run: |
echo "use-agent" >> ~/.gnupg/gpg.conf
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
export GPG_TTY=$(tty)
mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux-arm64, linux-x86, macos-arm64]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
needs: [publish]
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
permissions:
contents: read
issues: write

View File

@@ -1,118 +1,46 @@
name: Build and Run Java JNI Tests
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Build Java LanceDB Core
on:
push:
branches:
- main
paths:
- java/**
- .github/workflows/java.yml
pull_request:
paths:
- java/**
- rust/**
- .github/workflows/java.yml
env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
# key, so we set it to make sure it is always consistent.
CARGO_TERM_COLOR: always
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
# CI builds are faster with incremental disabled.
CARGO_INCREMENTAL: "0"
CARGO_BUILD_JOBS: "1"
jobs:
linux-build-java-11:
runs-on: ubuntu-22.04
name: ubuntu-22.04 + Java 11
build-java:
runs-on: ubuntu-24.04
name: Build
defaults:
run:
working-directory: ./java
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
with:
workspaces: java/core/lancedb-jni
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt
- name: Run cargo fmt
run: cargo fmt --check
working-directory: ./java/core/lancedb-jni
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Install Java 11
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: 11
cache: "maven"
- name: Java Style Check
run: mvn checkstyle:check
# Disable because of issues in lancedb rust core code
# - name: Rust Clippy
# working-directory: java/core/lancedb-jni
# run: cargo clippy --all-targets -- -D warnings
- name: Running tests with Java 11
run: mvn clean test
linux-build-java-17:
runs-on: ubuntu-22.04
name: ubuntu-22.04 + Java 17
defaults:
run:
working-directory: ./java
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
with:
workspaces: java/core/lancedb-jni
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt
- name: Run cargo fmt
run: cargo fmt --check
working-directory: ./java/core/lancedb-jni
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Install Java 17
- name: Set up Java 17
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: 17
cache: "maven"
- run: echo "JAVA_17=$JAVA_HOME" >> $GITHUB_ENV
- name: Java Style Check
run: mvn checkstyle:check
# Disable because of issues in lancedb rust core code
# - name: Rust Clippy
# working-directory: java/core/lancedb-jni
# run: cargo clippy --all-targets -- -D warnings
- name: Running tests with Java 17
run: |
export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \
-XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.base/java.lang=ALL-UNNAMED \
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
--add-opens=java.base/java.io=ALL-UNNAMED \
--add-opens=java.base/java.net=ALL-UNNAMED \
--add-opens=java.base/java.nio=ALL-UNNAMED \
--add-opens=java.base/java.util=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
--add-opens=java.base/sun.security.action=ALL-UNNAMED \
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
-Djdk.reflect.useDirectMethodHandle=false \
-Dio.netty.tryReflectionSetAccessible=true"
JAVA_HOME=$JAVA_17 mvn clean test
run: ./mvnw checkstyle:check
- name: Build and install
run: ./mvnw clean install

View File

@@ -88,7 +88,7 @@ jobs:
npm install -g @napi-rs/cli
- name: Build
run: |
npm ci
npm ci --include=optional
npm run build:debug -- --profile ci
npm run tsc
- name: Setup localstack
@@ -146,7 +146,7 @@ jobs:
npm install -g @napi-rs/cli
- name: Build
run: |
npm ci
npm ci --include=optional
npm run build:debug -- --profile ci
npm run tsc
- name: Test

View File

@@ -49,8 +49,8 @@ jobs:
type-check:
name: "Type Check"
timeout-minutes: 30
runs-on: "ubuntu-22.04"
timeout-minutes: 60
runs-on: ubuntu-2404-8x-x64
defaults:
run:
shell: bash
@@ -78,7 +78,7 @@ jobs:
doctest:
name: "Doctest"
timeout-minutes: 30
timeout-minutes: 60
runs-on: ubuntu-2404-8x-x64
defaults:
run:

292
Cargo.lock generated
View File

@@ -1041,6 +1041,61 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core",
"bytes",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"hyper 1.7.0",
"hyper-util",
"itoa",
"matchit",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "backoff"
version = "0.4.0"
@@ -1456,12 +1511,6 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0"
[[package]]
name = "cesu8"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cexpr"
version = "0.6.0"
@@ -1565,16 +1614,6 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "combine"
version = "4.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
dependencies = [
"bytes",
"memchr",
]
[[package]]
name = "comfy-table"
version = "7.1.2"
@@ -3102,8 +3141,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -3946,6 +3985,7 @@ dependencies = [
"http 1.3.1",
"http-body 1.0.1",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"pin-utils",
@@ -4381,28 +4421,6 @@ dependencies = [
"jiff-tzdb",
]
[[package]]
name = "jni"
version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97"
dependencies = [
"cesu8",
"cfg-if",
"combine",
"jni-sys",
"log",
"thiserror 1.0.69",
"walkdir",
"windows-sys 0.45.0",
]
[[package]]
name = "jni-sys"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
version = "0.1.34"
@@ -4460,8 +4478,8 @@ dependencies = [
[[package]]
name = "lance"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-arith",
@@ -4526,8 +4544,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4545,8 +4563,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrayref",
"paste",
@@ -4555,8 +4573,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4592,8 +4610,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-array",
@@ -4623,8 +4641,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-array",
@@ -4641,8 +4659,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4679,8 +4697,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4712,8 +4730,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"datafusion",
"geo-types",
@@ -4724,8 +4742,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-arith",
@@ -4786,8 +4804,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-arith",
@@ -4827,8 +4845,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4844,8 +4862,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"async-trait",
@@ -4857,13 +4875,14 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-ipc",
"arrow-schema",
"async-trait",
"axum",
"bytes",
"futures",
"lance",
@@ -4875,9 +4894,12 @@ dependencies = [
"object_store",
"rand 0.9.2",
"reqwest",
"serde",
"serde_json",
"snafu",
"tokio",
"tower",
"tower-http 0.5.2",
"url",
]
@@ -4896,8 +4918,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-array",
@@ -4936,8 +4958,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "1.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.1.0-beta.1#ddea38f049e64df8b893e1c8ecca7878ea373d1e"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4948,7 +4970,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.22.4-beta.3"
version = "0.23.0"
dependencies = [
"ahash",
"anyhow",
@@ -5025,25 +5047,9 @@ dependencies = [
"walkdir",
]
[[package]]
name = "lancedb-jni"
version = "0.10.0"
dependencies = [
"arrow",
"arrow-schema",
"jni",
"lance",
"lancedb",
"lazy_static",
"serde",
"serde_json",
"snafu",
"tokio",
]
[[package]]
name = "lancedb-nodejs"
version = "0.22.4-beta.3"
version = "0.23.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5063,7 +5069,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.25.4-beta.3"
version = "0.26.0"
dependencies = [
"arrow",
"async-trait",
@@ -5331,6 +5337,12 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matrixmultiply"
version = "0.3.10"
@@ -7319,7 +7331,7 @@ dependencies = [
"tokio-rustls 0.26.4",
"tokio-util",
"tower",
"tower-http",
"tower-http 0.6.6",
"tower-service",
"url",
"wasm-bindgen",
@@ -7838,6 +7850,17 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
dependencies = [
"itoa",
"serde",
"serde_core",
]
[[package]]
name = "serde_plain"
version = "1.0.2"
@@ -8873,6 +8896,24 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-http"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
dependencies = [
"bitflags 2.9.4",
"bytes",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"pin-project-lite",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -8911,6 +8952,7 @@ version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
@@ -9506,15 +9548,6 @@ dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets 0.42.2",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
@@ -9551,21 +9584,6 @@ dependencies = [
"windows-link 0.2.1",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
@@ -9608,12 +9626,6 @@ dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
@@ -9626,12 +9638,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
@@ -9644,12 +9650,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
@@ -9674,12 +9674,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
@@ -9692,12 +9686,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
@@ -9710,12 +9698,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
@@ -9728,12 +9710,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"

View File

@@ -1,5 +1,5 @@
[workspace]
members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
members = ["rust/lancedb", "nodejs", "python"]
# Python package needs to be built by maturin.
exclude = ["python"]
resolver = "2"
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }

View File

@@ -229,6 +229,29 @@ def set_local_version():
update_cargo_toml(line_updater)
def update_lockfiles(version: str, fallback_to_git: bool = False):
    """
    Refresh the Cargo lockfiles by running ``cargo metadata``.

    If that fails with a "failed to select a version" error and
    ``fallback_to_git`` is true, switch the dependency to the preview (git tag)
    form for ``version`` and run the refresh once more. Any other failure is
    re-raised unchanged.
    """

    def refresh():
        # Progress is written to stderr; " done." completes the same line.
        print("Updating lockfiles...", file=sys.stderr, end="")
        run_command("cargo metadata > /dev/null")
        print(" done.", file=sys.stderr)

    try:
        refresh()
    except Exception as e:
        can_retry = fallback_to_git and "failed to select a version" in str(e)
        if not can_retry:
            raise
        print(
            f" failed for crates.io v{version}, retrying with git tag...",
            file=sys.stderr,
        )
        set_preview_version(version)
        refresh()
parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
parser.add_argument(
"version",
@@ -244,6 +267,7 @@ if args.version == "stable":
file=sys.stderr,
)
set_stable_version(latest_stable_version)
update_lockfiles(latest_stable_version)
elif args.version == "preview":
latest_preview_version = get_latest_preview_version()
print(
@@ -251,8 +275,10 @@ elif args.version == "preview":
file=sys.stderr,
)
set_preview_version(latest_preview_version)
update_lockfiles(latest_preview_version)
elif args.version == "local":
set_local_version()
update_lockfiles("local")
else:
# Parse the version number.
version = args.version
@@ -262,9 +288,7 @@ else:
if "beta" in version:
set_preview_version(version)
update_lockfiles(version)
else:
set_stable_version(version)
print("Updating lockfiles...", file=sys.stderr, end="")
run_command("cargo metadata > /dev/null")
print(" done.", file=sys.stderr)
update_lockfiles(version, fallback_to_git=True)

View File

@@ -123,6 +123,7 @@ nav:
- Overview: index.md
- Python: python/python.md
- Javascript/TypeScript: js/globals.md
- Java: java/java.md
- Rust: https://docs.rs/lancedb/latest/lancedb/index.html
extra_css:

View File

@@ -4,4 +4,5 @@ This page contains the API reference for the SDKs supported by the LanceDB team.
- [Python](python/python.md)
- [JavaScript/TypeScript](js/globals.md)
- [Java](java/java.md)
- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)

499
docs/src/java/java.md Normal file
View File

@@ -0,0 +1,499 @@
# Java SDK
The LanceDB Java SDK provides a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API.
!!! note
The Java SDK currently works only with remote LanceDB databases hosted on LanceDB Cloud or LanceDB Enterprise.
Local database support is a work in progress. Check [LANCEDB-2848](https://github.com/lancedb/lancedb/issues/2848) for the latest progress.
## Installation
Add the following dependency to your `pom.xml`:
```xml
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.23.0</version>
</dependency>
```
## Quick Start
### Connecting to LanceDB Cloud
```java
import com.lancedb.LanceDbNamespaceClientBuilder;
import org.lance.namespace.LanceNamespace;
// If your DB url is db://example-db, then your database here is example-db
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
.apiKey("your_lancedb_cloud_api_key")
.database("your_database_name")
.build();
```
### Connecting to LanceDB Enterprise
For LanceDB Enterprise deployments with a custom endpoint:
```java
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
.apiKey("your_lancedb_enterprise_api_key")
.database("your_database_name")
.endpoint("<your_enterprise_endpoint>")
.build();
```
### Configuration Options
| Method | Description | Required |
|--------|-------------|----------|
| `apiKey(String)` | LanceDB API key | Yes |
| `database(String)` | Database name | Yes |
| `endpoint(String)` | Custom endpoint URL for Enterprise deployments | No |
| `region(String)` | AWS region (default: "us-east-1") | No |
| `config(String, String)` | Additional configuration parameters | No |
## Metadata Operations
### Creating a Namespace
Namespaces organize tables hierarchically. Create a namespace before creating tables within it:
```java
import org.lance.namespace.model.CreateNamespaceRequest;
import org.lance.namespace.model.CreateNamespaceResponse;
// Create a child namespace
CreateNamespaceRequest request = new CreateNamespaceRequest();
request.setId(Arrays.asList("my_namespace"));
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
```
You can also create nested namespaces:
```java
// Create a nested namespace: parent/child
CreateNamespaceRequest request = new CreateNamespaceRequest();
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
```
### Describing a Namespace
```java
import org.lance.namespace.model.DescribeNamespaceRequest;
import org.lance.namespace.model.DescribeNamespaceResponse;
DescribeNamespaceRequest request = new DescribeNamespaceRequest();
request.setId(Arrays.asList("my_namespace"));
DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
System.out.println("Namespace properties: " + response.getProperties());
```
### Listing Namespaces
```java
import org.lance.namespace.model.ListNamespacesRequest;
import org.lance.namespace.model.ListNamespacesResponse;
// List all namespaces at root level
ListNamespacesRequest request = new ListNamespacesRequest();
request.setId(Arrays.asList()); // Empty for root
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
for (String ns : response.getNamespaces()) {
System.out.println("Namespace: " + ns);
}
// List child namespaces under a parent
ListNamespacesRequest childRequest = new ListNamespacesRequest();
childRequest.setId(Arrays.asList("parent_namespace"));
ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childRequest);
```
### Listing Tables
```java
import org.lance.namespace.model.ListTablesRequest;
import org.lance.namespace.model.ListTablesResponse;
// List tables in a namespace
ListTablesRequest request = new ListTablesRequest();
request.setId(Arrays.asList("my_namespace"));
ListTablesResponse response = namespaceClient.listTables(request);
for (String table : response.getTables()) {
System.out.println("Table: " + table);
}
```
### Dropping a Namespace
```java
import org.lance.namespace.model.DropNamespaceRequest;
import org.lance.namespace.model.DropNamespaceResponse;
DropNamespaceRequest request = new DropNamespaceRequest();
request.setId(Arrays.asList("my_namespace"));
DropNamespaceResponse response = namespaceClient.dropNamespace(request);
```
### Describing a Table
```java
import org.lance.namespace.model.DescribeTableRequest;
import org.lance.namespace.model.DescribeTableResponse;
DescribeTableRequest request = new DescribeTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
DescribeTableResponse response = namespaceClient.describeTable(request);
System.out.println("Table version: " + response.getVersion());
System.out.println("Schema fields: " + response.getSchema().getFields());
```
### Dropping a Table
```java
import org.lance.namespace.model.DropTableRequest;
import org.lance.namespace.model.DropTableResponse;
DropTableRequest request = new DropTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
DropTableResponse response = namespaceClient.dropTable(request);
```
## Writing Data
### Creating a Table
Tables are created within a namespace by providing data in Apache Arrow IPC format:
```java
import org.lance.namespace.LanceNamespace;
import org.lance.namespace.model.CreateTableRequest;
import org.lance.namespace.model.CreateTableResponse;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.ipc.ArrowStreamWriter;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import java.io.ByteArrayOutputStream;
import java.nio.channels.Channels;
import java.util.Arrays;
// Create schema with id, name, and embedding fields
Schema schema = new Schema(Arrays.asList(
new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null),
new Field("name", FieldType.nullable(new ArrowType.Utf8()), null),
new Field("embedding",
FieldType.nullable(new ArrowType.FixedSizeList(128)),
Arrays.asList(new Field("item",
FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
null)))
));
try (BufferAllocator allocator = new RootAllocator();
VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
// Populate data
root.setRowCount(3);
IntVector idVector = (IntVector) root.getVector("id");
VarCharVector nameVector = (VarCharVector) root.getVector("name");
FixedSizeListVector embeddingVector = (FixedSizeListVector) root.getVector("embedding");
Float4Vector embeddingData = (Float4Vector) embeddingVector.getDataVector();
for (int i = 0; i < 3; i++) {
idVector.setSafe(i, i + 1);
nameVector.setSafe(i, ("item_" + i).getBytes());
embeddingVector.setNotNull(i);
for (int j = 0; j < 128; j++) {
embeddingData.setSafe(i * 128 + j, (float) i);
}
}
idVector.setValueCount(3);
nameVector.setValueCount(3);
embeddingData.setValueCount(3 * 128);
embeddingVector.setValueCount(3);
// Serialize to Arrow IPC format
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(out))) {
writer.start();
writer.writeBatch();
writer.end();
}
byte[] tableData = out.toByteArray();
// Create table in a namespace
CreateTableRequest request = new CreateTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
CreateTableResponse response = namespaceClient.createTable(request, tableData);
}
```
### Insert
```java
import org.lance.namespace.model.InsertIntoTableRequest;
import org.lance.namespace.model.InsertIntoTableResponse;
// Prepare data in Arrow IPC format (similar to create table example)
byte[] insertData = prepareArrowData();
InsertIntoTableRequest request = new InsertIntoTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
request.setMode(InsertIntoTableRequest.ModeEnum.APPEND);
InsertIntoTableResponse response = namespaceClient.insertIntoTable(request, insertData);
System.out.println("New version: " + response.getVersion());
```
### Update
Update rows matching a predicate condition:
```java
import org.lance.namespace.model.UpdateTableRequest;
import org.lance.namespace.model.UpdateTableResponse;
UpdateTableRequest request = new UpdateTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
// Predicate to select rows to update
request.setPredicate("id = 1");
// Set new values using SQL expressions as [column_name, expression] pairs
request.setUpdates(Arrays.asList(
Arrays.asList("name", "'updated_name'")
));
UpdateTableResponse response = namespaceClient.updateTable(request);
System.out.println("Updated rows: " + response.getUpdatedRows());
```
### Delete
Delete rows matching a predicate condition:
```java
import org.lance.namespace.model.DeleteFromTableRequest;
import org.lance.namespace.model.DeleteFromTableResponse;
DeleteFromTableRequest request = new DeleteFromTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
// Predicate to select rows to delete
request.setPredicate("id > 100");
DeleteFromTableResponse response = namespaceClient.deleteFromTable(request);
System.out.println("New version: " + response.getVersion());
```
### Merge Insert (Upsert)
Merge insert allows you to update existing rows and insert new rows in a single operation based on a key column:
```java
import org.lance.namespace.model.MergeInsertIntoTableRequest;
import org.lance.namespace.model.MergeInsertIntoTableResponse;
// Prepare data with rows to update (id=2,3) and new rows (id=4)
byte[] mergeData = prepareArrowData(); // Contains rows with id=2,3,4
MergeInsertIntoTableRequest request = new MergeInsertIntoTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
// Match on the "id" column
request.setOn("id");
// Update all columns when a matching row is found
request.setWhenMatchedUpdateAll(true);
// Insert new rows when no match is found
request.setWhenNotMatchedInsertAll(true);
MergeInsertIntoTableResponse response = namespaceClient.mergeInsertIntoTable(request, mergeData);
System.out.println("Updated rows: " + response.getNumUpdatedRows());
System.out.println("Inserted rows: " + response.getNumInsertedRows());
```
## Querying Data
### Counting Rows
```java
import org.lance.namespace.model.CountTableRowsRequest;
CountTableRowsRequest request = new CountTableRowsRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
Long rowCount = namespaceClient.countTableRows(request);
System.out.println("Row count: " + rowCount);
```
### Vector Search
```java
import org.lance.namespace.model.QueryTableRequest;
import org.lance.namespace.model.QueryTableRequestVector;
QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(10); // Return top 10 results
// Set the query vector
List<Float> queryVector = new ArrayList<>();
for (int i = 0; i < 128; i++) {
queryVector.add(1.0f);
}
QueryTableRequestVector vector = new QueryTableRequestVector();
vector.setSingleVector(queryVector);
query.setVector(vector);
// Specify columns to return
query.setColumns(Arrays.asList("id", "name", "embedding"));
// Execute query - returns Arrow IPC format
byte[] result = namespaceClient.queryTable(query);
```
### Full Text Search
```java
import org.lance.namespace.model.QueryTableRequest;
import org.lance.namespace.model.QueryTableRequestFullTextQuery;
import org.lance.namespace.model.StringFtsQuery;
QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(10);
// Set full text search query
StringFtsQuery stringQuery = new StringFtsQuery();
stringQuery.setQuery("search terms");
stringQuery.setColumns(Arrays.asList("text_column"));
QueryTableRequestFullTextQuery fts = new QueryTableRequestFullTextQuery();
fts.setStringQuery(stringQuery);
query.setFullTextQuery(fts);
// Specify columns to return
query.setColumns(Arrays.asList("id", "text_column"));
byte[] result = namespaceClient.queryTable(query);
```
### Query with Filter
```java
QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(10);
query.setFilter("id > 50");
query.setColumns(Arrays.asList("id", "name"));
byte[] result = namespaceClient.queryTable(query);
```
### Query with Prefilter
```java
QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(5);
query.setPrefilter(true); // Apply filter before vector search
query.setFilter("category = 'electronics'");
// Set query vector
QueryTableRequestVector vector = new QueryTableRequestVector();
vector.setSingleVector(queryVector);
query.setVector(vector);
byte[] result = namespaceClient.queryTable(query);
```
### Reading Query Results
Query results are returned in Apache Arrow IPC file format. Here's how to read them:
```java
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
// Helper class to read Arrow data from byte array
class ByteArraySeekableByteChannel implements SeekableByteChannel {
private final byte[] data;
private long position = 0;
private boolean isOpen = true;
public ByteArraySeekableByteChannel(byte[] data) {
this.data = data;
}
@Override
public int read(ByteBuffer dst) {
int remaining = dst.remaining();
int available = (int) (data.length - position);
if (available <= 0) return -1;
int toRead = Math.min(remaining, available);
dst.put(data, (int) position, toRead);
position += toRead;
return toRead;
}
@Override public long position() { return position; }
@Override public SeekableByteChannel position(long newPosition) { position = newPosition; return this; }
@Override public long size() { return data.length; }
@Override public boolean isOpen() { return isOpen; }
@Override public void close() { isOpen = false; }
@Override public int write(ByteBuffer src) { throw new UnsupportedOperationException(); }
@Override public SeekableByteChannel truncate(long size) { throw new UnsupportedOperationException(); }
}
// Read query results
byte[] queryResult = namespaceClient.queryTable(query);
try (BufferAllocator allocator = new RootAllocator();
ArrowFileReader reader = new ArrowFileReader(
new ByteArraySeekableByteChannel(queryResult), allocator)) {
for (int i = 0; i < reader.getRecordBlocks().size(); i++) {
reader.loadRecordBatch(reader.getRecordBlocks().get(i));
VectorSchemaRoot root = reader.getVectorSchemaRoot();
// Access data
IntVector idVector = (IntVector) root.getVector("id");
VarCharVector nameVector = (VarCharVector) root.getVector("name");
for (int row = 0; row < root.getRowCount(); row++) {
int id = idVector.get(row);
String name = new String(nameVector.get(row));
System.out.println("Row " + row + ": id=" + id + ", name=" + name);
}
}
}
```

28
java/Makefile Normal file
View File

@@ -0,0 +1,28 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
.PHONY: build-lancedb
build-lancedb:
./mvnw spotless:apply -pl lancedb-core -am
./mvnw install -pl lancedb-core -am
.PHONY: test-lancedb
test-lancedb:
# Requires LANCEDB_DB and LANCEDB_API_KEY environment variables
./mvnw test -pl lancedb-core -P integration-tests
.PHONY: clean
clean:
./mvnw clean
.PHONY: build
build: build-lancedb

View File

@@ -7,10 +7,11 @@
For LanceDB Cloud, use the simplified builder API:
```java
import com.lancedb.lance.namespace.LanceRestNamespace;
import com.lancedb.LanceDbNamespaceClientBuilder;
import org.lance.namespace.LanceNamespace;
// If your DB url is db://example-db, then your database here is example-db
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
.apiKey("your_lancedb_cloud_api_key")
.database("your_database_name")
.build();
@@ -18,13 +19,13 @@ LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
### LanceDB Enterprise
For Enterprise deployments, use your VPC endpoint:
For Enterprise deployments, use your custom endpoint:
```java
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
.apiKey("your_lancedb_enterprise_api_key")
.database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
.hostOverride("http://<vpc_endpoint_dns_name>:80")
.database("your_database_name")
.endpoint("<your_enterprise_endpoint>")
.build();
```
@@ -33,5 +34,11 @@ LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
Build:
```shell
./mvnw install
```
./mvnw install -pl lancedb-core -am
```
Run tests:
```shell
./mvnw test -pl lancedb-core
```

View File

@@ -1,30 +0,0 @@
[package]
name = "lancedb-jni"
description = "JNI bindings for LanceDB"
# TODO modify lancedb/Cargo.toml for version and dependencies
version = "0.10.0"
edition.workspace = true
repository.workspace = true
readme.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
publish = false
[lib]
crate-type = ["cdylib"]
[dependencies]
lancedb = { path = "../../../rust/lancedb", default-features = false }
lance = { workspace = true }
arrow = { workspace = true, features = ["ffi"] }
arrow-schema.workspace = true
tokio = "1.46"
jni = "0.21.1"
snafu.workspace = true
lazy_static.workspace = true
serde = { version = "^1" }
serde_json = { version = "1" }
[features]
default = ["lancedb/default"]

View File

@@ -1,133 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use crate::ffi::JNIEnvExt;
use crate::traits::IntoJava;
use crate::{Error, RT};
use jni::objects::{JObject, JString, JValue};
use jni::JNIEnv;
pub const NATIVE_CONNECTION: &str = "nativeConnectionHandle";
use crate::Result;
use lancedb::connection::{connect, Connection};
/// Blocking wrapper around a LanceDB [`Connection`].
///
/// Its methods run the connection's operations to completion with `RT.block_on`.
#[derive(Clone)]
pub struct BlockingConnection {
// The wrapped connection that every call is forwarded to.
pub(crate) inner: Connection,
}
impl BlockingConnection {
    /// Connects to the database at `dataset_uri`, blocking until the connect call finishes.
    ///
    /// # Errors
    /// Returns the error reported by the connect call.
    pub fn create(dataset_uri: &str) -> Result<Self> {
        let connection = RT.block_on(connect(dataset_uri).execute())?;
        Ok(Self { inner: connection })
    }

    /// Lists table names, blocking until the request finishes.
    ///
    /// `start_after` and `limit` are forwarded to the request only when they are `Some`.
    ///
    /// # Errors
    /// Returns the error reported by the request.
    pub fn table_names(
        &self,
        start_after: Option<String>,
        limit: Option<i32>,
    ) -> Result<Vec<String>> {
        let mut request = self.inner.table_names();
        if let Some(cursor) = start_after {
            request = request.start_after(cursor);
        }
        if let Some(max_names) = limit {
            // NOTE(review): `as` turns a negative `i32` into a very large `u32`, so a negative
            // limit is not rejected here -- confirm that callers never pass one.
            request = request.limit(max_names as u32);
        }
        let names = RT.block_on(request.execute())?;
        Ok(names)
    }
}
// Converts a `BlockingConnection` into a Java object that carries it as a native handle.
impl IntoJava for BlockingConnection {
// Consumes `self` and hands it to `attach_native_connection`.
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a> {
attach_native_connection(env, self)
}
}
/// Creates a Java `Connection` object and stores `connection` in its native-handle field.
///
/// Returns the Java object on success. If the handle cannot be stored, a
/// `java/lang/RuntimeException` is raised in `env` and a null object is returned.
///
/// # Panics
/// Panics if the Java object cannot be constructed or if raising the exception itself fails.
fn attach_native_connection<'local>(
    env: &mut JNIEnv<'local>,
    connection: BlockingConnection,
) -> JObject<'local> {
    let java_conn = create_java_connection_object(env);
    // Ownership of the Rust value moves into a field of the Java object. Java's garbage
    // collector does not manage that value, so its memory is only freed when it is released
    // explicitly.
    //
    // To prevent a leak:
    // 1. the Java class should implement `java.io.Closeable`, and
    // 2. callers should use try-with-resources (or call `close()` manually) so that the
    //    native handle is released.
    let stored = unsafe { env.set_rust_field(&java_conn, NATIVE_CONNECTION, connection) };
    if let Err(err) = stored {
        env.throw_new(
            "java/lang/RuntimeException",
            format!("Failed to set native handle for Connection: {}", err),
        )
        .expect("Error throwing exception");
        return JObject::null();
    }
    java_conn
}
/// Instantiates `com/lancedb/lancedb/Connection` through its no-argument constructor (`()V`).
///
/// # Panics
/// Panics if the Java object cannot be created.
fn create_java_connection_object<'a>(env: &mut JNIEnv<'a>) -> JObject<'a> {
env.new_object("com/lancedb/lancedb/Connection", "()V", &[])
.expect("Failed to create Java Lance Connection instance")
}
/// JNI entry point for `com.lancedb.lancedb.Connection.releaseNativeConnection`.
///
/// Takes the [`BlockingConnection`] back out of the Java object's native-handle field and
/// drops it. If the handle cannot be taken, a `java/lang/RuntimeException` is raised in the
/// JVM instead of panicking: a panic must not unwind out of an `extern "system"` function,
/// and would otherwise bring the whole JVM down.
#[no_mangle]
pub extern "system" fn Java_com_lancedb_lancedb_Connection_releaseNativeConnection(
    mut env: JNIEnv,
    j_connection: JObject,
) {
    // SAFETY: relies on the field having been populated with a `BlockingConnection` by
    // `attach_native_connection`, which stores that type under `NATIVE_CONNECTION`.
    let released: std::result::Result<BlockingConnection, _> =
        unsafe { env.take_rust_field(j_connection, NATIVE_CONNECTION) };
    match released {
        // Dropping the value is what actually frees the native connection.
        Ok(connection) => drop(connection),
        Err(err) => {
            // Best effort: if raising the exception fails as well there is nothing further
            // that can be reported from here, and panicking is not an option at this boundary.
            let _ = env.throw_new(
                "java/lang/RuntimeException",
                format!("Failed to take native Connection handle: {}", err),
            );
        }
    }
}
// JNI entry point for `com.lancedb.lancedb.Connection.connect`.
//
// Reads the URI from the Java string, opens a `BlockingConnection` for it and returns the
// connection wrapped in a Java object.
//
// NOTE(review): `ok_or_throw!` is defined outside this file; presumably it raises a Java
// exception and returns early when given an `Err` -- confirm against the macro definition.
#[no_mangle]
pub extern "system" fn Java_com_lancedb_lancedb_Connection_connect<'local>(
mut env: JNIEnv<'local>,
_obj: JObject,
dataset_uri_object: JString,
) -> JObject<'local> {
// Copy the Java string into an owned Rust `String`.
let dataset_uri: String = ok_or_throw!(env, env.get_string(&dataset_uri_object)).into();
let blocking_connection = ok_or_throw!(env, BlockingConnection::create(&dataset_uri));
// Hand the connection over to a new Java object.
blocking_connection.into_java(&mut env)
}
// JNI entry point for `com.lancedb.lancedb.Connection.tableNames`.
//
// Thin wrapper: the work happens in `inner_table_names`, and its `Result` is passed through
// `ok_or_throw!`.
//
// NOTE(review): `ok_or_throw!` is defined outside this file; presumably it raises a Java
// exception when given an `Err` -- confirm against the macro definition.
#[no_mangle]
pub extern "system" fn Java_com_lancedb_lancedb_Connection_tableNames<'local>(
mut env: JNIEnv<'local>,
j_connection: JObject,
start_after_obj: JObject, // Optional<String>
limit_obj: JObject, // Optional<Integer>
) -> JObject<'local> {
ok_or_throw!(
env,
inner_table_names(&mut env, j_connection, start_after_obj, limit_obj)
)
}
// Fallible body of the `tableNames` JNI entry point.
//
// Unwraps the two optional Java arguments, asks the native connection stored on
// `j_connection` for its table names and returns them as a new `java.util.ArrayList`
// of Java strings. Any failing step is returned as `Err`.
fn inner_table_names<'local>(
env: &mut JNIEnv<'local>,
j_connection: JObject,
start_after_obj: JObject, // Optional<String>
limit_obj: JObject, // Optional<Integer>
) -> Result<JObject<'local>> {
let start_after = env.get_string_opt(&start_after_obj)?;
let limit = env.get_int_opt(&limit_obj)?;
// Borrow the `BlockingConnection` kept in the Java object's native-handle field.
let conn =
unsafe { env.get_rust_field::<_, _, BlockingConnection>(j_connection, NATIVE_CONNECTION) }?;
let table_names = conn.table_names(start_after, limit)?;
// Release the handle before `env` is used again below.
// NOTE(review): presumably required because the handle keeps `env` borrowed -- confirm.
drop(conn);
// Build the result list by calling `ArrayList.add(Object)` once per name.
let j_names = env.new_object("java/util/ArrayList", "()V", &[])?;
for item in table_names {
let jstr_item = env.new_string(item)?;
let item_jobj = JObject::from(jstr_item);
let item_gen = JValue::Object(&item_jobj);
env.call_method(&j_names, "add", "(Ljava/lang/Object;)Z", &[item_gen])?;
}
Ok(j_names)
}

View File

@@ -1,217 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::str::Utf8Error;
use arrow_schema::ArrowError;
use jni::errors::Error as JniError;
use serde_json::Error as JsonError;
use snafu::{Location, Snafu};
type BoxedError = Box<dyn std::error::Error + Send + Sync + 'static>;
/// Java exception classes that a native error can be raised as.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JavaException {
    /// `java.lang.IllegalArgumentException`
    IllegalArgumentException,
    /// `java.io.IOException`
    IOException,
    /// `java.lang.RuntimeException`
    RuntimeException,
}

impl JavaException {
    /// Returns the slash-separated class path of the exception.
    ///
    /// The result is a `'static` literal, so it does not keep `self` borrowed.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::IllegalArgumentException => "java/lang/IllegalArgumentException",
            Self::IOException => "java/io/IOException",
            Self::RuntimeException => "java/lang/RuntimeException",
        }
    }
}
/// Error type of the JNI bindings.
///
/// Each variant's `snafu` display attribute defines the message that is shown for it;
/// variants with a `location` field include the source location in that message.
// TODO(lu) change to lancedb-jni
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("JNI error: {message}, {location}"))]
Jni { message: String, location: Location },
#[snafu(display("Invalid argument: {message}, {location}"))]
InvalidArgument { message: String, location: Location },
#[snafu(display("IO error: {source}, {location}"))]
IO {
source: BoxedError,
location: Location,
},
#[snafu(display("Arrow error: {message}, {location}"))]
Arrow { message: String, location: Location },
#[snafu(display("Index error: {message}, {location}"))]
Index { message: String, location: Location },
#[snafu(display("JSON error: {message}, {location}"))]
JSON { message: String, location: Location },
#[snafu(display("Dataset at path {path} was not found, {location}"))]
DatasetNotFound { path: String, location: Location },
#[snafu(display("Dataset already exists: {uri}, {location}"))]
DatasetAlreadyExists { uri: String, location: Location },
#[snafu(display("Table '{name}' already exists"))]
TableAlreadyExists { name: String },
#[snafu(display("Table '{name}' was not found: {source}"))]
TableNotFound {
name: String,
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("Invalid table name '{name}': {reason}"))]
InvalidTableName { name: String, reason: String },
#[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))]
EmbeddingFunctionNotFound {
name: String,
reason: String,
location: Location,
},
#[snafu(display("Other Lance error: {message}, {location}"))]
OtherLance { message: String, location: Location },
#[snafu(display("Other LanceDB error: {message}, {location}"))]
OtherLanceDB { message: String, location: Location },
}
impl Error {
    /// Raises this error in the JVM as the Java exception class that matches its variant.
    ///
    /// Argument, lookup and naming errors become `IllegalArgumentException`; `IO` and `Index`
    /// become `IOException`; every other variant becomes `RuntimeException`.
    ///
    /// # Panics
    /// Panics if the exception cannot be raised (see [`Error::throw_as`]).
    pub fn throw(&self, env: &mut jni::JNIEnv) {
        // The match is exhaustive on purpose: a new variant must be classified here.
        let exception = match self {
            Self::InvalidArgument { .. }
            | Self::DatasetNotFound { .. }
            | Self::DatasetAlreadyExists { .. }
            | Self::TableAlreadyExists { .. }
            | Self::TableNotFound { .. }
            | Self::InvalidTableName { .. }
            | Self::EmbeddingFunctionNotFound { .. } => JavaException::IllegalArgumentException,
            Self::IO { .. } | Self::Index { .. } => JavaException::IOException,
            Self::Arrow { .. }
            | Self::JSON { .. }
            | Self::OtherLance { .. }
            | Self::OtherLanceDB { .. }
            | Self::Jni { .. } => JavaException::RuntimeException,
        };
        self.throw_as(env, exception);
    }

    /// Raises this error in the JVM as the given Java exception class, using the error's
    /// display text as the exception message.
    ///
    /// # Panics
    /// Panics if `env` fails to raise the exception.
    pub fn throw_as(&self, env: &mut jni::JNIEnv, exception: JavaException) {
        env.throw_new(exception.as_str(), self.to_string())
            .unwrap_or_else(|err| {
                // Built only on failure, so a successful throw does not pay for formatting
                // a message that is never shown.
                panic!(
                    "Error when throwing Java exception: {}:{}: {:?}",
                    exception.as_str(),
                    self,
                    err
                )
            });
    }
}
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Conversion from a `'static` source location into a [`snafu::Location`].
trait ToSnafuLocation {
/// Build the equivalent `snafu::Location` for this location.
fn to_snafu_location(&'static self) -> snafu::Location;
}
impl ToSnafuLocation for std::panic::Location<'static> {
    /// Copy the file, line and column of a panic location into a `snafu::Location`.
    fn to_snafu_location(&'static self) -> snafu::Location {
        let (file, line, column) = (self.file(), self.line(), self.column());
        snafu::Location::new(file, line, column)
    }
}
impl From<JniError> for Error {
    /// Wrap a JNI error as `Error::Jni`, recording the caller's source location.
    #[track_caller]
    fn from(source: JniError) -> Self {
        let location = std::panic::Location::caller().to_snafu_location();
        let message = source.to_string();
        Self::Jni { message, location }
    }
}
impl From<Utf8Error> for Error {
    /// Wrap a UTF-8 decoding error as `Error::InvalidArgument`, recording the
    /// caller's source location.
    #[track_caller]
    fn from(source: Utf8Error) -> Self {
        let location = std::panic::Location::caller().to_snafu_location();
        let message = source.to_string();
        Self::InvalidArgument { message, location }
    }
}
impl From<ArrowError> for Error {
    /// Wrap an Arrow error as `Error::Arrow`, recording the caller's source location.
    #[track_caller]
    fn from(source: ArrowError) -> Self {
        let location = std::panic::Location::caller().to_snafu_location();
        let message = source.to_string();
        Self::Arrow { message, location }
    }
}
impl From<JsonError> for Error {
    /// Wrap a JSON error as `Error::JSON`, recording the caller's source location.
    #[track_caller]
    fn from(source: JsonError) -> Self {
        let location = std::panic::Location::caller().to_snafu_location();
        let message = source.to_string();
        Self::JSON { message, location }
    }
}
impl From<lance::Error> for Error {
#[track_caller]
fn from(source: lance::Error) -> Self {
match source {
lance::Error::DatasetNotFound {
path,
source: _,
location,
} => Self::DatasetNotFound { path, location },
lance::Error::DatasetAlreadyExists { uri, location } => {
Self::DatasetAlreadyExists { uri, location }
}
lance::Error::IO { source, location } => Self::IO { source, location },
lance::Error::Arrow { message, location } => Self::Arrow { message, location },
lance::Error::Index { message, location } => Self::Index { message, location },
lance::Error::InvalidInput { source, location } => Self::InvalidArgument {
message: source.to_string(),
location,
},
_ => Self::OtherLance {
message: source.to_string(),
location: std::panic::Location::caller().to_snafu_location(),
},
}
}
}
impl From<lancedb::Error> for Error {
#[track_caller]
fn from(source: lancedb::Error) -> Self {
match source {
lancedb::Error::InvalidTableName { name, reason } => {
Self::InvalidTableName { name, reason }
}
lancedb::Error::InvalidInput { message } => Self::InvalidArgument {
message,
location: std::panic::Location::caller().to_snafu_location(),
},
lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source },
lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name },
lancedb::Error::EmbeddingFunctionNotFound { name, reason } => {
Self::EmbeddingFunctionNotFound {
name,
reason,
location: std::panic::Location::caller().to_snafu_location(),
}
}
lancedb::Error::Arrow { source } => Self::Arrow {
message: source.to_string(),
location: std::panic::Location::caller().to_snafu_location(),
},
lancedb::Error::Lance { source } => Self::from(source),
_ => Self::OtherLanceDB {
message: source.to_string(),
location: std::panic::Location::caller().to_snafu_location(),
},
}
}
}

View File

@@ -1,194 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use core::slice;
use jni::objects::{JByteBuffer, JObjectArray, JString};
use jni::sys::jobjectArray;
use jni::{objects::JObject, JNIEnv};
use crate::error::{Error, Result};
// TODO(lu): import from lance-jni instead of duplicating it here.
/// Extends `JNIEnv` with helpers that convert Java objects into Rust values.
pub trait JNIEnvExt {
/// Get integers from a Java `List<Integer>` object.
fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
/// Get strings from a Java `List<String>` object.
#[allow(dead_code)]
fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
/// Get strings from a Java `String[]` object.
/// Note that getting `Option<Vec<String>>` from a Java `Optional<String[]>` just doesn't work.
#[allow(unused)]
fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>>;
/// Get `Option<String>` from a Java `Optional<String>`.
fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>>;
/// Get `Option<Vec<String>>` from a Java `Optional<List<String>>`.
#[allow(unused)]
fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>>;
/// Get `Option<i32>` from a Java `Optional<Integer>`.
fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>>;
/// Get `Option<Vec<i32>>` from a Java `Optional<List<Integer>>`.
fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>>;
/// Get `Option<i64>` from a Java `Optional<Long>`.
#[allow(unused)]
fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>>;
/// Get `Option<u64>` from a Java `Optional<Long>`.
#[allow(unused)]
fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>>;
/// Get `Option<&[u8]>` from a Java `Optional<ByteBuffer>`.
#[allow(unused)]
fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>>;
/// Generic helper behind the `*_opt` accessors: produce `Option<T>` from a
/// Java `Optional` by applying `f` to the environment and `obj`.
fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
where
F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>;
}
impl JNIEnvExt for JNIEnv<'_> {
fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>> {
let list = self.get_list(obj)?;
let mut iter = list.iter(self)?;
let mut results = Vec::with_capacity(list.size(self)? as usize);
while let Some(elem) = iter.next(self)? {
let int_obj = self.call_method(elem, "intValue", "()I", &[])?;
let int_value = int_obj.i()?;
results.push(int_value);
}
Ok(results)
}
fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>> {
let list = self.get_list(obj)?;
let mut iter = list.iter(self)?;
let mut results = Vec::with_capacity(list.size(self)? as usize);
while let Some(elem) = iter.next(self)? {
let jstr = JString::from(elem);
let val = self.get_string(&jstr)?;
results.push(val.to_str()?.to_string())
}
Ok(results)
}
fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>> {
let jobject_array = unsafe { JObjectArray::from_raw(obj) };
let array_len = self.get_array_length(&jobject_array)?;
let mut res: Vec<String> = Vec::new();
for i in 0..array_len {
let item: JString = self.get_object_array_element(&jobject_array, i)?.into();
res.push(self.get_string(&item)?.into());
}
Ok(res)
}
fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_string_obj = java_obj_gen.l()?;
let jstr = JString::from(java_string_obj);
let val = env.get_string(&jstr)?;
Ok(val.to_str()?.to_string())
})
}
fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_list_obj = java_obj_gen.l()?;
env.get_strings(&java_list_obj)
})
}
fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_int_obj = java_obj_gen.l()?;
let int_obj = env.call_method(java_int_obj, "intValue", "()I", &[])?;
let int_value = int_obj.i()?;
Ok(int_value)
})
}
fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_list_obj = java_obj_gen.l()?;
env.get_integers(&java_list_obj)
})
}
fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_long_obj = java_obj_gen.l()?;
let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
let long_value = long_obj.j()?;
Ok(long_value)
})
}
fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_long_obj = java_obj_gen.l()?;
let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
let long_value = long_obj.j()?;
Ok(long_value as u64)
})
}
fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>> {
self.get_optional(obj, |env, inner_obj| {
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
let java_byte_buffer_obj = java_obj_gen.l()?;
let j_byte_buffer = JByteBuffer::from(java_byte_buffer_obj);
let raw_data = env.get_direct_buffer_address(&j_byte_buffer)?;
let capacity = env.get_direct_buffer_capacity(&j_byte_buffer)?;
let data = unsafe { slice::from_raw_parts(raw_data, capacity) };
Ok(data)
})
}
fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
where
F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>,
{
if obj.is_null() {
return Ok(None);
}
let is_present = self.call_method(obj, "isPresent", "()Z", &[])?;
if !is_present.z()? {
// TODO(lu): put get java object into here cuz can only get java Object
Ok(None)
} else {
f(self, obj).map(Some)
}
}
}
#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseInts(
mut env: JNIEnv,
_obj: JObject,
list_obj: JObject, // List<Integer>
) {
ok_or_throw_without_return!(env, env.get_integers(&list_obj));
}
#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseIntsOpt(
mut env: JNIEnv,
_obj: JObject,
list_obj: JObject, // Optional<List<Integer>>
) {
ok_or_throw_without_return!(env, env.get_ints_opt(&list_obj));
}

View File

@@ -1,57 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lazy_static::lazy_static;
// TODO import from lance-jni without duplicate
/// Evaluate `$result`; yield the `Ok` value, or on `Err` throw it as a Java
/// exception through `$env` and return `JObject::null()` from the enclosing
/// function.
///
/// `Error` and `JObject` are resolved at the call site, so both must be in
/// scope wherever the macro is used.
#[macro_export]
macro_rules! ok_or_throw {
($env:expr, $result:expr) => {
match $result {
Ok(value) => value,
Err(err) => {
// Convert into the crate error, which picks the Java exception type.
Error::from(err).throw(&mut $env);
return JObject::null();
}
}
};
}
/// Evaluate `$result`; yield the `Ok` value, or on `Err` throw it as a Java
/// exception through `$env` and return `()` from the enclosing function.
///
/// `Error` is resolved at the call site, so it must be in scope there.
macro_rules! ok_or_throw_without_return {
($env:expr, $result:expr) => {
match $result {
Ok(value) => value,
Err(err) => {
// Convert into the crate error, which picks the Java exception type.
Error::from(err).throw(&mut $env);
return;
}
}
};
}
/// Evaluate `$result`; yield the `Ok` value, or on `Err` throw it as a Java
/// exception through `$env` and return `$ret` from the enclosing function.
///
/// `Error` is resolved at the call site, so it must be in scope there.
#[macro_export]
macro_rules! ok_or_throw_with_return {
($env:expr, $result:expr, $ret:expr) => {
match $result {
Ok(value) => value,
Err(err) => {
// Convert into the crate error, which picks the Java exception type.
Error::from(err).throw(&mut $env);
return $ret;
}
}
};
}
mod connection;
pub mod error;
mod ffi;
mod traits;
pub use error::{Error, Result};
lazy_static! {
// Crate-wide multi-threaded Tokio runtime with all drivers enabled.
// Initialization panics if the runtime cannot be built.
static ref RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.expect("Failed to create tokio runtime");
}

View File

@@ -1,114 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use jni::objects::{JMap, JObject, JString, JValue};
use jni::JNIEnv;
use crate::Result;
/// Extract a Rust value of type `T` from a Java object reference.
#[allow(dead_code)]
pub trait FromJObject<T> {
/// Convert `self` into a `T`, or return an error if the conversion fails.
fn extract(&self) -> Result<T>;
}
/// Convert a Rust type into a Java Object.
pub trait IntoJava {
/// Consume `self` and build the corresponding Java object using `env`.
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a>;
}
impl FromJObject<i32> for JObject<'_> {
    /// Wrap the object in a `JValue` and read it as a Java `int`.
    ///
    /// NOTE(review): `JValue::from(&JObject)` presumably yields an object-typed
    /// value, in which case `.i()` would always report a type mismatch — confirm
    /// against the jni crate before relying on this.
    fn extract(&self) -> Result<i32> {
        let wrapped = JValue::from(self);
        let number = wrapped.i()?;
        Ok(number)
    }
}
impl FromJObject<i64> for JObject<'_> {
    /// Wrap the object in a `JValue` and read it as a Java `long`.
    ///
    /// NOTE(review): `JValue::from(&JObject)` presumably yields an object-typed
    /// value, in which case `.j()` would always report a type mismatch — confirm
    /// against the jni crate before relying on this.
    fn extract(&self) -> Result<i64> {
        let wrapped = JValue::from(self);
        let number = wrapped.j()?;
        Ok(number)
    }
}
impl FromJObject<f32> for JObject<'_> {
    /// Wrap the object in a `JValue` and read it as a Java `float`.
    ///
    /// NOTE(review): `JValue::from(&JObject)` presumably yields an object-typed
    /// value, in which case `.f()` would always report a type mismatch — confirm
    /// against the jni crate before relying on this.
    fn extract(&self) -> Result<f32> {
        let wrapped = JValue::from(self);
        let number = wrapped.f()?;
        Ok(number)
    }
}
impl FromJObject<f64> for JObject<'_> {
    /// Wrap the object in a `JValue` and read it as a Java `double`.
    ///
    /// NOTE(review): `JValue::from(&JObject)` presumably yields an object-typed
    /// value, in which case `.d()` would always report a type mismatch — confirm
    /// against the jni crate before relying on this.
    fn extract(&self) -> Result<f64> {
        let wrapped = JValue::from(self);
        let number = wrapped.d()?;
        Ok(number)
    }
}
/// Extract an owned Rust `String` from a Java string reference.
#[allow(dead_code)]
pub trait FromJString {
/// Read the string's contents through `env`.
fn extract(&self, env: &mut JNIEnv) -> Result<String>;
}
impl FromJString for JString<'_> {
    /// Fetch the Java string via `env` and convert it into an owned `String`.
    fn extract(&self, env: &mut JNIEnv) -> Result<String> {
        let java_str = env.get_string(self)?;
        Ok(java_str.into())
    }
}
/// Typed lookups on a Java map by string key.
///
/// Each accessor returns `Ok(None)` when no value is available for `key`.
pub trait JMapExt {
/// Look up `key` and read the value as a `String`.
#[allow(dead_code)]
fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>>;
/// Look up `key` and read the value as an `i32`.
#[allow(dead_code)]
fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>>;
/// Look up `key` and read the value as an `i64`.
#[allow(dead_code)]
fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>>;
/// Look up `key` and read the value as an `f32`.
#[allow(dead_code)]
fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>>;
/// Look up `key` and read the value as an `f64`.
#[allow(dead_code)]
fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
}
/// Look up `key` in a Java map and extract the value as `T`.
///
/// Returns `Ok(None)` when the map has no entry for `key` or the stored value
/// is a null reference.
#[allow(dead_code)]
fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
where
    for<'a> JObject<'a>: FromJObject<T>,
{
    let lookup_key: JObject = env.new_string(key)?.into();
    match map.get(env, &lookup_key)? {
        Some(entry) if !entry.is_null() => Ok(Some(entry.extract()?)),
        _ => Ok(None),
    }
}
impl JMapExt for JMap<'_, '_, '_> {
    /// Look up `key` and read the value as a `String`.
    ///
    /// Returns `Ok(None)` when the key is absent or the stored value is a null
    /// reference, matching the numeric accessors (which go through
    /// `get_map_value`). Previously a null value was handed to `get_string` on
    /// the environment and surfaced as an error.
    fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>> {
        let key_obj: JObject = env.new_string(key)?.into();
        match self.get(env, &key_obj)? {
            Some(value) if !value.is_null() => {
                let value_str: JString = value.into();
                Ok(Some(value_str.extract(env)?))
            }
            _ => Ok(None),
        }
    }

    /// Look up `key` and read the value as an `i32`.
    fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>> {
        get_map_value(env, self, key)
    }

    /// Look up `key` and read the value as an `i64`.
    fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>> {
        get_map_value(env, self, key)
    }

    /// Look up `key` and read the value as an `f32`.
    fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>> {
        get_map_value(env, self, key)
    }

    /// Look up `key` and read the value as an `f64`.
    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>> {
        get_map_value(env, self, key)
    }
}

View File

@@ -1,103 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.3</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-core</artifactId>
<name>${project.artifactId}</name>
<description>LanceDB Core</description>
<packaging>jar</packaging>
<properties>
<rust.release.build>false</rust.release.build>
</properties>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
<version>0.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-c-data</artifactId>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-dataset</artifactId>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
</dependency>
<dependency>
<groupId>org.questdb</groupId>
<artifactId>jar-jni</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<profile>
<id>build-jni</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.questdb</groupId>
<artifactId>rust-maven-plugin</artifactId>
<version>1.1.1</version>
<executions>
<execution>
<id>lancedb-jni</id>
<goals>
<goal>build</goal>
</goals>
<configuration>
<path>lancedb-jni</path>
<release>${rust.release.build}</release>
<!-- Copy native libraries to target/classes for runtime access -->
<copyTo>${project.build.directory}/classes/nativelib</copyTo>
<copyWithPlatformDir>true</copyWithPlatformDir>
</configuration>
</execution>
<execution>
<id>lancedb-jni-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<path>lancedb-jni</path>
<release>false</release>
<verbosity>-v</verbosity>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@@ -1,108 +0,0 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
import io.questdb.jar.jni.JarJniLoader;
import java.io.Closeable;
import java.util.List;
import java.util.Optional;
/**
 * Represents a LanceDB database.
 *
 * <p>Instances are created by the native {@link #connect(String)} method and hold a handle to a
 * native connection; call {@link #close()} (or use try-with-resources) to release it.
 */
public class Connection implements Closeable {
  static {
    JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
  }

  /** Handle to the native connection; {@code 0} once the connection has been closed. */
  private long nativeConnectionHandle;

  /**
   * Connect to a LanceDB instance.
   *
   * @param uri the database URI
   * @return the connection
   */
  public static native Connection connect(String uri);

  /**
   * Get the names of all tables in the database. The names are sorted in ascending order.
   *
   * @return the table names
   */
  public List<String> tableNames() {
    return tableNames(Optional.empty(), Optional.empty());
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param limit The number of results to return.
   * @return the table names
   */
  public List<String> tableNames(int limit) {
    return tableNames(Optional.empty(), Optional.of(limit));
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param startAfter If present, only return names that come lexicographically after the supplied
   *     value. This can be combined with limit to implement pagination by setting this to the last
   *     table name from the previous page. A {@code null} value is treated as absent.
   * @return the table names
   */
  public List<String> tableNames(String startAfter) {
    // ofNullable: a null startAfter means "no lower bound" instead of a NullPointerException.
    return tableNames(Optional.ofNullable(startAfter), Optional.empty());
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param startAfter If present, only return names that come lexicographically after the supplied
   *     value. This can be combined with limit to implement pagination by setting this to the last
   *     table name from the previous page. A {@code null} value is treated as absent.
   * @param limit The number of results to return.
   * @return the table names
   */
  public List<String> tableNames(String startAfter, int limit) {
    // ofNullable: a null startAfter means "no lower bound" instead of a NullPointerException.
    return tableNames(Optional.ofNullable(startAfter), Optional.of(limit));
  }

  /**
   * Get the names of filtered tables in the database. The names are sorted in ascending order.
   *
   * @param startAfter If present, only return names that come lexicographically after the supplied
   *     value. This can be combined with limit to implement pagination by setting this to the last
   *     table name from the previous page.
   * @param limit The number of results to return.
   * @return the table names
   */
  public native List<String> tableNames(Optional<String> startAfter, Optional<Integer> limit);

  /**
   * Closes this connection and releases any system resources associated with it. If the connection
   * is already closed, then invoking this method has no effect.
   */
  @Override
  public void close() {
    if (nativeConnectionHandle != 0) {
      releaseNativeConnection(nativeConnectionHandle);
      // Zero the handle so a second close() is a no-op.
      nativeConnectionHandle = 0;
    }
  }

  /**
   * Native method to release the Lance connection resources associated with the given handle.
   *
   * @param handle The native handle to the connection resource.
   */
  private native void releaseNativeConnection(long handle);

  /** Not instantiable from Java; use {@link #connect(String)}. */
  private Connection() {}
}

View File

@@ -1,135 +0,0 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.net.URL;
import java.nio.file.Path;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/** Exercises {@code Connection.tableNames} and its overloads against the bundled example DB. */
public class ConnectionTest {
  /** Tables expected in the {@code example_db} resource, in ascending order. */
  private static final String[] TABLE_NAMES = {
    "dataset_version", "new_empty_dataset", "test", "write_stream"
  };

  @TempDir static Path tempDir; // Temporary directory for the tests
  private static URL lanceDbURL;

  @BeforeAll
  static void setUp() {
    lanceDbURL = ConnectionTest.class.getClassLoader().getResource("example_db");
  }

  /** A freshly created database lists no tables. */
  @Test
  void emptyDB() {
    String emptyDbUri = tempDir.resolve("emptyDB").toString();
    try (Connection connection = Connection.connect(emptyDbUri)) {
      assertTrue(connection.tableNames().isEmpty());
    }
  }

  /** Without filters, every table is returned in sorted order. */
  @Test
  void tableNames() {
    try (Connection connection = Connection.connect(lanceDbURL.toString())) {
      List<String> actual = connection.tableNames();
      assertEquals(4, actual.size());
      int index = 0;
      for (String expected : TABLE_NAMES) {
        assertEquals(expected, actual.get(index));
        index++;
      }
    }
  }

  /** {@code startAfter} excludes the given name and everything sorting before it. */
  @Test
  void tableNamesStartAfter() {
    try (Connection connection = Connection.connect(lanceDbURL.toString())) {
      // Start after names that exist in the database.
      assertTableNamesStartAfter(
          connection, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(connection, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(connection, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
      assertTableNamesStartAfter(connection, TABLE_NAMES[3], 0);
      // Start after names that do not exist but sort between the real ones.
      assertTableNamesStartAfter(
          connection,
          "a_dataset",
          4,
          TABLE_NAMES[0],
          TABLE_NAMES[1],
          TABLE_NAMES[2],
          TABLE_NAMES[3]);
      assertTableNamesStartAfter(connection, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
      assertTableNamesStartAfter(connection, "v_dataset", 1, TABLE_NAMES[3]);
      assertTableNamesStartAfter(connection, "z_dataset", 0);
    }
  }

  /** Asserts the size and the leading entries of {@code conn.tableNames(startAfter)}. */
  private void assertTableNamesStartAfter(
      Connection conn, String startAfter, int expectedSize, String... expectedNames) {
    List<String> actual = conn.tableNames(startAfter);
    assertEquals(expectedSize, actual.size());
    int index = 0;
    for (String expected : expectedNames) {
      assertEquals(expected, actual.get(index));
      index++;
    }
  }

  /** A limit of {@code n} returns exactly the first {@code n} names. */
  @Test
  void tableNamesLimit() {
    try (Connection connection = Connection.connect(lanceDbURL.toString())) {
      for (int limit = 0; limit <= TABLE_NAMES.length; limit++) {
        List<String> actual = connection.tableNames(limit);
        assertEquals(limit, actual.size());
        for (int pos = 0; pos < limit; pos++) {
          assertEquals(TABLE_NAMES[pos], actual.get(pos));
        }
      }
    }
  }

  /** {@code startAfter} and {@code limit} combine for pagination. */
  @Test
  void tableNamesStartAfterLimit() {
    try (Connection connection = Connection.connect(lanceDbURL.toString())) {
      List<String> page = connection.tableNames(TABLE_NAMES[0], 2);
      assertEquals(2, page.size());
      assertEquals(TABLE_NAMES[1], page.get(0));
      assertEquals(TABLE_NAMES[2], page.get(1));

      page = connection.tableNames(TABLE_NAMES[1], 1);
      assertEquals(1, page.size());
      assertEquals(TABLE_NAMES[2], page.get(0));

      page = connection.tableNames(TABLE_NAMES[2], 2);
      assertEquals(1, page.size());
      assertEquals(TABLE_NAMES[3], page.get(0));

      page = connection.tableNames(TABLE_NAMES[3], 2);
      assertEquals(0, page.size());

      page = connection.tableNames(TABLE_NAMES[0], 0);
      assertEquals(0, page.size());

      // Limit larger than the number of remaining tables
      page = connection.tableNames(TABLE_NAMES[0], 10);
      assertEquals(3, page.size());
      assertEquals(TABLE_NAMES[1], page.get(0));
      assertEquals(TABLE_NAMES[2], page.get(1));
      assertEquals(TABLE_NAMES[3], page.get(2));

      // Start after a value not in the list
      page = connection.tableNames("non_existent_table", 2);
      assertEquals(2, page.size());
      assertEquals(TABLE_NAMES[2], page.get(0));
      assertEquals(TABLE_NAMES[3], page.get(1));

      // Start after the last table with a limit
      page = connection.tableNames(TABLE_NAMES[3], 1);
      assertEquals(0, page.size());
    }
  }
}

View File

@@ -1 +0,0 @@
$d51afd07-e3cd-4c76-9b9b-787e13fd55b0<62>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08

View File

@@ -1 +0,0 @@
$15648e72-076f-4ef1-8b90-10d305b95b3b<33>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08

View File

@@ -1 +0,0 @@
$a3689caf-4f6b-4afc-a3c7-97af75661843<34>oitem <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string8price <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*double80vector <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*fixed_size_list:float:28

View File

@@ -1,26 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.3</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-lance-namespace</artifactId>
<name>${project.artifactId}</name>
<description>LanceDB Java Integration with Lance Namespace</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
</dependency>
</dependencies>
</project>

99
java/lancedb-core/pom.xml Normal file
View File

@@ -0,0 +1,99 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.0-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-core</artifactId>
<name>${project.artifactId}</name>
<description>Utilities to work with LanceDB Cloud and Enterprise via Lance REST Namespace</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.lance</groupId>
<artifactId>lance-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<version>5.18.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.16</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.24.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.24.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.24.3</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@@ -11,35 +11,58 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
package com.lancedb;
import com.lancedb.lance.namespace.LanceRestNamespace;
import com.lancedb.lance.namespace.client.apache.ApiClient;
import org.lance.namespace.LanceNamespace;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */
public class LanceDbRestNamespaces {
/**
* Util class to help construct a {@link LanceNamespace} for LanceDB.
*
* <p>For LanceDB Cloud, use the simplified builder API:
*
* <pre>{@code
* import org.lance.namespace.LanceNamespace;
*
* // If your DB url is db://example-db, then your database here is example-db
* LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
* .apiKey("your_lancedb_cloud_api_key")
* .database("your_database_name")
* .build();
* }</pre>
*
* <p>For LanceDB Enterprise deployments, use your custom endpoint:
*
* <pre>{@code
* LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
* .apiKey("your_lancedb_enterprise_api_key")
* .database("your_database_name")
* .endpoint("<your_enterprise_endpoint>")
* .build();
* }</pre>
*/
public class LanceDbNamespaceClientBuilder {
private static final String DEFAULT_REGION = "us-east-1";
private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
private String apiKey;
private String database;
private Optional<String> hostOverride = Optional.empty();
private Optional<String> endpoint = Optional.empty();
private Optional<String> region = Optional.empty();
private Map<String, String> additionalConfig = new HashMap<>();
private LanceDbRestNamespaces() {}
private LanceDbNamespaceClientBuilder() {}
/**
* Create a new builder instance.
*
* @return A new LanceRestNamespaceBuilder
* @return A new LanceDbNamespaceClientBuilder
*/
public static LanceDbRestNamespaces builder() {
return new LanceDbRestNamespaces();
public static LanceDbNamespaceClientBuilder newBuilder() {
return new LanceDbNamespaceClientBuilder();
}
/**
@@ -48,7 +71,7 @@ public class LanceDbRestNamespaces {
* @param apiKey The LanceDB API key
* @return This builder
*/
public LanceDbRestNamespaces apiKey(String apiKey) {
public LanceDbNamespaceClientBuilder apiKey(String apiKey) {
if (apiKey == null || apiKey.trim().isEmpty()) {
throw new IllegalArgumentException("API key cannot be null or empty");
}
@@ -62,7 +85,7 @@ public class LanceDbRestNamespaces {
* @param database The database name
* @return This builder
*/
public LanceDbRestNamespaces database(String database) {
public LanceDbNamespaceClientBuilder database(String database) {
if (database == null || database.trim().isEmpty()) {
throw new IllegalArgumentException("Database cannot be null or empty");
}
@@ -71,25 +94,25 @@ public class LanceDbRestNamespaces {
}
/**
* Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL
* Set a custom endpoint URL (optional). When set, this overrides the default LanceDB Cloud URL
* construction. Use this for LanceDB Enterprise deployments.
*
* @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
* @param endpoint The complete base URL for your LanceDB Enterprise deployment
* @return This builder
*/
public LanceDbRestNamespaces hostOverride(String hostOverride) {
this.hostOverride = Optional.ofNullable(hostOverride);
public LanceDbNamespaceClientBuilder endpoint(String endpoint) {
this.endpoint = Optional.ofNullable(endpoint);
return this;
}
/**
* Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is
* ignored when hostOverride is set.
* ignored when endpoint is set.
*
* @param region The AWS region (e.g., "us-east-1", "eu-west-1")
* @return This builder
*/
public LanceDbRestNamespaces region(String region) {
public LanceDbNamespaceClientBuilder region(String region) {
this.region = Optional.ofNullable(region);
return this;
}
@@ -101,18 +124,18 @@ public class LanceDbRestNamespaces {
* @param value The configuration value
* @return This builder
*/
public LanceDbRestNamespaces config(String key, String value) {
public LanceDbNamespaceClientBuilder config(String key, String value) {
this.additionalConfig.put(key, value);
return this;
}
/**
* Build the LanceRestNamespace instance.
* Build the LanceNamespace instance.
*
* @return A configured LanceRestNamespace
* @return A configured LanceNamespace
* @throws IllegalStateException if required parameters are missing
*/
public LanceRestNamespace build() {
public LanceNamespace build() {
// Validate required fields
if (apiKey == null) {
throw new IllegalStateException("API key is required");
@@ -123,24 +146,19 @@ public class LanceDbRestNamespaces {
// Build configuration map
Map<String, String> config = new HashMap<>(additionalConfig);
config.put("headers.x-lancedb-database", database);
config.put("headers.x-api-key", apiKey);
config.put("header.x-lancedb-database", database);
config.put("header.x-api-key", apiKey);
// Determine base URL
String baseUrl;
if (hostOverride.isPresent()) {
baseUrl = hostOverride.get();
config.put("host_override", hostOverride.get());
String uri;
if (endpoint.isPresent()) {
uri = endpoint.get();
} else {
String effectiveRegion = region.orElse(DEFAULT_REGION);
baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
config.put("region", effectiveRegion);
uri = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
}
config.put("uri", uri);
// Create and configure ApiClient
ApiClient apiClient = new ApiClient();
apiClient.setBasePath(baseUrl);
return new LanceRestNamespace(apiClient, config);
return LanceNamespace.connect("rest", config, null);
}
}

View File

@@ -0,0 +1,96 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/** Unit tests for LanceDbNamespaceClientBuilder. */
public class LanceDbNamespaceClientBuilderTest {
@Test
public void testBuilderRequiresApiKey() {
LanceDbNamespaceClientBuilder builder =
LanceDbNamespaceClientBuilder.newBuilder().database("test-db");
IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build);
assertEquals("API key is required", exception.getMessage());
}
@Test
public void testBuilderRequiresDatabase() {
LanceDbNamespaceClientBuilder builder =
LanceDbNamespaceClientBuilder.newBuilder().apiKey("test-api-key");
IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build);
assertEquals("Database is required", exception.getMessage());
}
@Test
public void testApiKeyCannotBeNull() {
IllegalArgumentException exception =
assertThrows(
IllegalArgumentException.class,
() -> LanceDbNamespaceClientBuilder.newBuilder().apiKey(null));
assertEquals("API key cannot be null or empty", exception.getMessage());
}
@Test
public void testApiKeyCannotBeEmpty() {
IllegalArgumentException exception =
assertThrows(
IllegalArgumentException.class,
() -> LanceDbNamespaceClientBuilder.newBuilder().apiKey(" "));
assertEquals("API key cannot be null or empty", exception.getMessage());
}
@Test
public void testDatabaseCannotBeNull() {
IllegalArgumentException exception =
assertThrows(
IllegalArgumentException.class,
() -> LanceDbNamespaceClientBuilder.newBuilder().database(null));
assertEquals("Database cannot be null or empty", exception.getMessage());
}
@Test
public void testDatabaseCannotBeEmpty() {
IllegalArgumentException exception =
assertThrows(
IllegalArgumentException.class,
() -> LanceDbNamespaceClientBuilder.newBuilder().database(" "));
assertEquals("Database cannot be null or empty", exception.getMessage());
}
@Test
public void testBuilderFluentApi() {
// Verify the builder returns itself for chaining
LanceDbNamespaceClientBuilder builder = LanceDbNamespaceClientBuilder.newBuilder();
assertSame(builder, builder.apiKey("test-key"));
assertSame(builder, builder.database("test-db"));
assertSame(builder, builder.endpoint("http://localhost:8080"));
assertSame(builder, builder.region("eu-west-1"));
assertSame(builder, builder.config("custom-key", "custom-value"));
}
@Test
public void testNewBuilderCreatesNewInstance() {
LanceDbNamespaceClientBuilder builder1 = LanceDbNamespaceClientBuilder.newBuilder();
LanceDbNamespaceClientBuilder builder2 = LanceDbNamespaceClientBuilder.newBuilder();
assertNotSame(builder1, builder2);
}
}

View File

@@ -0,0 +1,32 @@
<?xml version='1.0' encoding='UTF-8'?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration monitorInterval="30">
<appenders>
<Console name='Console' target='SYSTEM_ERR'>
<PatternLayout pattern='%d{HH:mm:ss.SSS} %p [%t] %C{1}.%M: %m%n'/>
</Console>
</appenders>
<loggers>
<logger name='com.lancedb' level='DEBUG' additivity='false'>
<appender-ref ref='Console'/>
</logger>
<root level='INFO'>
<appender-ref ref='Console'/>
</root>
</loggers>
</configuration>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.3</version>
<version>0.23.0-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-namespace.verison>0.0.1</lance-namespace.verison>
<lance-core.version>1.0.0-rc.2</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -51,8 +51,7 @@
</properties>
<modules>
<module>core</module>
<module>lance-namespace</module>
<module>lancedb-core</module>
</modules>
<scm>
@@ -64,9 +63,9 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
<version>${lance-namespace.verison}</version>
<groupId>org.lance</groupId>
<artifactId>lance-core</artifactId>
<version>${lance-core.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
@@ -88,21 +87,11 @@
<artifactId>arrow-dataset</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.questdb</groupId>
<artifactId>jar-jni</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.10.1</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20210307</version>
</dependency>
</dependencies>
</dependencyManagement>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.4-beta.3"
version = "0.23.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.3",
"version": "0.23.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.4-beta.3",
"version": "0.23.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.26.0-beta.0"
current_version = "0.26.1-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.26.0-beta.0"
version = "0.26.1-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -10,7 +10,7 @@ dependencies = [
"pyarrow>=16",
"pydantic>=1.10",
"tqdm>=4.27.0",
"lance-namespace>=0.2.1"
"lance-namespace>=0.3.2"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

View File

@@ -3208,7 +3208,27 @@ def _infer_target_schema(
if pa.types.is_floating(field.type.value_type):
target_type = pa.list_(pa.float32(), dim)
elif pa.types.is_integer(field.type.value_type):
target_type = pa.list_(pa.uint8(), dim)
values = peeked.column(i)
if isinstance(values, pa.ChunkedArray):
values = values.combine_chunks()
flattened = values.flatten()
valid_count = pc.count(flattened, mode="only_valid").as_py()
if valid_count == 0:
target_type = pa.list_(pa.uint8(), dim)
else:
min_max = pc.min_max(flattened)
min_value = min_max["min"].as_py()
max_value = min_max["max"].as_py()
if (min_value is not None and min_value < 0) or (
max_value is not None and max_value > 255
):
target_type = pa.list_(pa.float32(), dim)
else:
target_type = pa.list_(pa.uint8(), dim)
else:
continue # Skip non-numeric types

View File

@@ -46,6 +46,39 @@ def test_basic(mem_db: DBConnection):
assert table.to_arrow() == expected_data
def test_create_table_infers_large_int_vectors(mem_db: DBConnection):
    """An integer vector containing a value above 255 is stored as float32."""
    float_vector = pa.list_(pa.float32(), 2)
    table = mem_db.create_table(
        "int_vector_overflow",
        data=[{"vector": [0, 300]}],
        mode="overwrite",
        exist_ok=True,
    )

    # The declared schema must already carry the widened type.
    assert table.schema.field("vector").type == float_vector

    # The stored data must match the schema and keep the original values.
    stored = table.to_arrow().column("vector")
    assert stored.type == float_vector
    assert stored.to_pylist() == [[0.0, 300.0]]
@pytest.mark.asyncio
async def test_create_table_async_infers_large_int_vectors(
    mem_db_async: AsyncConnection,
):
    """Async connection: integer vectors with values above 255 become float32."""
    float_vector = pa.list_(pa.float32(), 2)
    table = await mem_db_async.create_table(
        "int_vector_overflow_async",
        data=[{"vector": [256, 257]}],
        mode="overwrite",
        exist_ok=True,
    )

    # The declared schema must already carry the widened type.
    table_schema = await table.schema()
    assert table_schema.field("vector").type == float_vector

    # The stored data must match the schema and keep the original values.
    arrow_table = await table.to_arrow()
    stored = arrow_table.column("vector")
    assert stored.type == float_vector
    assert stored.to_pylist() == [[256.0, 257.0]]
def test_input_data_type(mem_db: DBConnection, tmp_path):
schema = pa.schema(
{

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.4-beta.3"
version = "0.23.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -110,7 +110,7 @@ oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
dynamodb = ["lance/dynamodb", "aws"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest", "lance-namespace-impls/rest-adapter"]
fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = []
bedrock = ["dep:aws-sdk-bedrockruntime"]

View File

@@ -804,6 +804,14 @@ impl Connection {
self.internal.describe_namespace(request).await
}
/// Get the namespace client equivalent to the database behind this connection.
///
/// Which client is returned depends on the database type:
/// - For LanceNamespaceDatabase, it is the underlying LanceNamespace.
/// - For ListingDatabase, it is the equivalent DirectoryNamespace.
/// - For RemoteDatabase, it is the equivalent RestNamespace.
///
/// This delegates directly to the internal database's `namespace_client`,
/// so any error it reports is returned unchanged.
pub async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
self.internal.namespace_client().await
}
/// List tables with pagination support
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
self.internal.list_tables(request).await

View File

@@ -296,4 +296,10 @@ pub trait Database:
/// Drop all tables in the database
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()>;
fn as_any(&self) -> &dyn std::any::Any;
/// Get the equivalent namespace client of this database.
///
/// The concrete client depends on the implementation:
/// - For LanceNamespaceDatabase, it is the underlying LanceNamespace.
/// - For ListingDatabase, it is the equivalent DirectoryNamespace.
/// - For RemoteDatabase, it is the equivalent RestNamespace.
///
/// The client is returned as a shared trait object.
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>>;
}

View File

@@ -1043,6 +1043,24 @@ impl Database for ListingDatabase {
fn as_any(&self) -> &dyn std::any::Any {
self
}
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
// Create a DirectoryNamespace pointing to the same root with the same storage options
let mut builder = lance_namespace_impls::DirectoryNamespaceBuilder::new(&self.uri);
// Add storage options
if !self.storage_options.is_empty() {
builder = builder.storage_options(self.storage_options.clone());
}
// Use the same session
builder = builder.session(self.session.clone());
let namespace = builder.build().await.map_err(|e| Error::Runtime {
message: format!("Failed to create namespace client: {}", e),
})?;
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
}
}
#[cfg(test)]
@@ -2027,4 +2045,63 @@ mod tests {
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
}
/// Tables created through the listing database must be visible through the
/// namespace client it hands out.
#[tokio::test]
async fn test_namespace_client() {
    let (_tempdir, db) = setup_database().await;

    // Two empty tables sharing one schema give the namespace something to list.
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
    ]));
    for table_name in ["table1", "table2"] {
        db.create_table(CreateTableRequest {
            name: table_name.to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
            location: None,
            namespace_client: None,
        })
        .await
        .unwrap();
    }

    let namespace_client = db.namespace_client().await;
    assert!(namespace_client.is_ok());
    let namespace_client = namespace_client.unwrap();

    // An empty id addresses the root namespace.
    let root_listing = lance_namespace::models::ListTablesRequest {
        id: Some(vec![]),
        ..Default::default()
    };
    let list_result = namespace_client.list_tables(root_listing).await;
    assert!(
        list_result.is_ok(),
        "list_tables failed: {:?}",
        list_result.err()
    );

    let tables = list_result.unwrap().tables;
    assert_eq!(tables.len(), 2);
    assert!(tables.contains(&"table1".to_string()));
    assert!(tables.contains(&"table2".to_string()));
}
}

View File

@@ -425,6 +425,10 @@ impl Database for LanceNamespaceDatabase {
fn as_any(&self) -> &dyn std::any::Any {
self
}
/// Return the namespace this database wraps.
///
/// No new client is constructed: the stored `namespace` handle is cloned
/// and returned, and this implementation never produces an error.
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>> {
Ok(self.namespace.clone())
}
}
#[cfg(test)]

View File

@@ -232,6 +232,38 @@ impl HttpSend for Sender {
}
}
/// Parsed components from a database URL (db://...)
///
/// For `db://db_name/prefix`, `db_name` is the host part of the URL and
/// `db_prefix` is the path with its leading `/` removed (`None` when the
/// path is empty).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedDbUrl {
    /// Database name, taken from the host component of the URL.
    pub db_name: String,
    /// Optional prefix, taken from the URL path; `None` when no path is given.
    pub db_prefix: Option<String>,
}
/// Parse a database URL and extract the database name and optional prefix.
///
/// Expected format: `db://db_name` or `db://db_name/prefix`
pub fn parse_db_url(db_url: &str) -> Result<ParsedDbUrl> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
})?;
debug_assert_eq!(parsed_url.scheme(), "db");
if !parsed_url.has_host() {
return Err(Error::InvalidInput {
message: format!("Invalid database URL (missing host) '{}'", db_url),
});
}
let db_name = parsed_url.host_str().unwrap().to_string();
let db_prefix = {
let prefix = parsed_url.path().trim_start_matches('/');
if prefix.is_empty() {
None
} else {
Some(prefix.to_string())
}
};
Ok(ParsedDbUrl { db_name, db_prefix })
}
impl RestfulLanceDbClient<Sender> {
fn get_timeout(passed: Option<Duration>, env_var: &str) -> Result<Option<Duration>> {
if let Some(passed) = passed {
@@ -250,32 +282,12 @@ impl RestfulLanceDbClient<Sender> {
}
pub fn try_new(
db_url: &str,
api_key: &str,
parsed_url: &ParsedDbUrl,
region: &str,
host_override: Option<String>,
default_headers: HeaderMap,
client_config: ClientConfig,
options: &RemoteOptions,
) -> Result<Self> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
})?;
debug_assert_eq!(parsed_url.scheme(), "db");
if !parsed_url.has_host() {
return Err(Error::InvalidInput {
message: format!("Invalid database URL (missing host) '{}'", db_url),
});
}
let db_name = parsed_url.host_str().unwrap();
let db_prefix = {
let prefix = parsed_url.path().trim_start_matches('/');
if prefix.is_empty() {
None
} else {
Some(prefix)
}
};
// Get the timeouts
let timeout =
Self::get_timeout(client_config.timeout_config.timeout, "LANCE_CLIENT_TIMEOUT")?;
@@ -348,15 +360,7 @@ impl RestfulLanceDbClient<Sender> {
}
let client = client_builder
.default_headers(Self::default_headers(
api_key,
region,
db_name,
host_override.is_some(),
options,
db_prefix,
&client_config,
)?)
.default_headers(default_headers)
.user_agent(client_config.user_agent)
.build()
.map_err(|err| Error::Other {
@@ -366,7 +370,7 @@ impl RestfulLanceDbClient<Sender> {
let host = match host_override {
Some(host_override) => host_override,
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
None => format!("https://{}.{}.api.lancedb.com", parsed_url.db_name, region),
};
debug!("Created client for host: {}", host);
let retry_config = client_config.retry_config.clone().try_into()?;
@@ -389,7 +393,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
&self.host
}
fn default_headers(
pub fn default_headers(
api_key: &str,
region: &str,
db_name: &str,

View File

@@ -189,6 +189,10 @@ pub struct RemoteDatabase<S: HttpSend = Sender> {
client: RestfulLanceDbClient<S>,
table_cache: Cache<String, Arc<RemoteTable<S>>>,
uri: String,
/// Headers to pass to the namespace client for authentication
namespace_headers: HashMap<String, String>,
/// TLS configuration for mTLS support
tls_config: Option<super::client::TlsConfig>,
}
impl RemoteDatabase {
@@ -200,13 +204,32 @@ impl RemoteDatabase {
client_config: ClientConfig,
options: RemoteOptions,
) -> Result<Self> {
let client = RestfulLanceDbClient::try_new(
uri,
let parsed = super::client::parse_db_url(uri)?;
let header_map = RestfulLanceDbClient::<Sender>::default_headers(
api_key,
region,
host_override,
client_config,
&parsed.db_name,
host_override.is_some(),
&options,
parsed.db_prefix.as_deref(),
&client_config,
)?;
let namespace_headers: HashMap<String, String> = header_map
.iter()
.filter_map(|(k, v)| {
v.to_str()
.ok()
.map(|val| (k.as_str().to_string(), val.to_string()))
})
.collect();
let client = RestfulLanceDbClient::try_new(
&parsed,
region,
host_override,
header_map,
client_config.clone(),
)?;
let table_cache = Cache::builder()
@@ -218,6 +241,8 @@ impl RemoteDatabase {
client,
table_cache,
uri: uri.to_owned(),
namespace_headers,
tls_config: client_config.tls_config,
})
}
}
@@ -240,6 +265,8 @@ mod test_utils {
client,
table_cache: Cache::new(0),
uri: "http://localhost".to_string(),
namespace_headers: HashMap::new(),
tls_config: None,
}
}
@@ -248,11 +275,13 @@ mod test_utils {
F: Fn(reqwest::Request) -> http::Response<T> + Send + Sync + 'static,
T: Into<reqwest::Body>,
{
let client = client_with_handler_and_config(handler, config);
let client = client_with_handler_and_config(handler, config.clone());
Self {
client,
table_cache: Cache::new(0),
uri: "http://localhost".to_string(),
namespace_headers: config.extra_headers.clone(),
tls_config: config.tls_config.clone(),
}
}
}
@@ -716,7 +745,8 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let req = self
.client
.get(&format!("/v1/namespace/{}/describe", namespace_id));
.post(&format!("/v1/namespace/{}/describe", namespace_id))
.json(&DescribeNamespaceRequest::default());
let (request_id, resp) = self.client.send(req).await?;
let resp = self.client.check_response(&request_id, resp).await?;
@@ -727,6 +757,31 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
fn as_any(&self) -> &dyn std::any::Any {
self
}
/// Build a `RestNamespace` that targets the same host as this database.
///
/// The namespace uses the client's id delimiter and the stored
/// authentication headers. When a TLS configuration is present, its
/// certificate, key and CA paths and its hostname-assertion flag are
/// forwarded as well.
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
    // TODO: support header provider
    let base = lance_namespace_impls::RestNamespaceBuilder::new(self.client.host())
        .delimiter(&self.client.id_delimiter)
        .headers(self.namespace_headers.clone());

    // mTLS settings are applied only when the connection was configured with them.
    let configured = match self.tls_config.as_ref() {
        None => base,
        Some(tls) => {
            let mut with_tls = base;
            if let Some(cert_path) = tls.cert_file.as_ref() {
                with_tls = with_tls.cert_file(cert_path);
            }
            if let Some(key_path) = tls.key_file.as_ref() {
                with_tls = with_tls.key_file(key_path);
            }
            if let Some(ca_path) = tls.ssl_ca_cert.as_ref() {
                with_tls = with_tls.ssl_ca_cert(ca_path);
            }
            with_tls.assert_hostname(tls.assert_hostname)
        }
    };

    Ok(Arc::new(configured.build()) as Arc<dyn lance_namespace::LanceNamespace>)
}
}
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
@@ -1518,4 +1573,265 @@ mod tests {
panic!("Expected HTTP error");
}
}
/// A connection backed by a mock HTTP handler can hand out a namespace client.
#[tokio::test]
async fn test_namespace_client() {
    // The handler answers every request with an empty table listing.
    let conn = Connection::new_with_handler(|_| {
        http::Response::builder()
            .status(200)
            .body(r#"{"tables": []}"#)
            .unwrap()
    });

    // Only construction is checked here; no request is sent through the client.
    let namespace_client = conn.namespace_client().await;
    assert!(namespace_client.is_ok());
}
/// A namespace client can be created when the connection carries a TLS configuration.
#[tokio::test]
async fn test_namespace_client_with_tls_config() {
    use crate::remote::client::TlsConfig;

    // Every TLS option is populated so each builder branch is exercised.
    let client_config = ClientConfig {
        tls_config: Some(TlsConfig {
            cert_file: Some("/path/to/cert.pem".to_string()),
            key_file: Some("/path/to/key.pem".to_string()),
            ssl_ca_cert: Some("/path/to/ca.pem".to_string()),
            assert_hostname: true,
        }),
        ..Default::default()
    };

    let conn = Connection::new_with_handler_and_config(
        |_| {
            http::Response::builder()
                .status(200)
                .body(r#"{"tables": []}"#)
                .unwrap()
        },
        client_config,
    );

    // Get the namespace client - it should be created with the TLS config
    let namespace_client = conn.namespace_client().await;
    assert!(namespace_client.is_ok());
}
/// A namespace client can be created when the connection carries extra headers.
#[tokio::test]
async fn test_namespace_client_with_headers() {
    let client_config = ClientConfig {
        extra_headers: HashMap::from([(
            "X-Custom-Header".to_string(),
            "custom-value".to_string(),
        )]),
        ..Default::default()
    };

    let conn = Connection::new_with_handler_and_config(
        |_| {
            http::Response::builder()
                .status(200)
                .body(r#"{"tables": []}"#)
                .unwrap()
        },
        client_config,
    );

    // Get the namespace client - it should be created with the extra headers
    let namespace_client = conn.namespace_client().await;
    assert!(namespace_client.is_ok());
}
/// Integration tests using RestAdapter to run RemoteDatabase against a real namespace server
mod rest_adapter_integration {
use super::*;
use lance_namespace::models::ListTablesRequest;
use lance_namespace_impls::{DirectoryNamespaceBuilder, RestAdapter, RestAdapterConfig};
use std::sync::Arc;
use tempfile::TempDir;
/// Test fixture that manages a REST server backed by DirectoryNamespace
///
/// The server is shut down when the fixture is dropped (see the `Drop` impl).
struct RestServerFixture {
// Never read; held so the backing directory lives as long as the fixture
// (presumably removed on drop, per tempfile semantics - confirm).
_temp_dir: TempDir,
// Handle to the running server; used to query the port and to shut it down.
server_handle: lance_namespace_impls::RestAdapterHandle,
// Base URL of the running server, in the form `http://127.0.0.1:<port>`.
server_url: String,
}
impl RestServerFixture {
    /// Start a REST adapter on an OS-assigned port, backed by a
    /// `DirectoryNamespace` rooted in a fresh temporary directory.
    ///
    /// Panics if any setup step fails, which is acceptable in test code.
    async fn new() -> Self {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_str().unwrap().to_string();

        // Directory-backed namespace that the REST server will expose.
        let backend = Arc::new(
            DirectoryNamespaceBuilder::new(&root)
                .build()
                .await
                .unwrap(),
        );

        // Port 0 asks the OS for any free port; the real port is read back below.
        let adapter = RestAdapter::new(
            backend,
            RestAdapterConfig {
                port: 0,
                ..Default::default()
            },
        );
        let server_handle = adapter.start().await.unwrap();
        let server_url = format!("http://127.0.0.1:{}", server_handle.port());

        Self {
            _temp_dir: temp_dir,
            server_handle,
            server_url,
        }
    }
}
/// Shuts the REST server down when the fixture goes out of scope, so each
/// test does not have to call `shutdown` itself.
impl Drop for RestServerFixture {
fn drop(&mut self) {
self.server_handle.shutdown();
}
}
/// End-to-end check of a remote connection against a local REST adapter:
/// create a namespace, create a table in it, list it both through the
/// connection and through the namespace client, then reopen the table.
///
/// The steps are order-dependent: each one relies on state created on the
/// server by the previous one.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_remote_database_with_rest_adapter() {
use lance_namespace::models::CreateNamespaceRequest;
// The fixture must stay alive for the whole test; dropping it shuts the server down.
let fixture = RestServerFixture::new().await;
// Connect to the REST server using lancedb Connection
// Use db://dummy as URI and set actual server URL via host_override
let conn = ConnectBuilder::new("db://dummy")
.api_key("test-api-key")
.region("us-east-1")
.host_override(&fixture.server_url)
.execute()
.await
.unwrap();
// Create a child namespace first
let namespace = vec!["test_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
})
.await
.expect("Failed to create namespace");
// Create a table in the child namespace
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
let data = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
)
.unwrap();
let reader = RecordBatchIterator::new([Ok(data.clone())], schema.clone());
let table = conn
.create_table("test_table", reader)
.namespace(namespace.clone())
.execute()
.await;
assert!(table.is_ok(), "Failed to create table: {:?}", table.err());
// List tables in the child namespace
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
})
.await
.expect("Failed to list tables");
assert_eq!(list_response.tables, vec!["test_table"]);
// Get namespace client and verify it can also list tables
// (it must see the same table the connection just created)
let namespace_client = conn.namespace_client().await.unwrap();
let list_response = namespace_client
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
})
.await
.unwrap();
assert_eq!(list_response.tables, vec!["test_table"]);
// Open the table from the child namespace
let opened_table = conn
.open_table("test_table")
.namespace(namespace.clone())
.execute()
.await;
assert!(
opened_table.is_ok(),
"Failed to open table: {:?}",
opened_table.err()
);
assert_eq!(opened_table.unwrap().name(), "test_table");
}
/// Creates three tables in one child namespace through a remote connection
/// and checks that listing the namespace returns all of them.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_remote_database_with_multiple_tables() {
    use lance_namespace::models::CreateNamespaceRequest;

    // The fixture must outlive the connection; dropping it stops the server.
    let fixture = RestServerFixture::new().await;

    // "db://dummy" only satisfies the URI format; the real target is the host override.
    let conn = ConnectBuilder::new("db://dummy")
        .api_key("test-api-key")
        .region("us-east-1")
        .host_override(&fixture.server_url)
        .execute()
        .await
        .unwrap();

    // Tables are created inside a child namespace, which must exist first.
    let namespace = vec!["multi_table_ns".to_string()];
    let create_namespace = CreateNamespaceRequest {
        id: Some(namespace.clone()),
        mode: None,
        properties: None,
    };
    conn.create_namespace(create_namespace)
        .await
        .expect("Failed to create namespace");

    // One single-row table per index, all sharing the same schema.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    for i in 1..=3 {
        let batch =
            RecordBatch::try_new(schema.clone(), vec![Arc::new(Int32Array::from(vec![i]))])
                .unwrap();
        let reader = RecordBatchIterator::new([Ok(batch)], schema.clone());
        conn.create_table(format!("table{}", i), reader)
            .namespace(namespace.clone())
            .execute()
            .await
            .unwrap_or_else(|e| panic!("Failed to create table{}: {:?}", i, e));
    }

    // Every created table must show up in the namespace listing.
    let listing_request = ListTablesRequest {
        id: Some(namespace.clone()),
        page_token: None,
        limit: None,
    };
    let list_response = conn.list_tables(listing_request).await.unwrap();
    assert_eq!(list_response.tables.len(), 3);
    assert!(list_response.tables.contains(&"table1".to_string()));
    assert!(list_response.tables.contains(&"table2".to_string()));
    assert!(list_response.tables.contains(&"table3".to_string()));
}
}
}