From ff75f2467b43d93e32c38535e74385879a74c767 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 4 Dec 2025 13:53:47 -0800 Subject: [PATCH] feat: use rest namespace for lancedb java sdk (#2845) After the refactoring on both client and server side, we should have the ability to fully use lance REST namespace to call into LanceDB cloud and enterprise. We can avoid having a JNI implementation (which today does not really do anything except for vending a connection object), and just use lance-core's RestNamespace. We will at this moment have a LanceDbRestNamespaceBuilder to allow users to more easily build the RestNamespace to talk to LanceDB Cloud or Enterprise endpoint. In the future, we could extend this further to also support the local mode through DirectoryNamespace. That will be a separated PR. --- .bumpversion.toml | 6 + .github/workflows/java-publish.yml | 107 +--- .github/workflows/java.yml | 118 +---- docs/mkdocs.yml | 1 + docs/src/index.md | 1 + docs/src/java/java.md | 499 ++++++++++++++++++ java/Makefile | 28 + java/README.md | 23 +- java/core/lancedb-jni/Cargo.toml | 30 -- java/core/lancedb-jni/src/connection.rs | 133 ----- java/core/lancedb-jni/src/error.rs | 217 -------- java/core/lancedb-jni/src/ffi.rs | 194 ------- java/core/lancedb-jni/src/lib.rs | 57 -- java/core/lancedb-jni/src/traits.rs | 114 ---- java/core/pom.xml | 103 ---- .../java/com/lancedb/lancedb/Connection.java | 108 ---- .../com/lancedb/lancedb/ConnectionTest.java | 135 ----- .../dataset_version.lance/_latest.manifest | Bin 273 -> 0 bytes ...0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn | 1 - ...1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn | Bin 99 -> 0 bytes ...2-3344b369-7471-4e23-8865-c949b6e19bc2.txn | Bin 99 -> 0 bytes .../_versions/1.manifest | Bin 159 -> 0 bytes .../_versions/2.manifest | Bin 217 -> 0 bytes .../_versions/3.manifest | Bin 273 -> 0 bytes ...60a9b599-f79f-48a8-bffa-b495762b622a.lance | Bin 682 -> 0 bytes ...a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance | Bin 642 -> 0 bytes 
.../new_empty_dataset.lance/_latest.manifest | Bin 159 -> 0 bytes ...0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn | 1 - .../_versions/1.manifest | Bin 159 -> 0 bytes .../example_db/test.lance/_latest.manifest | Bin 264 -> 0 bytes ...0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn | 1 - ...1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn | Bin 98 -> 0 bytes .../test.lance/_versions/1.manifest | Bin 209 -> 0 bytes .../test.lance/_versions/2.manifest | Bin 264 -> 0 bytes ...cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance | Bin 694 -> 0 bytes .../write_stream.lance/_latest.manifest | Bin 214 -> 0 bytes ...0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn | Bin 157 -> 0 bytes .../write_stream.lance/_versions/1.manifest | Bin 214 -> 0 bytes ...665ff491-6dc5-4496-b292-166ed5c2a309.lance | Bin 728 -> 0 bytes java/lance-namespace/pom.xml | 26 - java/lancedb-core/pom.xml | 99 ++++ .../lancedb/LanceDbRestNamespaceBuilder.java} | 91 ++-- .../LanceDbRestNamespaceBuilderTest.java | 96 ++++ .../src/test/resources/log4j2.xml | 32 ++ java/pom.xml | 21 +- 45 files changed, 888 insertions(+), 1354 deletions(-) create mode 100644 docs/src/java/java.md create mode 100644 java/Makefile delete mode 100644 java/core/lancedb-jni/Cargo.toml delete mode 100644 java/core/lancedb-jni/src/connection.rs delete mode 100644 java/core/lancedb-jni/src/error.rs delete mode 100644 java/core/lancedb-jni/src/ffi.rs delete mode 100644 java/core/lancedb-jni/src/lib.rs delete mode 100644 java/core/lancedb-jni/src/traits.rs delete mode 100644 java/core/pom.xml delete mode 100644 java/core/src/main/java/com/lancedb/lancedb/Connection.java delete mode 100644 java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/_transactions/0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn delete mode 100644 
java/core/src/test/resources/example_db/dataset_version.lance/_transactions/1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/data/60a9b599-f79f-48a8-bffa-b495762b622a.lance delete mode 100644 java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance delete mode 100644 java/core/src/test/resources/example_db/new_empty_dataset.lance/_latest.manifest delete mode 100644 java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn delete mode 100644 java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest delete mode 100644 java/core/src/test/resources/example_db/test.lance/_latest.manifest delete mode 100644 java/core/src/test/resources/example_db/test.lance/_transactions/0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn delete mode 100644 java/core/src/test/resources/example_db/test.lance/_transactions/1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn delete mode 100644 java/core/src/test/resources/example_db/test.lance/_versions/1.manifest delete mode 100644 java/core/src/test/resources/example_db/test.lance/_versions/2.manifest delete mode 100644 java/core/src/test/resources/example_db/test.lance/data/cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance delete mode 100644 java/core/src/test/resources/example_db/write_stream.lance/_latest.manifest delete mode 100644 
java/core/src/test/resources/example_db/write_stream.lance/_transactions/0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn delete mode 100644 java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest delete mode 100644 java/core/src/test/resources/example_db/write_stream.lance/data/665ff491-6dc5-4496-b292-166ed5c2a309.lance delete mode 100644 java/lance-namespace/pom.xml create mode 100644 java/lancedb-core/pom.xml rename java/{lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java => lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java} (55%) create mode 100644 java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java create mode 100644 java/lancedb-core/src/test/resources/log4j2.xml diff --git a/.bumpversion.toml b/.bumpversion.toml index 76bfaf39..c48506ef 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -72,3 +72,9 @@ search = "\nversion = \"{current_version}\"" filename = "nodejs/Cargo.toml" replace = "\nversion = \"{new_version}\"" search = "\nversion = \"{current_version}\"" + +# Java documentation +[[tool.bumpversion.files]] +filename = "docs/src/java/java.md" +replace = "{new_version}" +search = "{current_version}" diff --git a/.github/workflows/java-publish.yml b/.github/workflows/java-publish.yml index eca792f3..6abaac06 100644 --- a/.github/workflows/java-publish.yml +++ b/.github/workflows/java-publish.yml @@ -1,76 +1,35 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ name: Build and publish Java packages on: - release: - types: [released] + push: + tags: + - "v*" pull_request: paths: - .github/workflows/java-publish.yml jobs: - macos-arm64: - name: Build on MacOS Arm64 - runs-on: macos-14 - timeout-minutes: 45 - defaults: - run: - working-directory: ./java/core/lancedb-jni - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - - name: Install dependencies - run: | - brew install protobuf - - name: Build release - run: | - cargo build --release - - uses: actions/upload-artifact@v4 - with: - name: liblancedb_jni_darwin_aarch64.zip - path: target/release/liblancedb_jni.dylib - retention-days: 1 - if-no-files-found: error - linux-arm64: - name: Build on Linux Arm64 - runs-on: warp-ubuntu-2204-arm64-8x - timeout-minutes: 45 - defaults: - run: - working-directory: ./java/core/lancedb-jni - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - cache-workspaces: "./java/core/lancedb-jni" - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. 
- rustflags: "-C debuginfo=1" - - name: Install dependencies - run: | - sudo apt -y -qq update - sudo apt install -y protobuf-compiler libssl-dev pkg-config - - name: Build release - run: | - cargo build --release - - uses: actions/upload-artifact@v4 - with: - name: liblancedb_jni_linux_aarch64.zip - path: target/release/liblancedb_jni.so - retention-days: 1 - if-no-files-found: error - linux-x86: - runs-on: warp-ubuntu-2204-x64-8x + publish: + name: Build and Publish + runs-on: ubuntu-24.04 timeout-minutes: 30 - needs: [macos-arm64, linux-arm64] defaults: run: working-directory: ./java steps: - name: Checkout repository uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - name: Set up Java 8 uses: actions/setup-java@v4 with: @@ -82,40 +41,30 @@ jobs: server-password: SONATYPE_TOKEN gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }} - - name: Install dependencies + - name: Set git config run: | - sudo apt -y -qq update - sudo apt install -y protobuf-compiler libssl-dev pkg-config - - name: Download artifact - uses: actions/download-artifact@v4 - - name: Copy native libs - run: | - mkdir -p ./core/target/classes/nativelib/darwin-aarch64 ./core/target/classes/nativelib/linux-aarch64 - cp ../liblancedb_jni_darwin_aarch64.zip/liblancedb_jni.dylib ./core/target/classes/nativelib/darwin-aarch64/liblancedb_jni.dylib - cp ../liblancedb_jni_linux_aarch64.zip/liblancedb_jni.so ./core/target/classes/nativelib/linux-aarch64/liblancedb_jni.so + git config --global user.email "dev+gha@lancedb.com" + git config --global user.name "LanceDB Github Runner" - name: Dry run if: github.event_name == 'pull_request' run: | - mvn --batch-mode -DskipTests -Drust.release.build=true package - - name: Set github - run: | - git config --global user.email "LanceDB Github Runner" - git config --global user.name "dev+gha@lancedb.com" - - name: Publish with Java 8 - if: github.event_name == 'release' + ./mvnw --batch-mode -DskipTests package -pl 
lancedb-core -am + - name: Publish + if: startsWith(github.ref, 'refs/tags/v') run: | echo "use-agent" >> ~/.gnupg/gpg.conf echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf export GPG_TTY=$(tty) - mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh + ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh env: SONATYPE_USER: ${{ secrets.SONATYPE_USER }} SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }} + report-failure: name: Report Workflow Failure runs-on: ubuntu-latest - needs: [linux-arm64, linux-x86, macos-arm64] - if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch') + needs: [publish] + if: always() && failure() && startsWith(github.ref, 'refs/tags/v') permissions: contents: read issues: write diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 29091097..2089838b 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -1,118 +1,46 @@ -name: Build and Run Java JNI Tests +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Build Java LanceDB Core + on: push: branches: - main paths: - java/** + - .github/workflows/java.yml pull_request: paths: - java/** - - rust/** - .github/workflows/java.yml -env: - # This env var is used by Swatinem/rust-cache@v2 for the cache - # key, so we set it to make sure it is always consistent. - CARGO_TERM_COLOR: always - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" - RUST_BACKTRACE: "1" - # according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html - # CI builds are faster with incremental disabled. - CARGO_INCREMENTAL: "0" - CARGO_BUILD_JOBS: "1" + jobs: - linux-build-java-11: - runs-on: ubuntu-22.04 - name: ubuntu-22.04 + Java 11 + build-java: + runs-on: ubuntu-24.04 + name: Build defaults: run: working-directory: ./java steps: - name: Checkout repository uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - with: - workspaces: java/core/lancedb-jni - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - components: rustfmt - - name: Run cargo fmt - run: cargo fmt --check - working-directory: ./java/core/lancedb-jni - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y protobuf-compiler libssl-dev - - name: Install Java 11 - uses: actions/setup-java@v4 - with: - distribution: temurin - java-version: 11 - cache: "maven" - - name: Java Style Check - run: mvn checkstyle:check - # Disable because of issues in lancedb rust core code - # - name: Rust Clippy - # working-directory: java/core/lancedb-jni - # run: cargo clippy --all-targets -- -D warnings - - name: Running tests with Java 11 - run: mvn clean test - linux-build-java-17: - runs-on: ubuntu-22.04 - name: ubuntu-22.04 + Java 17 - defaults: - run: - working-directory: ./java - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - with: - workspaces: 
java/core/lancedb-jni - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - components: rustfmt - - name: Run cargo fmt - run: cargo fmt --check - working-directory: ./java/core/lancedb-jni - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y protobuf-compiler libssl-dev - - name: Install Java 17 + - name: Set up Java 17 uses: actions/setup-java@v4 with: distribution: temurin java-version: 17 cache: "maven" - - run: echo "JAVA_17=$JAVA_HOME" >> $GITHUB_ENV - name: Java Style Check - run: mvn checkstyle:check - # Disable because of issues in lancedb rust core code - # - name: Rust Clippy - # working-directory: java/core/lancedb-jni - # run: cargo clippy --all-targets -- -D warnings - - name: Running tests with Java 17 - run: | - export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \ - -XX:+IgnoreUnrecognizedVMOptions \ - --add-opens=java.base/java.lang=ALL-UNNAMED \ - --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ - --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ - --add-opens=java.base/java.io=ALL-UNNAMED \ - --add-opens=java.base/java.net=ALL-UNNAMED \ - --add-opens=java.base/java.nio=ALL-UNNAMED \ - --add-opens=java.base/java.util=ALL-UNNAMED \ - --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \ - --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \ - --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \ - --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \ - --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \ - --add-opens=java.base/sun.security.action=ALL-UNNAMED \ - --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ - --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \ - -Djdk.reflect.useDirectMethodHandle=false \ - -Dio.netty.tryReflectionSetAccessible=true" - JAVA_HOME=$JAVA_17 mvn clean test + run: ./mvnw checkstyle:check + - name: Build and install + run: ./mvnw clean install diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index a801520b..35c297e0 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml 
@@ -123,6 +123,7 @@ nav: - Overview: index.md - Python: python/python.md - Javascript/TypeScript: js/globals.md + - Java: java/java.md - Rust: https://docs.rs/lancedb/latest/lancedb/index.html extra_css: diff --git a/docs/src/index.md b/docs/src/index.md index d8da266a..c2dbabe0 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,4 +4,5 @@ This page contains the API reference for the SDKs supported by the LanceDB team. - [Python](python/python.md) - [JavaScript/TypeScript](js/globals.md) +- [Java](java/java.md) - [Rust](https://docs.rs/lancedb/latest/lancedb/index.html) \ No newline at end of file diff --git a/docs/src/java/java.md b/docs/src/java/java.md new file mode 100644 index 00000000..583e6b69 --- /dev/null +++ b/docs/src/java/java.md @@ -0,0 +1,499 @@ +# Java SDK + +The LanceDB Java SDK provides a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API. + +!!! note + The Java SDK currently works only with remote databases hosted on LanceDB Cloud or LanceDB Enterprise. + Local database support is a work in progress. Check [LANCEDB-2848](https://github.com/lancedb/lancedb/issues/2848) for the latest progress. 
+ +## Installation + +Add the following dependency to your `pom.xml`: + +```xml +<dependency> + <groupId>com.lancedb</groupId> + <artifactId>lancedb-core</artifactId> + <version>0.23.0-beta.0</version> +</dependency> +``` + +## Quick Start + +### Connecting to LanceDB Cloud + +```java +import com.lancedb.LanceDbRestNamespaceBuilder; +import org.lance.namespace.RestNamespace; + +// If your DB url is db://example-db, then your database here is example-db +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() + .apiKey("your_lancedb_cloud_api_key") + .database("your_database_name") + .build(); +``` + +### Connecting to LanceDB Enterprise + +For LanceDB Enterprise deployments with a custom endpoint: + +```java +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() + .apiKey("your_lancedb_enterprise_api_key") + .database("your_database_name") + .endpoint("<your_enterprise_endpoint>") + .build(); +``` + +### Configuration Options + +| Method | Description | Required | +|--------|-------------|----------| +| `apiKey(String)` | LanceDB API key | Yes | +| `database(String)` | Database name | Yes | +| `endpoint(String)` | Custom endpoint URL for Enterprise deployments | No | +| `region(String)` | AWS region (default: "us-east-1") | No | +| `config(String, String)` | Additional configuration parameters | No | + +## Metadata Operations + +### Creating a Namespace + +Namespaces organize tables hierarchically. 
Create a namespace before creating tables within it: + +```java +import org.lance.namespace.model.CreateNamespaceRequest; +import org.lance.namespace.model.CreateNamespaceResponse; + +// Create a child namespace +CreateNamespaceRequest request = new CreateNamespaceRequest(); +request.setId(Arrays.asList("my_namespace")); + +CreateNamespaceResponse response = namespace.createNamespace(request); +``` + +You can also create nested namespaces: + +```java +// Create a nested namespace: parent/child +CreateNamespaceRequest request = new CreateNamespaceRequest(); +request.setId(Arrays.asList("parent_namespace", "child_namespace")); + +CreateNamespaceResponse response = namespace.createNamespace(request); +``` + +### Describing a Namespace + +```java +import org.lance.namespace.model.DescribeNamespaceRequest; +import org.lance.namespace.model.DescribeNamespaceResponse; + +DescribeNamespaceRequest request = new DescribeNamespaceRequest(); +request.setId(Arrays.asList("my_namespace")); + +DescribeNamespaceResponse response = namespace.describeNamespace(request); +System.out.println("Namespace properties: " + response.getProperties()); +``` + +### Listing Namespaces + +```java +import org.lance.namespace.model.ListNamespacesRequest; +import org.lance.namespace.model.ListNamespacesResponse; + +// List all namespaces at root level +ListNamespacesRequest request = new ListNamespacesRequest(); +request.setId(Arrays.asList()); // Empty for root + +ListNamespacesResponse response = namespace.listNamespaces(request); +for (String ns : response.getNamespaces()) { + System.out.println("Namespace: " + ns); +} + +// List child namespaces under a parent +ListNamespacesRequest childRequest = new ListNamespacesRequest(); +childRequest.setId(Arrays.asList("parent_namespace")); + +ListNamespacesResponse childResponse = namespace.listNamespaces(childRequest); +``` + +### Listing Tables + +```java +import org.lance.namespace.model.ListTablesRequest; +import 
org.lance.namespace.model.ListTablesResponse; + +// List tables in a namespace +ListTablesRequest request = new ListTablesRequest(); +request.setId(Arrays.asList("my_namespace")); + +ListTablesResponse response = namespace.listTables(request); +for (String table : response.getTables()) { + System.out.println("Table: " + table); +} +``` + +### Dropping a Namespace + +```java +import org.lance.namespace.model.DropNamespaceRequest; +import org.lance.namespace.model.DropNamespaceResponse; + +DropNamespaceRequest request = new DropNamespaceRequest(); +request.setId(Arrays.asList("my_namespace")); + +DropNamespaceResponse response = namespace.dropNamespace(request); +``` + +### Describing a Table + +```java +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; + +DescribeTableRequest request = new DescribeTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +DescribeTableResponse response = namespace.describeTable(request); +System.out.println("Table version: " + response.getVersion()); +System.out.println("Schema fields: " + response.getSchema().getFields()); +``` + +### Dropping a Table + +```java +import org.lance.namespace.model.DropTableRequest; +import org.lance.namespace.model.DropTableResponse; + +DropTableRequest request = new DropTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +DropTableResponse response = namespace.dropTable(request); +``` + +## Writing Data + +### Creating a Table + +Tables are created within a namespace by providing data in Apache Arrow IPC format: + +```java +import org.lance.namespace.RestNamespace; +import org.lance.namespace.model.CreateTableRequest; +import org.lance.namespace.model.CreateTableResponse; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import 
org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; + +import java.io.ByteArrayOutputStream; +import java.nio.channels.Channels; +import java.util.Arrays; + +// Create schema with id, name, and embedding fields +Schema schema = new Schema(Arrays.asList( + new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null), + new Field("name", FieldType.nullable(new ArrowType.Utf8()), null), + new Field("embedding", + FieldType.nullable(new ArrowType.FixedSizeList(128)), + Arrays.asList(new Field("item", + FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), + null))) +)); + +try (BufferAllocator allocator = new RootAllocator(); + VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + + // Populate data + root.setRowCount(3); + IntVector idVector = (IntVector) root.getVector("id"); + VarCharVector nameVector = (VarCharVector) root.getVector("name"); + FixedSizeListVector embeddingVector = (FixedSizeListVector) root.getVector("embedding"); + Float4Vector embeddingData = (Float4Vector) embeddingVector.getDataVector(); + + for (int i = 0; i < 3; i++) { + idVector.setSafe(i, i + 1); + nameVector.setSafe(i, ("item_" + i).getBytes()); + embeddingVector.setNotNull(i); + for (int j = 0; j < 128; j++) { + embeddingData.setSafe(i * 128 + j, (float) i); + } + } + idVector.setValueCount(3); + nameVector.setValueCount(3); + embeddingData.setValueCount(3 * 128); + embeddingVector.setValueCount(3); + + // Serialize to Arrow IPC format + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (ArrowStreamWriter 
writer = new ArrowStreamWriter(root, null, Channels.newChannel(out))) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + byte[] tableData = out.toByteArray(); + + // Create table in a namespace + CreateTableRequest request = new CreateTableRequest(); + request.setId(Arrays.asList("my_namespace", "my_table")); + CreateTableResponse response = namespace.createTable(request, tableData); +} +``` + +### Insert + +```java +import org.lance.namespace.model.InsertIntoTableRequest; +import org.lance.namespace.model.InsertIntoTableResponse; + +// Prepare data in Arrow IPC format (similar to create table example) +byte[] insertData = prepareArrowData(); + +InsertIntoTableRequest request = new InsertIntoTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); +request.setMode(InsertIntoTableRequest.ModeEnum.APPEND); + +InsertIntoTableResponse response = namespace.insertIntoTable(request, insertData); +System.out.println("New version: " + response.getVersion()); +``` + +### Update + +Update rows matching a predicate condition: + +```java +import org.lance.namespace.model.UpdateTableRequest; +import org.lance.namespace.model.UpdateTableResponse; + +UpdateTableRequest request = new UpdateTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +// Predicate to select rows to update +request.setPredicate("id = 1"); + +// Set new values using SQL expressions as [column_name, expression] pairs +request.setUpdates(Arrays.asList( + Arrays.asList("name", "'updated_name'") +)); + +UpdateTableResponse response = namespace.updateTable(request); +System.out.println("Updated rows: " + response.getUpdatedRows()); +``` + +### Delete + +Delete rows matching a predicate condition: + +```java +import org.lance.namespace.model.DeleteFromTableRequest; +import org.lance.namespace.model.DeleteFromTableResponse; + +DeleteFromTableRequest request = new DeleteFromTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +// 
Predicate to select rows to delete +request.setPredicate("id > 100"); + +DeleteFromTableResponse response = namespace.deleteFromTable(request); +System.out.println("New version: " + response.getVersion()); +``` + +### Merge Insert (Upsert) + +Merge insert allows you to update existing rows and insert new rows in a single operation based on a key column: + +```java +import org.lance.namespace.model.MergeInsertIntoTableRequest; +import org.lance.namespace.model.MergeInsertIntoTableResponse; + +// Prepare data with rows to update (id=2,3) and new rows (id=4) +byte[] mergeData = prepareArrowData(); // Contains rows with id=2,3,4 + +MergeInsertIntoTableRequest request = new MergeInsertIntoTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +// Match on the "id" column +request.setOn("id"); + +// Update all columns when a matching row is found +request.setWhenMatchedUpdateAll(true); + +// Insert new rows when no match is found +request.setWhenNotMatchedInsertAll(true); + +MergeInsertIntoTableResponse response = namespace.mergeInsertIntoTable(request, mergeData); + +System.out.println("Updated rows: " + response.getNumUpdatedRows()); +System.out.println("Inserted rows: " + response.getNumInsertedRows()); +``` + +## Querying Data + +### Counting Rows + +```java +import org.lance.namespace.model.CountTableRowsRequest; + +CountTableRowsRequest request = new CountTableRowsRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +Long rowCount = namespace.countTableRows(request); +System.out.println("Row count: " + rowCount); +``` + +### Vector Search + +```java +import org.lance.namespace.model.QueryTableRequest; +import org.lance.namespace.model.QueryTableRequestVector; + +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(10); // Return top 10 results + +// Set the query vector +List<Float> queryVector = new ArrayList<>(); +for (int i = 0; i < 128; i++) { + 
queryVector.add(1.0f); +} +QueryTableRequestVector vector = new QueryTableRequestVector(); +vector.setSingleVector(queryVector); +query.setVector(vector); + +// Specify columns to return +query.setColumns(Arrays.asList("id", "name", "embedding")); + +// Execute query - returns Arrow IPC format +byte[] result = namespace.queryTable(query); +``` + +### Full Text Search + +```java +import org.lance.namespace.model.QueryTableRequest; +import org.lance.namespace.model.QueryTableRequestFullTextQuery; +import org.lance.namespace.model.StringFtsQuery; + +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(10); + +// Set full text search query +StringFtsQuery stringQuery = new StringFtsQuery(); +stringQuery.setQuery("search terms"); +stringQuery.setColumns(Arrays.asList("text_column")); + +QueryTableRequestFullTextQuery fts = new QueryTableRequestFullTextQuery(); +fts.setStringQuery(stringQuery); +query.setFullTextQuery(fts); + +// Specify columns to return +query.setColumns(Arrays.asList("id", "text_column")); + +byte[] result = namespace.queryTable(query); +``` + +### Query with Filter + +```java +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(10); +query.setFilter("id > 50"); +query.setColumns(Arrays.asList("id", "name")); + +byte[] result = namespace.queryTable(query); +``` + +### Query with Prefilter + +```java +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(5); +query.setPrefilter(true); // Apply filter before vector search +query.setFilter("category = 'electronics'"); + +// Set query vector +QueryTableRequestVector vector = new QueryTableRequestVector(); +vector.setSingleVector(queryVector); +query.setVector(vector); + +byte[] result = namespace.queryTable(query); +``` + +### Reading Query Results + +Query results are returned in Apache Arrow IPC file 
format. Here's how to read them: + +```java +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; + +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; + +// Helper class to read Arrow data from byte array +class ByteArraySeekableByteChannel implements SeekableByteChannel { + private final byte[] data; + private long position = 0; + private boolean isOpen = true; + + public ByteArraySeekableByteChannel(byte[] data) { + this.data = data; + } + + @Override + public int read(ByteBuffer dst) { + int remaining = dst.remaining(); + int available = (int) (data.length - position); + if (available <= 0) return -1; + int toRead = Math.min(remaining, available); + dst.put(data, (int) position, toRead); + position += toRead; + return toRead; + } + + @Override public long position() { return position; } + @Override public SeekableByteChannel position(long newPosition) { position = newPosition; return this; } + @Override public long size() { return data.length; } + @Override public boolean isOpen() { return isOpen; } + @Override public void close() { isOpen = false; } + @Override public int write(ByteBuffer src) { throw new UnsupportedOperationException(); } + @Override public SeekableByteChannel truncate(long size) { throw new UnsupportedOperationException(); } +} + +// Read query results +byte[] queryResult = namespace.queryTable(query); + +try (BufferAllocator allocator = new RootAllocator(); + ArrowFileReader reader = new ArrowFileReader( + new ByteArraySeekableByteChannel(queryResult), allocator)) { + + for (int i = 0; i < reader.getRecordBlocks().size(); i++) { + reader.loadRecordBatch(reader.getRecordBlocks().get(i)); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + + // Access data + IntVector idVector = (IntVector) root.getVector("id"); + VarCharVector nameVector = (VarCharVector) 
root.getVector("name"); + + for (int row = 0; row < root.getRowCount(); row++) { + int id = idVector.get(row); + String name = new String(nameVector.get(row)); + System.out.println("Row " + row + ": id=" + id + ", name=" + name); + } + } +} +``` diff --git a/java/Makefile b/java/Makefile new file mode 100644 index 00000000..2d1f33e2 --- /dev/null +++ b/java/Makefile @@ -0,0 +1,28 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.PHONY: build-lancedb +build-lancedb: + ./mvnw spotless:apply -pl lancedb-core -am + ./mvnw install -pl lancedb-core -am + +.PHONY: test-lancedb +test-lancedb: + # Requires LANCEDB_DB and LANCEDB_API_KEY environment variables + ./mvnw test -pl lancedb-core -P integration-tests + +.PHONY: clean +clean: + ./mvnw clean + +.PHONY: build +build: build-lancedb diff --git a/java/README.md b/java/README.md index de7d9134..6bd3c683 100644 --- a/java/README.md +++ b/java/README.md @@ -7,10 +7,11 @@ For LanceDB Cloud, use the simplified builder API: ```java -import com.lancedb.lance.namespace.LanceRestNamespace; +import com.lancedb.LanceDbRestNamespaceBuilder; +import org.lance.namespace.RestNamespace; // If your DB url is db://example-db, then your database here is example-db -LanceRestNamespace namespace = LanceDBRestNamespaces.builder() +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() .apiKey("your_lancedb_cloud_api_key") .database("your_database_name") .build(); @@ -18,13 +19,13 @@ LanceRestNamespace namespace = 
LanceDBRestNamespaces.builder() ### LanceDB Enterprise -For Enterprise deployments, use your VPC endpoint: +For Enterprise deployments, use your custom endpoint: ```java -LanceRestNamespace namespace = LanceDBRestNamespaces.builder() +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() .apiKey("your_lancedb_enterprise_api_key") - .database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/ - .hostOverride("http://:80") + .database("your_database_name") + .endpoint("<your_enterprise_endpoint>") .build(); ``` @@ -33,5 +34,11 @@ LanceRestNamespace namespace = LanceDBRestNamespaces.builder() Build: ```shell -./mvnw install -``` \ No newline at end of file +./mvnw install -pl lancedb-core -am +``` + +Run tests: + +```shell +./mvnw test -pl lancedb-core +``` diff --git a/java/core/lancedb-jni/Cargo.toml b/java/core/lancedb-jni/Cargo.toml deleted file mode 100644 index ec7901ff..00000000 --- a/java/core/lancedb-jni/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -[package] -name = "lancedb-jni" -description = "JNI bindings for LanceDB" -# TODO modify lancedb/Cargo.toml for version and dependencies -version = "0.10.0" -edition.workspace = true -repository.workspace = true -readme.workspace = true -license.workspace = true -keywords.workspace = true -categories.workspace = true -publish = false - -[lib] -crate-type = ["cdylib"] - -[dependencies] -lancedb = { path = "../../../rust/lancedb", default-features = false } -lance = { workspace = true } -arrow = { workspace = true, features = ["ffi"] } -arrow-schema.workspace = true -tokio = "1.46" -jni = "0.21.1" -snafu.workspace = true -lazy_static.workspace = true -serde = { version = "^1" } -serde_json = { version = "1" } - -[features] -default = ["lancedb/default"] diff --git a/java/core/lancedb-jni/src/connection.rs b/java/core/lancedb-jni/src/connection.rs deleted file mode 100644 index 724840d0..00000000 --- a/java/core/lancedb-jni/src/connection.rs +++ /dev/null @@ -1,133 +0,0 @@ -// 
SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use crate::ffi::JNIEnvExt; -use crate::traits::IntoJava; -use crate::{Error, RT}; -use jni::objects::{JObject, JString, JValue}; -use jni::JNIEnv; -pub const NATIVE_CONNECTION: &str = "nativeConnectionHandle"; -use crate::Result; -use lancedb::connection::{connect, Connection}; - -#[derive(Clone)] -pub struct BlockingConnection { - pub(crate) inner: Connection, -} - -impl BlockingConnection { - pub fn create(dataset_uri: &str) -> Result { - let inner = RT.block_on(connect(dataset_uri).execute())?; - Ok(Self { inner }) - } - - pub fn table_names( - &self, - start_after: Option, - limit: Option, - ) -> Result> { - let mut op = self.inner.table_names(); - if let Some(start_after) = start_after { - op = op.start_after(start_after); - } - if let Some(limit) = limit { - op = op.limit(limit as u32); - } - Ok(RT.block_on(op.execute())?) - } -} - -impl IntoJava for BlockingConnection { - fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a> { - attach_native_connection(env, self) - } -} - -fn attach_native_connection<'local>( - env: &mut JNIEnv<'local>, - connection: BlockingConnection, -) -> JObject<'local> { - let j_connection = create_java_connection_object(env); - // This block sets a native Rust object (Connection) as a field in the Java object (j_Connection). - // Caution: This creates a potential for memory leaks. The Rust object (Connection) is not - // automatically garbage-collected by Java, and its memory will not be freed unless - // explicitly handled. - // - // To prevent memory leaks, ensure the following: - // 1. The Java object (`j_Connection`) should implement the `java.io.Closeable` interface. - // 2. Users of this Java object should be instructed to always use it within a try-with-resources - // statement (or manually call the `close()` method) to ensure that `self.close()` is invoked. 
- match unsafe { env.set_rust_field(&j_connection, NATIVE_CONNECTION, connection) } { - Ok(_) => j_connection, - Err(err) => { - env.throw_new( - "java/lang/RuntimeException", - format!("Failed to set native handle for Connection: {}", err), - ) - .expect("Error throwing exception"); - JObject::null() - } - } -} - -fn create_java_connection_object<'a>(env: &mut JNIEnv<'a>) -> JObject<'a> { - env.new_object("com/lancedb/lancedb/Connection", "()V", &[]) - .expect("Failed to create Java Lance Connection instance") -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lancedb_Connection_releaseNativeConnection( - mut env: JNIEnv, - j_connection: JObject, -) { - let _: BlockingConnection = unsafe { - env.take_rust_field(j_connection, NATIVE_CONNECTION) - .expect("Failed to take native Connection handle") - }; -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lancedb_Connection_connect<'local>( - mut env: JNIEnv<'local>, - _obj: JObject, - dataset_uri_object: JString, -) -> JObject<'local> { - let dataset_uri: String = ok_or_throw!(env, env.get_string(&dataset_uri_object)).into(); - let blocking_connection = ok_or_throw!(env, BlockingConnection::create(&dataset_uri)); - blocking_connection.into_java(&mut env) -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lancedb_Connection_tableNames<'local>( - mut env: JNIEnv<'local>, - j_connection: JObject, - start_after_obj: JObject, // Optional - limit_obj: JObject, // Optional -) -> JObject<'local> { - ok_or_throw!( - env, - inner_table_names(&mut env, j_connection, start_after_obj, limit_obj) - ) -} - -fn inner_table_names<'local>( - env: &mut JNIEnv<'local>, - j_connection: JObject, - start_after_obj: JObject, // Optional - limit_obj: JObject, // Optional -) -> Result> { - let start_after = env.get_string_opt(&start_after_obj)?; - let limit = env.get_int_opt(&limit_obj)?; - let conn = - unsafe { env.get_rust_field::<_, _, BlockingConnection>(j_connection, NATIVE_CONNECTION) }?; - let table_names = 
conn.table_names(start_after, limit)?; - drop(conn); - let j_names = env.new_object("java/util/ArrayList", "()V", &[])?; - for item in table_names { - let jstr_item = env.new_string(item)?; - let item_jobj = JObject::from(jstr_item); - let item_gen = JValue::Object(&item_jobj); - env.call_method(&j_names, "add", "(Ljava/lang/Object;)Z", &[item_gen])?; - } - Ok(j_names) -} diff --git a/java/core/lancedb-jni/src/error.rs b/java/core/lancedb-jni/src/error.rs deleted file mode 100644 index 452adce9..00000000 --- a/java/core/lancedb-jni/src/error.rs +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use std::str::Utf8Error; - -use arrow_schema::ArrowError; -use jni::errors::Error as JniError; -use serde_json::Error as JsonError; -use snafu::{Location, Snafu}; - -type BoxedError = Box; - -/// Java Exception types -pub enum JavaException { - IllegalArgumentException, - IOException, - RuntimeException, -} - -impl JavaException { - pub fn as_str(&self) -> &str { - match self { - Self::IllegalArgumentException => "java/lang/IllegalArgumentException", - Self::IOException => "java/io/IOException", - Self::RuntimeException => "java/lang/RuntimeException", - } - } -} -/// TODO(lu) change to lancedb-jni -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("JNI error: {message}, {location}"))] - Jni { message: String, location: Location }, - #[snafu(display("Invalid argument: {message}, {location}"))] - InvalidArgument { message: String, location: Location }, - #[snafu(display("IO error: {source}, {location}"))] - IO { - source: BoxedError, - location: Location, - }, - #[snafu(display("Arrow error: {message}, {location}"))] - Arrow { message: String, location: Location }, - #[snafu(display("Index error: {message}, {location}"))] - Index { message: String, location: Location }, - #[snafu(display("JSON error: {message}, {location}"))] - JSON { message: String, 
location: Location }, - #[snafu(display("Dataset at path {path} was not found, {location}"))] - DatasetNotFound { path: String, location: Location }, - #[snafu(display("Dataset already exists: {uri}, {location}"))] - DatasetAlreadyExists { uri: String, location: Location }, - #[snafu(display("Table '{name}' already exists"))] - TableAlreadyExists { name: String }, - #[snafu(display("Table '{name}' was not found: {source}"))] - TableNotFound { - name: String, - source: Box, - }, - #[snafu(display("Invalid table name '{name}': {reason}"))] - InvalidTableName { name: String, reason: String }, - #[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))] - EmbeddingFunctionNotFound { - name: String, - reason: String, - location: Location, - }, - #[snafu(display("Other Lance error: {message}, {location}"))] - OtherLance { message: String, location: Location }, - #[snafu(display("Other LanceDB error: {message}, {location}"))] - OtherLanceDB { message: String, location: Location }, -} - -impl Error { - /// Throw as Java Exception - pub fn throw(&self, env: &mut jni::JNIEnv) { - match self { - Self::InvalidArgument { .. } - | Self::DatasetNotFound { .. } - | Self::DatasetAlreadyExists { .. } - | Self::TableAlreadyExists { .. } - | Self::TableNotFound { .. } - | Self::InvalidTableName { .. } - | Self::EmbeddingFunctionNotFound { .. } => { - self.throw_as(env, JavaException::IllegalArgumentException) - } - Self::IO { .. } | Self::Index { .. } => self.throw_as(env, JavaException::IOException), - Self::Arrow { .. } - | Self::JSON { .. } - | Self::OtherLance { .. } - | Self::OtherLanceDB { .. } - | Self::Jni { .. 
} => self.throw_as(env, JavaException::RuntimeException), - } - } - - /// Throw as an concrete Java Exception - pub fn throw_as(&self, env: &mut jni::JNIEnv, exception: JavaException) { - let message = &format!( - "Error when throwing Java exception: {}:{}", - exception.as_str(), - self - ); - env.throw_new(exception.as_str(), self.to_string()) - .expect(message); - } -} - -pub type Result = std::result::Result; - -trait ToSnafuLocation { - fn to_snafu_location(&'static self) -> snafu::Location; -} - -impl ToSnafuLocation for std::panic::Location<'static> { - fn to_snafu_location(&'static self) -> snafu::Location { - snafu::Location::new(self.file(), self.line(), self.column()) - } -} - -impl From for Error { - #[track_caller] - fn from(source: JniError) -> Self { - Self::Jni { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: Utf8Error) -> Self { - Self::InvalidArgument { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: ArrowError) -> Self { - Self::Arrow { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: JsonError) -> Self { - Self::JSON { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: lance::Error) -> Self { - match source { - lance::Error::DatasetNotFound { - path, - source: _, - location, - } => Self::DatasetNotFound { path, location }, - lance::Error::DatasetAlreadyExists { uri, location } => { - Self::DatasetAlreadyExists { uri, location } - } - lance::Error::IO { source, location } => Self::IO { source, location }, - lance::Error::Arrow { message, location } => Self::Arrow { message, location 
}, - lance::Error::Index { message, location } => Self::Index { message, location }, - lance::Error::InvalidInput { source, location } => Self::InvalidArgument { - message: source.to_string(), - location, - }, - _ => Self::OtherLance { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - }, - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: lancedb::Error) -> Self { - match source { - lancedb::Error::InvalidTableName { name, reason } => { - Self::InvalidTableName { name, reason } - } - lancedb::Error::InvalidInput { message } => Self::InvalidArgument { - message, - location: std::panic::Location::caller().to_snafu_location(), - }, - lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source }, - lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name }, - lancedb::Error::EmbeddingFunctionNotFound { name, reason } => { - Self::EmbeddingFunctionNotFound { - name, - reason, - location: std::panic::Location::caller().to_snafu_location(), - } - } - lancedb::Error::Arrow { source } => Self::Arrow { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - }, - lancedb::Error::Lance { source } => Self::from(source), - _ => Self::OtherLanceDB { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - }, - } - } -} diff --git a/java/core/lancedb-jni/src/ffi.rs b/java/core/lancedb-jni/src/ffi.rs deleted file mode 100644 index d353b347..00000000 --- a/java/core/lancedb-jni/src/ffi.rs +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use core::slice; - -use jni::objects::{JByteBuffer, JObjectArray, JString}; -use jni::sys::jobjectArray; -use jni::{objects::JObject, JNIEnv}; - -use crate::error::{Error, Result}; - -/// TODO(lu) import from lance-jni without duplicate -/// Extend JNIEnv with helper 
functions. -pub trait JNIEnvExt { - /// Get integers from Java List object. - fn get_integers(&mut self, obj: &JObject) -> Result>; - - /// Get strings from Java List object. - #[allow(dead_code)] - fn get_strings(&mut self, obj: &JObject) -> Result>; - - /// Get strings from Java String[] object. - /// Note that get Option> from Java Optional just doesn't work. - #[allow(unused)] - fn get_strings_array(&mut self, obj: jobjectArray) -> Result>; - - /// Get Option from Java Optional. - fn get_string_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option> from Java Optional>. - #[allow(unused)] - fn get_strings_opt(&mut self, obj: &JObject) -> Result>>; - - /// Get Option from Java Optional. - fn get_int_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option> from Java Optional>. - fn get_ints_opt(&mut self, obj: &JObject) -> Result>>; - - /// Get Option from Java Optional. - #[allow(unused)] - fn get_long_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option from Java Optional. - #[allow(unused)] - fn get_u64_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option<&[u8]> from Java Optional. - #[allow(unused)] - fn get_bytes_opt(&mut self, obj: &JObject) -> Result>; - - fn get_optional(&mut self, obj: &JObject, f: F) -> Result> - where - F: FnOnce(&mut JNIEnv, &JObject) -> Result; -} - -impl JNIEnvExt for JNIEnv<'_> { - fn get_integers(&mut self, obj: &JObject) -> Result> { - let list = self.get_list(obj)?; - let mut iter = list.iter(self)?; - let mut results = Vec::with_capacity(list.size(self)? as usize); - while let Some(elem) = iter.next(self)? { - let int_obj = self.call_method(elem, "intValue", "()I", &[])?; - let int_value = int_obj.i()?; - results.push(int_value); - } - Ok(results) - } - - fn get_strings(&mut self, obj: &JObject) -> Result> { - let list = self.get_list(obj)?; - let mut iter = list.iter(self)?; - let mut results = Vec::with_capacity(list.size(self)? as usize); - while let Some(elem) = iter.next(self)? 
{ - let jstr = JString::from(elem); - let val = self.get_string(&jstr)?; - results.push(val.to_str()?.to_string()) - } - Ok(results) - } - - fn get_strings_array(&mut self, obj: jobjectArray) -> Result> { - let jobject_array = unsafe { JObjectArray::from_raw(obj) }; - let array_len = self.get_array_length(&jobject_array)?; - let mut res: Vec = Vec::new(); - for i in 0..array_len { - let item: JString = self.get_object_array_element(&jobject_array, i)?.into(); - res.push(self.get_string(&item)?.into()); - } - Ok(res) - } - - fn get_string_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_string_obj = java_obj_gen.l()?; - let jstr = JString::from(java_string_obj); - let val = env.get_string(&jstr)?; - Ok(val.to_str()?.to_string()) - }) - } - - fn get_strings_opt(&mut self, obj: &JObject) -> Result>> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_list_obj = java_obj_gen.l()?; - env.get_strings(&java_list_obj) - }) - } - - fn get_int_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_int_obj = java_obj_gen.l()?; - let int_obj = env.call_method(java_int_obj, "intValue", "()I", &[])?; - let int_value = int_obj.i()?; - Ok(int_value) - }) - } - - fn get_ints_opt(&mut self, obj: &JObject) -> Result>> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_list_obj = java_obj_gen.l()?; - env.get_integers(&java_list_obj) - }) - } - - fn get_long_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_long_obj 
= java_obj_gen.l()?; - let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?; - let long_value = long_obj.j()?; - Ok(long_value) - }) - } - - fn get_u64_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_long_obj = java_obj_gen.l()?; - let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?; - let long_value = long_obj.j()?; - Ok(long_value as u64) - }) - } - - fn get_bytes_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_byte_buffer_obj = java_obj_gen.l()?; - let j_byte_buffer = JByteBuffer::from(java_byte_buffer_obj); - let raw_data = env.get_direct_buffer_address(&j_byte_buffer)?; - let capacity = env.get_direct_buffer_capacity(&j_byte_buffer)?; - let data = unsafe { slice::from_raw_parts(raw_data, capacity) }; - Ok(data) - }) - } - - fn get_optional(&mut self, obj: &JObject, f: F) -> Result> - where - F: FnOnce(&mut JNIEnv, &JObject) -> Result, - { - if obj.is_null() { - return Ok(None); - } - let is_present = self.call_method(obj, "isPresent", "()Z", &[])?; - if !is_present.z()? 
{ - // TODO(lu): put get java object into here cuz can only get java Object - Ok(None) - } else { - f(self, obj).map(Some) - } - } -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseInts( - mut env: JNIEnv, - _obj: JObject, - list_obj: JObject, // List -) { - ok_or_throw_without_return!(env, env.get_integers(&list_obj)); -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseIntsOpt( - mut env: JNIEnv, - _obj: JObject, - list_obj: JObject, // Optional> -) { - ok_or_throw_without_return!(env, env.get_ints_opt(&list_obj)); -} diff --git a/java/core/lancedb-jni/src/lib.rs b/java/core/lancedb-jni/src/lib.rs deleted file mode 100644 index 6d498759..00000000 --- a/java/core/lancedb-jni/src/lib.rs +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use lazy_static::lazy_static; - -// TODO import from lance-jni without duplicate -#[macro_export] -macro_rules! ok_or_throw { - ($env:expr, $result:expr) => { - match $result { - Ok(value) => value, - Err(err) => { - Error::from(err).throw(&mut $env); - return JObject::null(); - } - } - }; -} - -macro_rules! ok_or_throw_without_return { - ($env:expr, $result:expr) => { - match $result { - Ok(value) => value, - Err(err) => { - Error::from(err).throw(&mut $env); - return; - } - } - }; -} - -#[macro_export] -macro_rules! ok_or_throw_with_return { - ($env:expr, $result:expr, $ret:expr) => { - match $result { - Ok(value) => value, - Err(err) => { - Error::from(err).throw(&mut $env); - return $ret; - } - } - }; -} - -mod connection; -pub mod error; -mod ffi; -mod traits; - -pub use error::{Error, Result}; - -lazy_static! 
{ - static ref RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build() - .expect("Failed to create tokio runtime"); -} diff --git a/java/core/lancedb-jni/src/traits.rs b/java/core/lancedb-jni/src/traits.rs deleted file mode 100644 index e8ed78bc..00000000 --- a/java/core/lancedb-jni/src/traits.rs +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use jni::objects::{JMap, JObject, JString, JValue}; -use jni::JNIEnv; - -use crate::Result; - -#[allow(dead_code)] -pub trait FromJObject { - fn extract(&self) -> Result; -} - -/// Convert a Rust type into a Java Object. -pub trait IntoJava { - fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a>; -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).i()?) - } -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).j()?) - } -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).f()?) - } -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).d()?) 
- } -} - -#[allow(dead_code)] -pub trait FromJString { - fn extract(&self, env: &mut JNIEnv) -> Result; -} - -impl FromJString for JString<'_> { - fn extract(&self, env: &mut JNIEnv) -> Result { - Ok(env.get_string(self)?.into()) - } -} - -pub trait JMapExt { - #[allow(dead_code)] - fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result>; -} - -#[allow(dead_code)] -fn get_map_value(env: &mut JNIEnv, map: &JMap, key: &str) -> Result> -where - for<'a> JObject<'a>: FromJObject, -{ - let key_obj: JObject = env.new_string(key)?.into(); - if let Some(value) = map.get(env, &key_obj)? { - if value.is_null() { - Ok(None) - } else { - Ok(Some(value.extract()?)) - } - } else { - Ok(None) - } -} - -impl JMapExt for JMap<'_, '_, '_> { - fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result> { - let key_obj: JObject = env.new_string(key)?.into(); - if let Some(value) = self.get(env, &key_obj)? 
{ - let value_str: JString = value.into(); - Ok(Some(value_str.extract(env)?)) - } else { - Ok(None) - } - } - - fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } - - fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } - - fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } - - fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } -} diff --git a/java/core/pom.xml b/java/core/pom.xml deleted file mode 100644 index 5cf837e8..00000000 --- a/java/core/pom.xml +++ /dev/null @@ -1,103 +0,0 @@ - - - - 4.0.0 - - - com.lancedb - lancedb-parent - 0.23.0-beta.0 - ../pom.xml - - - lancedb-core - ${project.artifactId} - LanceDB Core - jar - - false - - - - - com.lancedb - lance-namespace-core - 0.0.1 - - - org.apache.arrow - arrow-vector - - - org.apache.arrow - arrow-memory-netty - - - org.apache.arrow - arrow-c-data - - - org.apache.arrow - arrow-dataset - - - org.json - json - - - org.questdb - jar-jni - - - org.junit.jupiter - junit-jupiter - test - - - - - - build-jni - - true - - - - - org.questdb - rust-maven-plugin - 1.1.1 - - - lancedb-jni - - build - - - lancedb-jni - ${rust.release.build} - - ${project.build.directory}/classes/nativelib - true - - - - lancedb-jni-test - - test - - - lancedb-jni - false - -v - - - - - - - - - diff --git a/java/core/src/main/java/com/lancedb/lancedb/Connection.java b/java/core/src/main/java/com/lancedb/lancedb/Connection.java deleted file mode 100644 index c7ac3035..00000000 --- a/java/core/src/main/java/com/lancedb/lancedb/Connection.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.lancedb.lancedb; - -import io.questdb.jar.jni.JarJniLoader; - -import java.io.Closeable; -import java.util.List; -import java.util.Optional; - -/** Represents LanceDB database. */ -public class Connection implements Closeable { - static { - JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni"); - } - - private long nativeConnectionHandle; - - /** Connect to a LanceDB instance. */ - public static native Connection connect(String uri); - - /** - * Get the names of all tables in the database. The names are sorted in ascending order. - * - * @return the table names - */ - public List tableNames() { - return tableNames(Optional.empty(), Optional.empty()); - } - - /** - * Get the names of filtered tables in the database. The names are sorted in ascending order. - * - * @param limit The number of results to return. - * @return the table names - */ - public List tableNames(int limit) { - return tableNames(Optional.empty(), Optional.of(limit)); - } - - /** - * Get the names of filtered tables in the database. The names are sorted in ascending order. - * - * @param startAfter If present, only return names that come lexicographically after the supplied - * value. This can be combined with limit to implement pagination by setting this to the last - * table name from the previous page. - * @return the table names - */ - public List tableNames(String startAfter) { - return tableNames(Optional.of(startAfter), Optional.empty()); - } - - /** - * Get the names of filtered tables in the database. 
The names are sorted in ascending order. - * - * @param startAfter If present, only return names that come lexicographically after the supplied - * value. This can be combined with limit to implement pagination by setting this to the last - * table name from the previous page. - * @param limit The number of results to return. - * @return the table names - */ - public List tableNames(String startAfter, int limit) { - return tableNames(Optional.of(startAfter), Optional.of(limit)); - } - - /** - * Get the names of filtered tables in the database. The names are sorted in ascending order. - * - * @param startAfter If present, only return names that come lexicographically after the supplied - * value. This can be combined with limit to implement pagination by setting this to the last - * table name from the previous page. - * @param limit The number of results to return. - * @return the table names - */ - public native List tableNames(Optional startAfter, Optional limit); - - /** - * Closes this connection and releases any system resources associated with it. If the connection - * is already closed, then invoking this method has no effect. - */ - @Override - public void close() { - if (nativeConnectionHandle != 0) { - releaseNativeConnection(nativeConnectionHandle); - nativeConnectionHandle = 0; - } - } - - /** - * Native method to release the Lance connection resources associated with the given handle. - * - * @param handle The native handle to the connection resource. 
- */ - private native void releaseNativeConnection(long handle); - - private Connection() {} -} diff --git a/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java b/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java deleted file mode 100644 index fa3adf8e..00000000 --- a/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.lancedb.lancedb; - -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -import java.net.URL; -import java.nio.file.Path; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class ConnectionTest { - private static final String[] TABLE_NAMES = { - "dataset_version", "new_empty_dataset", "test", "write_stream" - }; - - @TempDir static Path tempDir; // Temporary directory for the tests - private static URL lanceDbURL; - - @BeforeAll - static void setUp() { - ClassLoader classLoader = ConnectionTest.class.getClassLoader(); - lanceDbURL = classLoader.getResource("example_db"); - } - - @Test - void emptyDB() { - String databaseUri = tempDir.resolve("emptyDB").toString(); - try (Connection conn = Connection.connect(databaseUri)) { - List tableNames = conn.tableNames(); - assertTrue(tableNames.isEmpty()); - } - } - - @Test - void tableNames() { - 
try (Connection conn = Connection.connect(lanceDbURL.toString())) { - List tableNames = conn.tableNames(); - assertEquals(4, tableNames.size()); - for (int i = 0; i < TABLE_NAMES.length; i++) { - assertEquals(TABLE_NAMES[i], tableNames.get(i)); - } - } - } - - @Test - void tableNamesStartAfter() { - try (Connection conn = Connection.connect(lanceDbURL.toString())) { - assertTableNamesStartAfter( - conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0); - assertTableNamesStartAfter( - conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, "z_dataset", 0); - } - } - - private void assertTableNamesStartAfter( - Connection conn, String startAfter, int expectedSize, String... 
expectedNames) { - List tableNames = conn.tableNames(startAfter); - assertEquals(expectedSize, tableNames.size()); - for (int i = 0; i < expectedNames.length; i++) { - assertEquals(expectedNames[i], tableNames.get(i)); - } - } - - @Test - void tableNamesLimit() { - try (Connection conn = Connection.connect(lanceDbURL.toString())) { - for (int i = 0; i <= TABLE_NAMES.length; i++) { - List tableNames = conn.tableNames(i); - assertEquals(i, tableNames.size()); - for (int j = 0; j < i; j++) { - assertEquals(TABLE_NAMES[j], tableNames.get(j)); - } - } - } - } - - @Test - void tableNamesStartAfterLimit() { - try (Connection conn = Connection.connect(lanceDbURL.toString())) { - List tableNames = conn.tableNames(TABLE_NAMES[0], 2); - assertEquals(2, tableNames.size()); - assertEquals(TABLE_NAMES[1], tableNames.get(0)); - assertEquals(TABLE_NAMES[2], tableNames.get(1)); - tableNames = conn.tableNames(TABLE_NAMES[1], 1); - assertEquals(1, tableNames.size()); - assertEquals(TABLE_NAMES[2], tableNames.get(0)); - tableNames = conn.tableNames(TABLE_NAMES[2], 2); - assertEquals(1, tableNames.size()); - assertEquals(TABLE_NAMES[3], tableNames.get(0)); - tableNames = conn.tableNames(TABLE_NAMES[3], 2); - assertEquals(0, tableNames.size()); - tableNames = conn.tableNames(TABLE_NAMES[0], 0); - assertEquals(0, tableNames.size()); - - // Limit larger than the number of remaining tables - tableNames = conn.tableNames(TABLE_NAMES[0], 10); - assertEquals(3, tableNames.size()); - assertEquals(TABLE_NAMES[1], tableNames.get(0)); - assertEquals(TABLE_NAMES[2], tableNames.get(1)); - assertEquals(TABLE_NAMES[3], tableNames.get(2)); - - // Start after a value not in the list - tableNames = conn.tableNames("non_existent_table", 2); - assertEquals(2, tableNames.size()); - assertEquals(TABLE_NAMES[2], tableNames.get(0)); - assertEquals(TABLE_NAMES[3], tableNames.get(1)); - - // Start after the last table with a limit - tableNames = conn.tableNames(TABLE_NAMES[3], 1); - assertEquals(0, 
tableNames.size()); - } - } -} diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest deleted file mode 100644 index f09f8e8be9cfc9aba64234f812dfa3f8dea0e6c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 273 zcmYk1Jx;_h5Jv3;FRO@RTi2&Y%jdU`*Tw4b<>{Sn8MYFXDiO+h81&GCj#7bh*1+hsM;m*OqwMB~e7$Pe UZ;3wIH>{_6u_;&i$M|wV{_xuB^r5E7< diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn deleted file mode 100644 index c0119b6ea5c6371535ac17c6fdda93531af9fae8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 99 zcmWN;%ME}a3;<9+i6I`Dz=J1~8la_?aVRM#<4{hj@43nEh(wB9;G!!6Swaxu46RU6 rwpg^KcafFd?8AE0h@C|%1TqE^&7jr-XwURh%RQWTeVX}c7`OQYLsS;8 diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest deleted file mode 100644 index d94ff721e987921a75ccd51ff3afb8cadd99b97b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 159 zcmeBXU|`^q5@O0sQTPu7j9RRjc_qe128aaxM0 lX_A3nNkv|k02gacVqS8p5W9h%ArM&rT>}M-3_gy2&H!ijGB*GK diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest deleted file mode 100644 index f8764e44c397e4e66eb1a1a811e91426adadc709..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 217 zcmX@gz`(#ICB&4OqVOLE7`0e4^Gb}33>YmKx#Wdd@)C1XB^Z(A*@{bwGV{`b3Ydh< zI9P=YxU|d+5-pQVEiHA^%q`P&O)L^Ebd%E35_OYIEKSYLjFQZZj1u*767!N%g_sx^ 
z6<8&htav!)Y<{3OgButlSd+92b&ZY9l8sYM&47%w6kQY36rd?dX-T@4rm0Cu mre?;;NvQ^UB^7yD0$i*R!`Kb<41vf3=np7hWbkqHa|Qropg^tw diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest deleted file mode 100644 index f09f8e8be9cfc9aba64234f812dfa3f8dea0e6c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 273 zcmYk1Jx;_h5Jv3;FRO@RTi2&Y%jdU`*Tw4b<>{Sn8MYFXDiO+h81&GCj#7bh*1+hsM;m*OqwMB~e7$Pe UZ;3M@{hnYpwT^0KFFUSFa<(^{S9KyK;uKX zaB2>kJj{NWc`$on{6L_3E-4|V%oK(HFu6y>hH_kC5@KRtmB`V0$jBuo#Atx7zcjgo zOI(O6uQVqoJ~_WMuS5c3ubBzZRtX`_+|0cAvc#OyR0$?z_kd&)E1)vWs4|R(5-ixP zkwmeERe=Sp4Q!D#vPEn_86>Z9fh6H}v7<@~aWF74F|)9;u}fTGTnP(KVA}L?^m7IP Dk;8(} diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance b/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance deleted file mode 100644 index c7d88b1cfa0a15f4dd0176443f705014af6a470f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 642 zcmZva%}T^D6oqrszcILwAR2$bE?mt*+EHY-gDyq%35L=sQt4!5CKVTM1Ruerk1#Ja z58yLs(&i3r@X~O4a&q$}rGyXydQd$cisfn{*7zQ9NfJJ?2IY z2Mc-ccH+}2a=)zq#s)`c)|>O2s#mOycX6k`L^BumpI k<0QS3io7fVF4mmHyyR3Nb^|>_AhH0u1_~G%d>s9p0dv|iE&u=k diff --git a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn deleted file mode 100644 index 4ca22d68..00000000 --- a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn +++ /dev/null @@ -1 +0,0 @@ -$15648e72-076f-4ef1-8b90-10d305b95b3b²=id ÿÿÿÿÿÿÿÿÿ*int3208name ÿÿÿÿÿÿÿÿÿ*string08 \ No newline at end of file diff --git 
a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest deleted file mode 100644 index 4f5495c6bda1795cd1822bd823bbced7cfccc024..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 159 zcmeBXU|`^q5@O0sQTPu7j9RRjc_qe128mOycX6k`L^BumpI k<0QS3io7fVF4mmHyyR3Nb^|>_AhH0u1_~G%d>s9p0dv|iE&u=k diff --git a/java/core/src/test/resources/example_db/test.lance/_latest.manifest b/java/core/src/test/resources/example_db/test.lance/_latest.manifest deleted file mode 100644 index d1d46b3e71227417409b0899bd6670aa0a717927..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 264 zcmY+9yGq1B7=<&Vo0VX(Ok+S;whl~~nM^j9PHe?jSmyGNjKoBeQ5N6I!pg?ZSFzg4 zT3t{$*XOR@8`;geRG?f;Eb3$`~*zm36c!w4kDtm8sWj;NbF3 z^#;g6gzyTdxII4lc-=k5i*MgTr>KX@N&DUr(pen6%TSO)1gu;^2Rc+) t?mUsr^8J)p6UFyj8MV^@L_^@d9JGuKxw6b}YewdutN*5@{Smg;_6LPs7gPWM diff --git a/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest deleted file mode 100644 index b0c30b9da3173d8d61f32da259f9033ffea25ea0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 209 zcmdnXz`(#IEyR*plA5dV9|jn;*osSvGV{_cn7HJGSPP0WlT#%aQRGwdOOtX^Ef~2B zgxJbblS}f8B$$vDh^J*%q^86dXI7=g=VTU_Sf%CUCzeICMTxpnxvT}>6#>_B?E<$&2=r!6VuF1&CCofOpNtPD)O=fxL9)% Z^O94A*bVdyfyfBxekfpM@Nx8W1^|aVO1c05 diff --git a/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest b/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest deleted file mode 100644 index d1d46b3e71227417409b0899bd6670aa0a717927..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 264 zcmY+9yGq1B7=<&Vo0VX(Ok+S;whl~~nM^j9PHe?jSmyGNjKoBeQ5N6I!pg?ZSFzg4 zT3t{$*XOR@8`;geRG?f;Eb3$`~*zm36c!w4kDtm8sWj;NbF3 z^#;g6gzyTdxII4lc-=k5i;_ij=^7LoH@YIt}LR 
zo(7!F6~6w_p*L}LR>y`03n-fu11{SpYo&1&j$6uCKRLWH1WFxX za30w80!b9(z&ap?cez$*UDXi9X+rLR(ij1e2?oy|IwDPR^b#DmcgdsLVrS};z1TyR fht{7T)0D6Ex#ZcIzSw#%v!2cWAD6razqR=SQGGGg diff --git a/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest deleted file mode 100644 index ac2cd0f69b1b3461577f1073bf93dfe47938c3c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 214 zcmX@az`(#IE5wqQn47Bb9|jn;*osSvGV{_67%iB9(#(nJsS=FH@~oM8CB{ZTX+|Lv zAp6~6w_p*L}LR>y`03n-fu11{SpYo&1&jL>A|IJ&sI zxYREojt+t*?VH*=e7W$;b5Gv)$#Iu_BDz2%lB-Tc^OfVOerg|jYGLM52Nl^P4&O1^p}4Z%Z{{bR znCw!b5-HoAVWMJ&Lo@gMtcFQIivywB6E2g8IjQ>?i%-FFQx>DR%Dn%Af>e-S_Wq*w k)}wOZEI*DIZ - - - 4.0.0 - - - com.lancedb - lancedb-parent - 0.23.0-beta.0 - ../pom.xml - - - lancedb-lance-namespace - ${project.artifactId} - LanceDB Java Integration with Lance Namespace - jar - - - - com.lancedb - lance-namespace-core - - - diff --git a/java/lancedb-core/pom.xml b/java/lancedb-core/pom.xml new file mode 100644 index 00000000..efebeccf --- /dev/null +++ b/java/lancedb-core/pom.xml @@ -0,0 +1,99 @@ + + + + 4.0.0 + + + com.lancedb + lancedb-parent + 0.23.0-beta.0 + ../pom.xml + + + lancedb-core + ${project.artifactId} + Utilities to work with LanceDB Cloud and Enterprise via Lance REST Namespace + jar + + + + org.lance + lance-core + + + + org.apache.arrow + arrow-vector + + + + org.apache.arrow + arrow-memory-netty + + + + org.junit.jupiter + junit-jupiter + test + + + + org.mockito + mockito-junit-jupiter + 5.18.0 + test + + + + org.slf4j + slf4j-api + 2.0.16 + test + + + + org.apache.logging.log4j + log4j-slf4j2-impl + 2.24.3 + test + + + + org.apache.logging.log4j + log4j-core + 2.24.3 + test + + + + org.apache.logging.log4j + log4j-api + 2.24.3 + test + + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.3.0 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + 
maven-surefire-plugin + + + + diff --git a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java b/java/lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java similarity index 55% rename from java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java rename to java/lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java index c258921f..1a6b8e32 100644 --- a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java +++ b/java/lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java @@ -11,35 +11,58 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.lancedb.lancedb; +package com.lancedb; -import com.lancedb.lance.namespace.LanceRestNamespace; -import com.lancedb.lance.namespace.client.apache.ApiClient; +import org.lance.namespace.RestNamespace; import java.util.HashMap; import java.util.Map; import java.util.Optional; -/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */ -public class LanceDbRestNamespaces { +/** + * Util class to help construct a {@link RestNamespace} for LanceDB. + * + *

<p>For LanceDB Cloud, use the simplified builder API: + * + * <pre>{@code
+ * import org.lance.namespace.RestNamespace;
+ *
+ * // If your DB url is db://example-db, then your database here is example-db
+ * RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder()
+ *     .apiKey("your_lancedb_cloud_api_key")
+ *     .database("your_database_name")
+ *     .build();
+ * }</pre>
+ * + *

<p>For LanceDB Enterprise deployments, use your custom endpoint: + * + * <pre>{@code
+ * RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder()
+ *     .apiKey("your_lancedb_enterprise_api_key")
+ *     .database("your_database_name")
+ *     .endpoint("<your_enterprise_endpoint>")
+ *     .build();
+ * }</pre>
+ */ +public class LanceDbRestNamespaceBuilder { private static final String DEFAULT_REGION = "us-east-1"; private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com"; private String apiKey; private String database; - private Optional hostOverride = Optional.empty(); + private Optional endpoint = Optional.empty(); private Optional region = Optional.empty(); private Map additionalConfig = new HashMap<>(); - private LanceDbRestNamespaces() {} + private LanceDbRestNamespaceBuilder() {} /** * Create a new builder instance. * - * @return A new LanceRestNamespaceBuilder + * @return A new RestNamespaceBuilder */ - public static LanceDbRestNamespaces builder() { - return new LanceDbRestNamespaces(); + public static LanceDbRestNamespaceBuilder newBuilder() { + return new LanceDbRestNamespaceBuilder(); } /** @@ -48,7 +71,7 @@ public class LanceDbRestNamespaces { * @param apiKey The LanceDB API key * @return This builder */ - public LanceDbRestNamespaces apiKey(String apiKey) { + public LanceDbRestNamespaceBuilder apiKey(String apiKey) { if (apiKey == null || apiKey.trim().isEmpty()) { throw new IllegalArgumentException("API key cannot be null or empty"); } @@ -62,7 +85,7 @@ public class LanceDbRestNamespaces { * @param database The database name * @return This builder */ - public LanceDbRestNamespaces database(String database) { + public LanceDbRestNamespaceBuilder database(String database) { if (database == null || database.trim().isEmpty()) { throw new IllegalArgumentException("Database cannot be null or empty"); } @@ -71,25 +94,25 @@ public class LanceDbRestNamespaces { } /** - * Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL + * Set a custom endpoint URL (optional). When set, this overrides the default LanceDB Cloud URL * construction. Use this for LanceDB Enterprise deployments. 
* - * @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80") + * @param endpoint The complete base URL for your LanceDB Enterprise deployment * @return This builder */ - public LanceDbRestNamespaces hostOverride(String hostOverride) { - this.hostOverride = Optional.ofNullable(hostOverride); + public LanceDbRestNamespaceBuilder endpoint(String endpoint) { + this.endpoint = Optional.ofNullable(endpoint); return this; } /** * Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is - * ignored when hostOverride is set. + * ignored when endpoint is set. * * @param region The AWS region (e.g., "us-east-1", "eu-west-1") * @return This builder */ - public LanceDbRestNamespaces region(String region) { + public LanceDbRestNamespaceBuilder region(String region) { this.region = Optional.ofNullable(region); return this; } @@ -101,18 +124,18 @@ public class LanceDbRestNamespaces { * @param value The configuration value * @return This builder */ - public LanceDbRestNamespaces config(String key, String value) { + public LanceDbRestNamespaceBuilder config(String key, String value) { this.additionalConfig.put(key, value); return this; } /** - * Build the LanceRestNamespace instance. + * Build the Lance RestNamespace instance. 
* - * @return A configured LanceRestNamespace + * @return A configured Lance RestNamespace * @throws IllegalStateException if required parameters are missing */ - public LanceRestNamespace build() { + public RestNamespace build() { // Validate required fields if (apiKey == null) { throw new IllegalStateException("API key is required"); @@ -123,24 +146,20 @@ public class LanceDbRestNamespaces { // Build configuration map Map config = new HashMap<>(additionalConfig); - config.put("headers.x-lancedb-database", database); - config.put("headers.x-api-key", apiKey); + config.put("header.x-lancedb-database", database); + config.put("header.x-api-key", apiKey); // Determine base URL - String baseUrl; - if (hostOverride.isPresent()) { - baseUrl = hostOverride.get(); - config.put("host_override", hostOverride.get()); + String uri; + if (endpoint.isPresent()) { + uri = endpoint.get(); } else { String effectiveRegion = region.orElse(DEFAULT_REGION); - baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion); - config.put("region", effectiveRegion); + uri = String.format(CLOUD_URL_PATTERN, database, effectiveRegion); } - - // Create and configure ApiClient - ApiClient apiClient = new ApiClient(); - apiClient.setBasePath(baseUrl); - - return new LanceRestNamespace(apiClient, config); + config.put("uri", uri); + RestNamespace ns = new RestNamespace(); + ns.initialize(config, null); + return ns; } } diff --git a/java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java b/java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java new file mode 100644 index 00000000..f6543028 --- /dev/null +++ b/java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.lancedb; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** Unit tests for LanceDbRestNamespaceBuilder. */ +public class LanceDbRestNamespaceBuilderTest { + + @Test + public void testBuilderRequiresApiKey() { + LanceDbRestNamespaceBuilder builder = + LanceDbRestNamespaceBuilder.newBuilder().database("test-db"); + + IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build); + assertEquals("API key is required", exception.getMessage()); + } + + @Test + public void testBuilderRequiresDatabase() { + LanceDbRestNamespaceBuilder builder = + LanceDbRestNamespaceBuilder.newBuilder().apiKey("test-api-key"); + + IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build); + assertEquals("Database is required", exception.getMessage()); + } + + @Test + public void testApiKeyCannotBeNull() { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().apiKey(null)); + assertEquals("API key cannot be null or empty", exception.getMessage()); + } + + @Test + public void testApiKeyCannotBeEmpty() { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().apiKey(" ")); + assertEquals("API key cannot be null or empty", exception.getMessage()); + } + + @Test + public void testDatabaseCannotBeNull() { + IllegalArgumentException exception = + assertThrows( + 
IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().database(null)); + assertEquals("Database cannot be null or empty", exception.getMessage()); + } + + @Test + public void testDatabaseCannotBeEmpty() { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().database(" ")); + assertEquals("Database cannot be null or empty", exception.getMessage()); + } + + @Test + public void testBuilderFluentApi() { + // Verify the builder returns itself for chaining + LanceDbRestNamespaceBuilder builder = LanceDbRestNamespaceBuilder.newBuilder(); + + assertSame(builder, builder.apiKey("test-key")); + assertSame(builder, builder.database("test-db")); + assertSame(builder, builder.endpoint("http://localhost:8080")); + assertSame(builder, builder.region("eu-west-1")); + assertSame(builder, builder.config("custom-key", "custom-value")); + } + + @Test + public void testNewBuilderCreatesNewInstance() { + LanceDbRestNamespaceBuilder builder1 = LanceDbRestNamespaceBuilder.newBuilder(); + LanceDbRestNamespaceBuilder builder2 = LanceDbRestNamespaceBuilder.newBuilder(); + + assertNotSame(builder1, builder2); + } +} diff --git a/java/lancedb-core/src/test/resources/log4j2.xml b/java/lancedb-core/src/test/resources/log4j2.xml new file mode 100644 index 00000000..00931691 --- /dev/null +++ b/java/lancedb-core/src/test/resources/log4j2.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + diff --git a/java/pom.xml b/java/pom.xml index 1eb946f0..0eeefb6d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 0.0.1 + 1.0.0-rc.2 false 2.30.0 1.7 @@ -51,8 +51,7 @@ - core - lance-namespace + lancedb-core @@ -64,9 +63,9 @@ - com.lancedb - lance-namespace-core - ${lance-namespace.verison} + org.lance + lance-core + ${lance-core.version} org.apache.arrow @@ -88,21 +87,11 @@ arrow-dataset ${arrow.version} - - org.questdb - jar-jni - 1.1.1 - org.junit.jupiter 
junit-jupiter 5.10.1 - - org.json - json - 20210307 -