diff --git a/.bumpversion.toml b/.bumpversion.toml index 76bfaf39..c48506ef 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -72,3 +72,9 @@ search = "\nversion = \"{current_version}\"" filename = "nodejs/Cargo.toml" replace = "\nversion = \"{new_version}\"" search = "\nversion = \"{current_version}\"" + +# Java documentation +[[tool.bumpversion.files]] +filename = "docs/src/java/java.md" +replace = "{new_version}" +search = "{current_version}" diff --git a/.github/workflows/java-publish.yml b/.github/workflows/java-publish.yml index eca792f3..6abaac06 100644 --- a/.github/workflows/java-publish.yml +++ b/.github/workflows/java-publish.yml @@ -1,76 +1,35 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ name: Build and publish Java packages on: - release: - types: [released] + push: + tags: + - "v*" pull_request: paths: - .github/workflows/java-publish.yml jobs: - macos-arm64: - name: Build on MacOS Arm64 - runs-on: macos-14 - timeout-minutes: 45 - defaults: - run: - working-directory: ./java/core/lancedb-jni - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - - name: Install dependencies - run: | - brew install protobuf - - name: Build release - run: | - cargo build --release - - uses: actions/upload-artifact@v4 - with: - name: liblancedb_jni_darwin_aarch64.zip - path: target/release/liblancedb_jni.dylib - retention-days: 1 - if-no-files-found: error - linux-arm64: - name: Build on Linux Arm64 - runs-on: warp-ubuntu-2204-arm64-8x - timeout-minutes: 45 - defaults: - run: - working-directory: ./java/core/lancedb-jni - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - cache-workspaces: "./java/core/lancedb-jni" - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. 
- rustflags: "-C debuginfo=1" - - name: Install dependencies - run: | - sudo apt -y -qq update - sudo apt install -y protobuf-compiler libssl-dev pkg-config - - name: Build release - run: | - cargo build --release - - uses: actions/upload-artifact@v4 - with: - name: liblancedb_jni_linux_aarch64.zip - path: target/release/liblancedb_jni.so - retention-days: 1 - if-no-files-found: error - linux-x86: - runs-on: warp-ubuntu-2204-x64-8x + publish: + name: Build and Publish + runs-on: ubuntu-24.04 timeout-minutes: 30 - needs: [macos-arm64, linux-arm64] defaults: run: working-directory: ./java steps: - name: Checkout repository uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - name: Set up Java 8 uses: actions/setup-java@v4 with: @@ -82,40 +41,30 @@ jobs: server-password: SONATYPE_TOKEN gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }} - - name: Install dependencies + - name: Set git config run: | - sudo apt -y -qq update - sudo apt install -y protobuf-compiler libssl-dev pkg-config - - name: Download artifact - uses: actions/download-artifact@v4 - - name: Copy native libs - run: | - mkdir -p ./core/target/classes/nativelib/darwin-aarch64 ./core/target/classes/nativelib/linux-aarch64 - cp ../liblancedb_jni_darwin_aarch64.zip/liblancedb_jni.dylib ./core/target/classes/nativelib/darwin-aarch64/liblancedb_jni.dylib - cp ../liblancedb_jni_linux_aarch64.zip/liblancedb_jni.so ./core/target/classes/nativelib/linux-aarch64/liblancedb_jni.so + git config --global user.email "dev+gha@lancedb.com" + git config --global user.name "LanceDB Github Runner" - name: Dry run if: github.event_name == 'pull_request' run: | - mvn --batch-mode -DskipTests -Drust.release.build=true package - - name: Set github - run: | - git config --global user.email "LanceDB Github Runner" - git config --global user.name "dev+gha@lancedb.com" - - name: Publish with Java 8 - if: github.event_name == 'release' + ./mvnw --batch-mode -DskipTests package -pl 
lancedb-core -am + - name: Publish + if: startsWith(github.ref, 'refs/tags/v') run: | echo "use-agent" >> ~/.gnupg/gpg.conf echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf export GPG_TTY=$(tty) - mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh + ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh env: SONATYPE_USER: ${{ secrets.SONATYPE_USER }} SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }} + report-failure: name: Report Workflow Failure runs-on: ubuntu-latest - needs: [linux-arm64, linux-x86, macos-arm64] - if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch') + needs: [publish] + if: always() && failure() && startsWith(github.ref, 'refs/tags/v') permissions: contents: read issues: write diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 29091097..2089838b 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -1,118 +1,46 @@ -name: Build and Run Java JNI Tests +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Build Java LanceDB Core + on: push: branches: - main paths: - java/** + - .github/workflows/java.yml pull_request: paths: - java/** - - rust/** - .github/workflows/java.yml -env: - # This env var is used by Swatinem/rust-cache@v2 for the cache - # key, so we set it to make sure it is always consistent. - CARGO_TERM_COLOR: always - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" - RUST_BACKTRACE: "1" - # according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html - # CI builds are faster with incremental disabled. - CARGO_INCREMENTAL: "0" - CARGO_BUILD_JOBS: "1" + jobs: - linux-build-java-11: - runs-on: ubuntu-22.04 - name: ubuntu-22.04 + Java 11 + build-java: + runs-on: ubuntu-24.04 + name: Build defaults: run: working-directory: ./java steps: - name: Checkout repository uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - with: - workspaces: java/core/lancedb-jni - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - components: rustfmt - - name: Run cargo fmt - run: cargo fmt --check - working-directory: ./java/core/lancedb-jni - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y protobuf-compiler libssl-dev - - name: Install Java 11 - uses: actions/setup-java@v4 - with: - distribution: temurin - java-version: 11 - cache: "maven" - - name: Java Style Check - run: mvn checkstyle:check - # Disable because of issues in lancedb rust core code - # - name: Rust Clippy - # working-directory: java/core/lancedb-jni - # run: cargo clippy --all-targets -- -D warnings - - name: Running tests with Java 11 - run: mvn clean test - linux-build-java-17: - runs-on: ubuntu-22.04 - name: ubuntu-22.04 + Java 17 - defaults: - run: - working-directory: ./java - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - uses: Swatinem/rust-cache@v2 - with: - workspaces: 
java/core/lancedb-jni - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - components: rustfmt - - name: Run cargo fmt - run: cargo fmt --check - working-directory: ./java/core/lancedb-jni - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y protobuf-compiler libssl-dev - - name: Install Java 17 + - name: Set up Java 17 uses: actions/setup-java@v4 with: distribution: temurin java-version: 17 cache: "maven" - - run: echo "JAVA_17=$JAVA_HOME" >> $GITHUB_ENV - name: Java Style Check - run: mvn checkstyle:check - # Disable because of issues in lancedb rust core code - # - name: Rust Clippy - # working-directory: java/core/lancedb-jni - # run: cargo clippy --all-targets -- -D warnings - - name: Running tests with Java 17 - run: | - export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \ - -XX:+IgnoreUnrecognizedVMOptions \ - --add-opens=java.base/java.lang=ALL-UNNAMED \ - --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ - --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ - --add-opens=java.base/java.io=ALL-UNNAMED \ - --add-opens=java.base/java.net=ALL-UNNAMED \ - --add-opens=java.base/java.nio=ALL-UNNAMED \ - --add-opens=java.base/java.util=ALL-UNNAMED \ - --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \ - --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \ - --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \ - --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \ - --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \ - --add-opens=java.base/sun.security.action=ALL-UNNAMED \ - --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ - --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \ - -Djdk.reflect.useDirectMethodHandle=false \ - -Dio.netty.tryReflectionSetAccessible=true" - JAVA_HOME=$JAVA_17 mvn clean test + run: ./mvnw checkstyle:check + - name: Build and install + run: ./mvnw clean install diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index a801520b..35c297e0 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml 
@@ -123,6 +123,7 @@ nav: - Overview: index.md - Python: python/python.md - Javascript/TypeScript: js/globals.md + - Java: java/java.md - Rust: https://docs.rs/lancedb/latest/lancedb/index.html extra_css: diff --git a/docs/src/index.md b/docs/src/index.md index d8da266a..c2dbabe0 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,4 +4,5 @@ This page contains the API reference for the SDKs supported by the LanceDB team. - [Python](python/python.md) - [JavaScript/TypeScript](js/globals.md) +- [Java](java/java.md) - [Rust](https://docs.rs/lancedb/latest/lancedb/index.html) \ No newline at end of file diff --git a/docs/src/java/java.md b/docs/src/java/java.md new file mode 100644 index 00000000..583e6b69 --- /dev/null +++ b/docs/src/java/java.md @@ -0,0 +1,499 @@ +# Java SDK + +The LanceDB Java SDK provides a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API. + +!!! note + The Java SDK currently works only with remote LanceDB databases, i.e. LanceDB Cloud and LanceDB Enterprise deployments. + Local database support is a work in progress. See [LANCEDB-2848](https://github.com/lancedb/lancedb/issues/2848) for the latest progress. 
+ +## Installation + +Add the following dependency to your `pom.xml`: + +```xml +<dependency> + <groupId>com.lancedb</groupId> + <artifactId>lancedb-core</artifactId> + <version>0.23.0-beta.0</version> +</dependency> +``` + +## Quick Start + +### Connecting to LanceDB Cloud + +```java +import com.lancedb.LanceDbRestNamespaceBuilder; +import org.lance.namespace.RestNamespace; + +// If your DB url is db://example-db, then your database here is example-db +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() + .apiKey("your_lancedb_cloud_api_key") + .database("your_database_name") + .build(); +``` + +### Connecting to LanceDB Enterprise + +For LanceDB Enterprise deployments with a custom endpoint: + +```java +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() + .apiKey("your_lancedb_enterprise_api_key") + .database("your_database_name") + .endpoint("<your_enterprise_endpoint>") + .build(); +``` + +### Configuration Options + +| Method | Description | Required | +|--------|-------------|----------| +| `apiKey(String)` | LanceDB API key | Yes | +| `database(String)` | Database name | Yes | +| `endpoint(String)` | Custom endpoint URL for Enterprise deployments | No | +| `region(String)` | AWS region (default: "us-east-1") | No | +| `config(String, String)` | Additional configuration parameters | No | + +## Metadata Operations + +### Creating a Namespace + +Namespaces organize tables hierarchically. 
Create a namespace before creating tables within it: + +```java +import org.lance.namespace.model.CreateNamespaceRequest; +import org.lance.namespace.model.CreateNamespaceResponse; + +// Create a child namespace +CreateNamespaceRequest request = new CreateNamespaceRequest(); +request.setId(Arrays.asList("my_namespace")); + +CreateNamespaceResponse response = namespace.createNamespace(request); +``` + +You can also create nested namespaces: + +```java +// Create a nested namespace: parent/child +CreateNamespaceRequest request = new CreateNamespaceRequest(); +request.setId(Arrays.asList("parent_namespace", "child_namespace")); + +CreateNamespaceResponse response = namespace.createNamespace(request); +``` + +### Describing a Namespace + +```java +import org.lance.namespace.model.DescribeNamespaceRequest; +import org.lance.namespace.model.DescribeNamespaceResponse; + +DescribeNamespaceRequest request = new DescribeNamespaceRequest(); +request.setId(Arrays.asList("my_namespace")); + +DescribeNamespaceResponse response = namespace.describeNamespace(request); +System.out.println("Namespace properties: " + response.getProperties()); +``` + +### Listing Namespaces + +```java +import org.lance.namespace.model.ListNamespacesRequest; +import org.lance.namespace.model.ListNamespacesResponse; + +// List all namespaces at root level +ListNamespacesRequest request = new ListNamespacesRequest(); +request.setId(Arrays.asList()); // Empty for root + +ListNamespacesResponse response = namespace.listNamespaces(request); +for (String ns : response.getNamespaces()) { + System.out.println("Namespace: " + ns); +} + +// List child namespaces under a parent +ListNamespacesRequest childRequest = new ListNamespacesRequest(); +childRequest.setId(Arrays.asList("parent_namespace")); + +ListNamespacesResponse childResponse = namespace.listNamespaces(childRequest); +``` + +### Listing Tables + +```java +import org.lance.namespace.model.ListTablesRequest; +import 
org.lance.namespace.model.ListTablesResponse; + +// List tables in a namespace +ListTablesRequest request = new ListTablesRequest(); +request.setId(Arrays.asList("my_namespace")); + +ListTablesResponse response = namespace.listTables(request); +for (String table : response.getTables()) { + System.out.println("Table: " + table); +} +``` + +### Dropping a Namespace + +```java +import org.lance.namespace.model.DropNamespaceRequest; +import org.lance.namespace.model.DropNamespaceResponse; + +DropNamespaceRequest request = new DropNamespaceRequest(); +request.setId(Arrays.asList("my_namespace")); + +DropNamespaceResponse response = namespace.dropNamespace(request); +``` + +### Describing a Table + +```java +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; + +DescribeTableRequest request = new DescribeTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +DescribeTableResponse response = namespace.describeTable(request); +System.out.println("Table version: " + response.getVersion()); +System.out.println("Schema fields: " + response.getSchema().getFields()); +``` + +### Dropping a Table + +```java +import org.lance.namespace.model.DropTableRequest; +import org.lance.namespace.model.DropTableResponse; + +DropTableRequest request = new DropTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +DropTableResponse response = namespace.dropTable(request); +``` + +## Writing Data + +### Creating a Table + +Tables are created within a namespace by providing data in Apache Arrow IPC format: + +```java +import org.lance.namespace.RestNamespace; +import org.lance.namespace.model.CreateTableRequest; +import org.lance.namespace.model.CreateTableResponse; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import 
org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; + +import java.io.ByteArrayOutputStream; +import java.nio.channels.Channels; +import java.util.Arrays; + +// Create schema with id, name, and embedding fields +Schema schema = new Schema(Arrays.asList( + new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null), + new Field("name", FieldType.nullable(new ArrowType.Utf8()), null), + new Field("embedding", + FieldType.nullable(new ArrowType.FixedSizeList(128)), + Arrays.asList(new Field("item", + FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), + null))) +)); + +try (BufferAllocator allocator = new RootAllocator(); + VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + + // Populate data + root.setRowCount(3); + IntVector idVector = (IntVector) root.getVector("id"); + VarCharVector nameVector = (VarCharVector) root.getVector("name"); + FixedSizeListVector embeddingVector = (FixedSizeListVector) root.getVector("embedding"); + Float4Vector embeddingData = (Float4Vector) embeddingVector.getDataVector(); + + for (int i = 0; i < 3; i++) { + idVector.setSafe(i, i + 1); + nameVector.setSafe(i, ("item_" + i).getBytes()); + embeddingVector.setNotNull(i); + for (int j = 0; j < 128; j++) { + embeddingData.setSafe(i * 128 + j, (float) i); + } + } + idVector.setValueCount(3); + nameVector.setValueCount(3); + embeddingData.setValueCount(3 * 128); + embeddingVector.setValueCount(3); + + // Serialize to Arrow IPC format + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (ArrowStreamWriter 
writer = new ArrowStreamWriter(root, null, Channels.newChannel(out))) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + byte[] tableData = out.toByteArray(); + + // Create table in a namespace + CreateTableRequest request = new CreateTableRequest(); + request.setId(Arrays.asList("my_namespace", "my_table")); + CreateTableResponse response = namespace.createTable(request, tableData); +} +``` + +### Insert + +```java +import org.lance.namespace.model.InsertIntoTableRequest; +import org.lance.namespace.model.InsertIntoTableResponse; + +// Prepare data in Arrow IPC format (similar to create table example) +byte[] insertData = prepareArrowData(); + +InsertIntoTableRequest request = new InsertIntoTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); +request.setMode(InsertIntoTableRequest.ModeEnum.APPEND); + +InsertIntoTableResponse response = namespace.insertIntoTable(request, insertData); +System.out.println("New version: " + response.getVersion()); +``` + +### Update + +Update rows matching a predicate condition: + +```java +import org.lance.namespace.model.UpdateTableRequest; +import org.lance.namespace.model.UpdateTableResponse; + +UpdateTableRequest request = new UpdateTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +// Predicate to select rows to update +request.setPredicate("id = 1"); + +// Set new values using SQL expressions as [column_name, expression] pairs +request.setUpdates(Arrays.asList( + Arrays.asList("name", "'updated_name'") +)); + +UpdateTableResponse response = namespace.updateTable(request); +System.out.println("Updated rows: " + response.getUpdatedRows()); +``` + +### Delete + +Delete rows matching a predicate condition: + +```java +import org.lance.namespace.model.DeleteFromTableRequest; +import org.lance.namespace.model.DeleteFromTableResponse; + +DeleteFromTableRequest request = new DeleteFromTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +// 
Predicate to select rows to delete +request.setPredicate("id > 100"); + +DeleteFromTableResponse response = namespace.deleteFromTable(request); +System.out.println("New version: " + response.getVersion()); +``` + +### Merge Insert (Upsert) + +Merge insert allows you to update existing rows and insert new rows in a single operation based on a key column: + +```java +import org.lance.namespace.model.MergeInsertIntoTableRequest; +import org.lance.namespace.model.MergeInsertIntoTableResponse; + +// Prepare data with rows to update (id=2,3) and new rows (id=4) +byte[] mergeData = prepareArrowData(); // Contains rows with id=2,3,4 + +MergeInsertIntoTableRequest request = new MergeInsertIntoTableRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +// Match on the "id" column +request.setOn("id"); + +// Update all columns when a matching row is found +request.setWhenMatchedUpdateAll(true); + +// Insert new rows when no match is found +request.setWhenNotMatchedInsertAll(true); + +MergeInsertIntoTableResponse response = namespace.mergeInsertIntoTable(request, mergeData); + +System.out.println("Updated rows: " + response.getNumUpdatedRows()); +System.out.println("Inserted rows: " + response.getNumInsertedRows()); +``` + +## Querying Data + +### Counting Rows + +```java +import org.lance.namespace.model.CountTableRowsRequest; + +CountTableRowsRequest request = new CountTableRowsRequest(); +request.setId(Arrays.asList("my_namespace", "my_table")); + +Long rowCount = namespace.countTableRows(request); +System.out.println("Row count: " + rowCount); +``` + +### Vector Search + +```java +import org.lance.namespace.model.QueryTableRequest; +import org.lance.namespace.model.QueryTableRequestVector; + +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(10); // Return top 10 results + +// Set the query vector +List<Float> queryVector = new ArrayList<>(); +for (int i = 0; i < 128; i++) { + 
queryVector.add(1.0f); +} +QueryTableRequestVector vector = new QueryTableRequestVector(); +vector.setSingleVector(queryVector); +query.setVector(vector); + +// Specify columns to return +query.setColumns(Arrays.asList("id", "name", "embedding")); + +// Execute query - returns Arrow IPC format +byte[] result = namespace.queryTable(query); +``` + +### Full Text Search + +```java +import org.lance.namespace.model.QueryTableRequest; +import org.lance.namespace.model.QueryTableRequestFullTextQuery; +import org.lance.namespace.model.StringFtsQuery; + +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(10); + +// Set full text search query +StringFtsQuery stringQuery = new StringFtsQuery(); +stringQuery.setQuery("search terms"); +stringQuery.setColumns(Arrays.asList("text_column")); + +QueryTableRequestFullTextQuery fts = new QueryTableRequestFullTextQuery(); +fts.setStringQuery(stringQuery); +query.setFullTextQuery(fts); + +// Specify columns to return +query.setColumns(Arrays.asList("id", "text_column")); + +byte[] result = namespace.queryTable(query); +``` + +### Query with Filter + +```java +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(10); +query.setFilter("id > 50"); +query.setColumns(Arrays.asList("id", "name")); + +byte[] result = namespace.queryTable(query); +``` + +### Query with Prefilter + +```java +QueryTableRequest query = new QueryTableRequest(); +query.setId(Arrays.asList("my_namespace", "my_table")); +query.setK(5); +query.setPrefilter(true); // Apply filter before vector search +query.setFilter("category = 'electronics'"); + +// Set query vector +QueryTableRequestVector vector = new QueryTableRequestVector(); +vector.setSingleVector(queryVector); +query.setVector(vector); + +byte[] result = namespace.queryTable(query); +``` + +### Reading Query Results + +Query results are returned in Apache Arrow IPC file 
format. Here's how to read them: + +```java +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; + +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; + +// Helper class to read Arrow data from byte array +class ByteArraySeekableByteChannel implements SeekableByteChannel { + private final byte[] data; + private long position = 0; + private boolean isOpen = true; + + public ByteArraySeekableByteChannel(byte[] data) { + this.data = data; + } + + @Override + public int read(ByteBuffer dst) { + int remaining = dst.remaining(); + int available = (int) (data.length - position); + if (available <= 0) return -1; + int toRead = Math.min(remaining, available); + dst.put(data, (int) position, toRead); + position += toRead; + return toRead; + } + + @Override public long position() { return position; } + @Override public SeekableByteChannel position(long newPosition) { position = newPosition; return this; } + @Override public long size() { return data.length; } + @Override public boolean isOpen() { return isOpen; } + @Override public void close() { isOpen = false; } + @Override public int write(ByteBuffer src) { throw new UnsupportedOperationException(); } + @Override public SeekableByteChannel truncate(long size) { throw new UnsupportedOperationException(); } +} + +// Read query results +byte[] queryResult = namespace.queryTable(query); + +try (BufferAllocator allocator = new RootAllocator(); + ArrowFileReader reader = new ArrowFileReader( + new ByteArraySeekableByteChannel(queryResult), allocator)) { + + for (int i = 0; i < reader.getRecordBlocks().size(); i++) { + reader.loadRecordBatch(reader.getRecordBlocks().get(i)); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + + // Access data + IntVector idVector = (IntVector) root.getVector("id"); + VarCharVector nameVector = (VarCharVector) 
root.getVector("name"); + + for (int row = 0; row < root.getRowCount(); row++) { + int id = idVector.get(row); + String name = new String(nameVector.get(row)); + System.out.println("Row " + row + ": id=" + id + ", name=" + name); + } + } +} +``` diff --git a/java/Makefile b/java/Makefile new file mode 100644 index 00000000..2d1f33e2 --- /dev/null +++ b/java/Makefile @@ -0,0 +1,28 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.PHONY: build-lancedb +build-lancedb: + ./mvnw spotless:apply -pl lancedb-core -am + ./mvnw install -pl lancedb-core -am + +.PHONY: test-lancedb +test-lancedb: + # Requires LANCEDB_DB and LANCEDB_API_KEY environment variables + ./mvnw test -pl lancedb-core -P integration-tests + +.PHONY: clean +clean: + ./mvnw clean + +.PHONY: build +build: build-lancedb diff --git a/java/README.md b/java/README.md index de7d9134..6bd3c683 100644 --- a/java/README.md +++ b/java/README.md @@ -7,10 +7,11 @@ For LanceDB Cloud, use the simplified builder API: ```java -import com.lancedb.lance.namespace.LanceRestNamespace; +import com.lancedb.LanceDbRestNamespaceBuilder; +import org.lance.namespace.RestNamespace; // If your DB url is db://example-db, then your database here is example-db -LanceRestNamespace namespace = LanceDBRestNamespaces.builder() +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() .apiKey("your_lancedb_cloud_api_key") .database("your_database_name") .build(); @@ -18,13 +19,13 @@ LanceRestNamespace namespace = 
LanceDBRestNamespaces.builder() ### LanceDB Enterprise -For Enterprise deployments, use your VPC endpoint: +For Enterprise deployments, use your custom endpoint: ```java -LanceRestNamespace namespace = LanceDBRestNamespaces.builder() +RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder() .apiKey("your_lancedb_enterprise_api_key") - .database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/ - .hostOverride("http://:80") + .database("your_database_name") + .endpoint("<your_enterprise_endpoint>") .build(); ``` @@ -33,5 +34,11 @@ LanceRestNamespace namespace = LanceDBRestNamespaces.builder() Build: ```shell -./mvnw install -``` \ No newline at end of file +./mvnw install -pl lancedb-core -am +``` + +Run tests: + +```shell +./mvnw test -pl lancedb-core +``` diff --git a/java/core/lancedb-jni/Cargo.toml b/java/core/lancedb-jni/Cargo.toml deleted file mode 100644 index ec7901ff..00000000 --- a/java/core/lancedb-jni/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -[package] -name = "lancedb-jni" -description = "JNI bindings for LanceDB" -# TODO modify lancedb/Cargo.toml for version and dependencies -version = "0.10.0" -edition.workspace = true -repository.workspace = true -readme.workspace = true -license.workspace = true -keywords.workspace = true -categories.workspace = true -publish = false - -[lib] -crate-type = ["cdylib"] - -[dependencies] -lancedb = { path = "../../../rust/lancedb", default-features = false } -lance = { workspace = true } -arrow = { workspace = true, features = ["ffi"] } -arrow-schema.workspace = true -tokio = "1.46" -jni = "0.21.1" -snafu.workspace = true -lazy_static.workspace = true -serde = { version = "^1" } -serde_json = { version = "1" } - -[features] -default = ["lancedb/default"] diff --git a/java/core/lancedb-jni/src/connection.rs b/java/core/lancedb-jni/src/connection.rs deleted file mode 100644 index 724840d0..00000000 --- a/java/core/lancedb-jni/src/connection.rs +++ /dev/null @@ -1,133 +0,0 @@ -// 
SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use crate::ffi::JNIEnvExt; -use crate::traits::IntoJava; -use crate::{Error, RT}; -use jni::objects::{JObject, JString, JValue}; -use jni::JNIEnv; -pub const NATIVE_CONNECTION: &str = "nativeConnectionHandle"; -use crate::Result; -use lancedb::connection::{connect, Connection}; - -#[derive(Clone)] -pub struct BlockingConnection { - pub(crate) inner: Connection, -} - -impl BlockingConnection { - pub fn create(dataset_uri: &str) -> Result { - let inner = RT.block_on(connect(dataset_uri).execute())?; - Ok(Self { inner }) - } - - pub fn table_names( - &self, - start_after: Option, - limit: Option, - ) -> Result> { - let mut op = self.inner.table_names(); - if let Some(start_after) = start_after { - op = op.start_after(start_after); - } - if let Some(limit) = limit { - op = op.limit(limit as u32); - } - Ok(RT.block_on(op.execute())?) - } -} - -impl IntoJava for BlockingConnection { - fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a> { - attach_native_connection(env, self) - } -} - -fn attach_native_connection<'local>( - env: &mut JNIEnv<'local>, - connection: BlockingConnection, -) -> JObject<'local> { - let j_connection = create_java_connection_object(env); - // This block sets a native Rust object (Connection) as a field in the Java object (j_Connection). - // Caution: This creates a potential for memory leaks. The Rust object (Connection) is not - // automatically garbage-collected by Java, and its memory will not be freed unless - // explicitly handled. - // - // To prevent memory leaks, ensure the following: - // 1. The Java object (`j_Connection`) should implement the `java.io.Closeable` interface. - // 2. Users of this Java object should be instructed to always use it within a try-with-resources - // statement (or manually call the `close()` method) to ensure that `self.close()` is invoked. 
- match unsafe { env.set_rust_field(&j_connection, NATIVE_CONNECTION, connection) } { - Ok(_) => j_connection, - Err(err) => { - env.throw_new( - "java/lang/RuntimeException", - format!("Failed to set native handle for Connection: {}", err), - ) - .expect("Error throwing exception"); - JObject::null() - } - } -} - -fn create_java_connection_object<'a>(env: &mut JNIEnv<'a>) -> JObject<'a> { - env.new_object("com/lancedb/lancedb/Connection", "()V", &[]) - .expect("Failed to create Java Lance Connection instance") -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lancedb_Connection_releaseNativeConnection( - mut env: JNIEnv, - j_connection: JObject, -) { - let _: BlockingConnection = unsafe { - env.take_rust_field(j_connection, NATIVE_CONNECTION) - .expect("Failed to take native Connection handle") - }; -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lancedb_Connection_connect<'local>( - mut env: JNIEnv<'local>, - _obj: JObject, - dataset_uri_object: JString, -) -> JObject<'local> { - let dataset_uri: String = ok_or_throw!(env, env.get_string(&dataset_uri_object)).into(); - let blocking_connection = ok_or_throw!(env, BlockingConnection::create(&dataset_uri)); - blocking_connection.into_java(&mut env) -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lancedb_Connection_tableNames<'local>( - mut env: JNIEnv<'local>, - j_connection: JObject, - start_after_obj: JObject, // Optional - limit_obj: JObject, // Optional -) -> JObject<'local> { - ok_or_throw!( - env, - inner_table_names(&mut env, j_connection, start_after_obj, limit_obj) - ) -} - -fn inner_table_names<'local>( - env: &mut JNIEnv<'local>, - j_connection: JObject, - start_after_obj: JObject, // Optional - limit_obj: JObject, // Optional -) -> Result> { - let start_after = env.get_string_opt(&start_after_obj)?; - let limit = env.get_int_opt(&limit_obj)?; - let conn = - unsafe { env.get_rust_field::<_, _, BlockingConnection>(j_connection, NATIVE_CONNECTION) }?; - let table_names = 
conn.table_names(start_after, limit)?; - drop(conn); - let j_names = env.new_object("java/util/ArrayList", "()V", &[])?; - for item in table_names { - let jstr_item = env.new_string(item)?; - let item_jobj = JObject::from(jstr_item); - let item_gen = JValue::Object(&item_jobj); - env.call_method(&j_names, "add", "(Ljava/lang/Object;)Z", &[item_gen])?; - } - Ok(j_names) -} diff --git a/java/core/lancedb-jni/src/error.rs b/java/core/lancedb-jni/src/error.rs deleted file mode 100644 index 452adce9..00000000 --- a/java/core/lancedb-jni/src/error.rs +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use std::str::Utf8Error; - -use arrow_schema::ArrowError; -use jni::errors::Error as JniError; -use serde_json::Error as JsonError; -use snafu::{Location, Snafu}; - -type BoxedError = Box; - -/// Java Exception types -pub enum JavaException { - IllegalArgumentException, - IOException, - RuntimeException, -} - -impl JavaException { - pub fn as_str(&self) -> &str { - match self { - Self::IllegalArgumentException => "java/lang/IllegalArgumentException", - Self::IOException => "java/io/IOException", - Self::RuntimeException => "java/lang/RuntimeException", - } - } -} -/// TODO(lu) change to lancedb-jni -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum Error { - #[snafu(display("JNI error: {message}, {location}"))] - Jni { message: String, location: Location }, - #[snafu(display("Invalid argument: {message}, {location}"))] - InvalidArgument { message: String, location: Location }, - #[snafu(display("IO error: {source}, {location}"))] - IO { - source: BoxedError, - location: Location, - }, - #[snafu(display("Arrow error: {message}, {location}"))] - Arrow { message: String, location: Location }, - #[snafu(display("Index error: {message}, {location}"))] - Index { message: String, location: Location }, - #[snafu(display("JSON error: {message}, {location}"))] - JSON { message: String, 
location: Location }, - #[snafu(display("Dataset at path {path} was not found, {location}"))] - DatasetNotFound { path: String, location: Location }, - #[snafu(display("Dataset already exists: {uri}, {location}"))] - DatasetAlreadyExists { uri: String, location: Location }, - #[snafu(display("Table '{name}' already exists"))] - TableAlreadyExists { name: String }, - #[snafu(display("Table '{name}' was not found: {source}"))] - TableNotFound { - name: String, - source: Box, - }, - #[snafu(display("Invalid table name '{name}': {reason}"))] - InvalidTableName { name: String, reason: String }, - #[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))] - EmbeddingFunctionNotFound { - name: String, - reason: String, - location: Location, - }, - #[snafu(display("Other Lance error: {message}, {location}"))] - OtherLance { message: String, location: Location }, - #[snafu(display("Other LanceDB error: {message}, {location}"))] - OtherLanceDB { message: String, location: Location }, -} - -impl Error { - /// Throw as Java Exception - pub fn throw(&self, env: &mut jni::JNIEnv) { - match self { - Self::InvalidArgument { .. } - | Self::DatasetNotFound { .. } - | Self::DatasetAlreadyExists { .. } - | Self::TableAlreadyExists { .. } - | Self::TableNotFound { .. } - | Self::InvalidTableName { .. } - | Self::EmbeddingFunctionNotFound { .. } => { - self.throw_as(env, JavaException::IllegalArgumentException) - } - Self::IO { .. } | Self::Index { .. } => self.throw_as(env, JavaException::IOException), - Self::Arrow { .. } - | Self::JSON { .. } - | Self::OtherLance { .. } - | Self::OtherLanceDB { .. } - | Self::Jni { .. 
} => self.throw_as(env, JavaException::RuntimeException), - } - } - - /// Throw as an concrete Java Exception - pub fn throw_as(&self, env: &mut jni::JNIEnv, exception: JavaException) { - let message = &format!( - "Error when throwing Java exception: {}:{}", - exception.as_str(), - self - ); - env.throw_new(exception.as_str(), self.to_string()) - .expect(message); - } -} - -pub type Result = std::result::Result; - -trait ToSnafuLocation { - fn to_snafu_location(&'static self) -> snafu::Location; -} - -impl ToSnafuLocation for std::panic::Location<'static> { - fn to_snafu_location(&'static self) -> snafu::Location { - snafu::Location::new(self.file(), self.line(), self.column()) - } -} - -impl From for Error { - #[track_caller] - fn from(source: JniError) -> Self { - Self::Jni { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: Utf8Error) -> Self { - Self::InvalidArgument { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: ArrowError) -> Self { - Self::Arrow { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: JsonError) -> Self { - Self::JSON { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: lance::Error) -> Self { - match source { - lance::Error::DatasetNotFound { - path, - source: _, - location, - } => Self::DatasetNotFound { path, location }, - lance::Error::DatasetAlreadyExists { uri, location } => { - Self::DatasetAlreadyExists { uri, location } - } - lance::Error::IO { source, location } => Self::IO { source, location }, - lance::Error::Arrow { message, location } => Self::Arrow { message, location 
}, - lance::Error::Index { message, location } => Self::Index { message, location }, - lance::Error::InvalidInput { source, location } => Self::InvalidArgument { - message: source.to_string(), - location, - }, - _ => Self::OtherLance { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - }, - } - } -} - -impl From for Error { - #[track_caller] - fn from(source: lancedb::Error) -> Self { - match source { - lancedb::Error::InvalidTableName { name, reason } => { - Self::InvalidTableName { name, reason } - } - lancedb::Error::InvalidInput { message } => Self::InvalidArgument { - message, - location: std::panic::Location::caller().to_snafu_location(), - }, - lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source }, - lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name }, - lancedb::Error::EmbeddingFunctionNotFound { name, reason } => { - Self::EmbeddingFunctionNotFound { - name, - reason, - location: std::panic::Location::caller().to_snafu_location(), - } - } - lancedb::Error::Arrow { source } => Self::Arrow { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - }, - lancedb::Error::Lance { source } => Self::from(source), - _ => Self::OtherLanceDB { - message: source.to_string(), - location: std::panic::Location::caller().to_snafu_location(), - }, - } - } -} diff --git a/java/core/lancedb-jni/src/ffi.rs b/java/core/lancedb-jni/src/ffi.rs deleted file mode 100644 index d353b347..00000000 --- a/java/core/lancedb-jni/src/ffi.rs +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use core::slice; - -use jni::objects::{JByteBuffer, JObjectArray, JString}; -use jni::sys::jobjectArray; -use jni::{objects::JObject, JNIEnv}; - -use crate::error::{Error, Result}; - -/// TODO(lu) import from lance-jni without duplicate -/// Extend JNIEnv with helper 
functions. -pub trait JNIEnvExt { - /// Get integers from Java List object. - fn get_integers(&mut self, obj: &JObject) -> Result>; - - /// Get strings from Java List object. - #[allow(dead_code)] - fn get_strings(&mut self, obj: &JObject) -> Result>; - - /// Get strings from Java String[] object. - /// Note that get Option> from Java Optional just doesn't work. - #[allow(unused)] - fn get_strings_array(&mut self, obj: jobjectArray) -> Result>; - - /// Get Option from Java Optional. - fn get_string_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option> from Java Optional>. - #[allow(unused)] - fn get_strings_opt(&mut self, obj: &JObject) -> Result>>; - - /// Get Option from Java Optional. - fn get_int_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option> from Java Optional>. - fn get_ints_opt(&mut self, obj: &JObject) -> Result>>; - - /// Get Option from Java Optional. - #[allow(unused)] - fn get_long_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option from Java Optional. - #[allow(unused)] - fn get_u64_opt(&mut self, obj: &JObject) -> Result>; - - /// Get Option<&[u8]> from Java Optional. - #[allow(unused)] - fn get_bytes_opt(&mut self, obj: &JObject) -> Result>; - - fn get_optional(&mut self, obj: &JObject, f: F) -> Result> - where - F: FnOnce(&mut JNIEnv, &JObject) -> Result; -} - -impl JNIEnvExt for JNIEnv<'_> { - fn get_integers(&mut self, obj: &JObject) -> Result> { - let list = self.get_list(obj)?; - let mut iter = list.iter(self)?; - let mut results = Vec::with_capacity(list.size(self)? as usize); - while let Some(elem) = iter.next(self)? { - let int_obj = self.call_method(elem, "intValue", "()I", &[])?; - let int_value = int_obj.i()?; - results.push(int_value); - } - Ok(results) - } - - fn get_strings(&mut self, obj: &JObject) -> Result> { - let list = self.get_list(obj)?; - let mut iter = list.iter(self)?; - let mut results = Vec::with_capacity(list.size(self)? as usize); - while let Some(elem) = iter.next(self)? 
{ - let jstr = JString::from(elem); - let val = self.get_string(&jstr)?; - results.push(val.to_str()?.to_string()) - } - Ok(results) - } - - fn get_strings_array(&mut self, obj: jobjectArray) -> Result> { - let jobject_array = unsafe { JObjectArray::from_raw(obj) }; - let array_len = self.get_array_length(&jobject_array)?; - let mut res: Vec = Vec::new(); - for i in 0..array_len { - let item: JString = self.get_object_array_element(&jobject_array, i)?.into(); - res.push(self.get_string(&item)?.into()); - } - Ok(res) - } - - fn get_string_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_string_obj = java_obj_gen.l()?; - let jstr = JString::from(java_string_obj); - let val = env.get_string(&jstr)?; - Ok(val.to_str()?.to_string()) - }) - } - - fn get_strings_opt(&mut self, obj: &JObject) -> Result>> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_list_obj = java_obj_gen.l()?; - env.get_strings(&java_list_obj) - }) - } - - fn get_int_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_int_obj = java_obj_gen.l()?; - let int_obj = env.call_method(java_int_obj, "intValue", "()I", &[])?; - let int_value = int_obj.i()?; - Ok(int_value) - }) - } - - fn get_ints_opt(&mut self, obj: &JObject) -> Result>> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_list_obj = java_obj_gen.l()?; - env.get_integers(&java_list_obj) - }) - } - - fn get_long_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_long_obj 
= java_obj_gen.l()?; - let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?; - let long_value = long_obj.j()?; - Ok(long_value) - }) - } - - fn get_u64_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_long_obj = java_obj_gen.l()?; - let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?; - let long_value = long_obj.j()?; - Ok(long_value as u64) - }) - } - - fn get_bytes_opt(&mut self, obj: &JObject) -> Result> { - self.get_optional(obj, |env, inner_obj| { - let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?; - let java_byte_buffer_obj = java_obj_gen.l()?; - let j_byte_buffer = JByteBuffer::from(java_byte_buffer_obj); - let raw_data = env.get_direct_buffer_address(&j_byte_buffer)?; - let capacity = env.get_direct_buffer_capacity(&j_byte_buffer)?; - let data = unsafe { slice::from_raw_parts(raw_data, capacity) }; - Ok(data) - }) - } - - fn get_optional(&mut self, obj: &JObject, f: F) -> Result> - where - F: FnOnce(&mut JNIEnv, &JObject) -> Result, - { - if obj.is_null() { - return Ok(None); - } - let is_present = self.call_method(obj, "isPresent", "()Z", &[])?; - if !is_present.z()? 
{ - // TODO(lu): put get java object into here cuz can only get java Object - Ok(None) - } else { - f(self, obj).map(Some) - } - } -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseInts( - mut env: JNIEnv, - _obj: JObject, - list_obj: JObject, // List -) { - ok_or_throw_without_return!(env, env.get_integers(&list_obj)); -} - -#[no_mangle] -pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseIntsOpt( - mut env: JNIEnv, - _obj: JObject, - list_obj: JObject, // Optional> -) { - ok_or_throw_without_return!(env, env.get_ints_opt(&list_obj)); -} diff --git a/java/core/lancedb-jni/src/lib.rs b/java/core/lancedb-jni/src/lib.rs deleted file mode 100644 index 6d498759..00000000 --- a/java/core/lancedb-jni/src/lib.rs +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use lazy_static::lazy_static; - -// TODO import from lance-jni without duplicate -#[macro_export] -macro_rules! ok_or_throw { - ($env:expr, $result:expr) => { - match $result { - Ok(value) => value, - Err(err) => { - Error::from(err).throw(&mut $env); - return JObject::null(); - } - } - }; -} - -macro_rules! ok_or_throw_without_return { - ($env:expr, $result:expr) => { - match $result { - Ok(value) => value, - Err(err) => { - Error::from(err).throw(&mut $env); - return; - } - } - }; -} - -#[macro_export] -macro_rules! ok_or_throw_with_return { - ($env:expr, $result:expr, $ret:expr) => { - match $result { - Ok(value) => value, - Err(err) => { - Error::from(err).throw(&mut $env); - return $ret; - } - } - }; -} - -mod connection; -pub mod error; -mod ffi; -mod traits; - -pub use error::{Error, Result}; - -lazy_static! 
{ - static ref RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build() - .expect("Failed to create tokio runtime"); -} diff --git a/java/core/lancedb-jni/src/traits.rs b/java/core/lancedb-jni/src/traits.rs deleted file mode 100644 index e8ed78bc..00000000 --- a/java/core/lancedb-jni/src/traits.rs +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright The LanceDB Authors - -use jni::objects::{JMap, JObject, JString, JValue}; -use jni::JNIEnv; - -use crate::Result; - -#[allow(dead_code)] -pub trait FromJObject { - fn extract(&self) -> Result; -} - -/// Convert a Rust type into a Java Object. -pub trait IntoJava { - fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a>; -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).i()?) - } -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).j()?) - } -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).f()?) - } -} - -impl FromJObject for JObject<'_> { - fn extract(&self) -> Result { - Ok(JValue::from(self).d()?) 
- } -} - -#[allow(dead_code)] -pub trait FromJString { - fn extract(&self, env: &mut JNIEnv) -> Result; -} - -impl FromJString for JString<'_> { - fn extract(&self, env: &mut JNIEnv) -> Result { - Ok(env.get_string(self)?.into()) - } -} - -pub trait JMapExt { - #[allow(dead_code)] - fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result>; - - #[allow(dead_code)] - fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result>; -} - -#[allow(dead_code)] -fn get_map_value(env: &mut JNIEnv, map: &JMap, key: &str) -> Result> -where - for<'a> JObject<'a>: FromJObject, -{ - let key_obj: JObject = env.new_string(key)?.into(); - if let Some(value) = map.get(env, &key_obj)? { - if value.is_null() { - Ok(None) - } else { - Ok(Some(value.extract()?)) - } - } else { - Ok(None) - } -} - -impl JMapExt for JMap<'_, '_, '_> { - fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result> { - let key_obj: JObject = env.new_string(key)?.into(); - if let Some(value) = self.get(env, &key_obj)? 
{ - let value_str: JString = value.into(); - Ok(Some(value_str.extract(env)?)) - } else { - Ok(None) - } - } - - fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } - - fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } - - fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } - - fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result> { - get_map_value(env, self, key) - } -} diff --git a/java/core/pom.xml b/java/core/pom.xml deleted file mode 100644 index 5cf837e8..00000000 --- a/java/core/pom.xml +++ /dev/null @@ -1,103 +0,0 @@ - - - - 4.0.0 - - - com.lancedb - lancedb-parent - 0.23.0-beta.0 - ../pom.xml - - - lancedb-core - ${project.artifactId} - LanceDB Core - jar - - false - - - - - com.lancedb - lance-namespace-core - 0.0.1 - - - org.apache.arrow - arrow-vector - - - org.apache.arrow - arrow-memory-netty - - - org.apache.arrow - arrow-c-data - - - org.apache.arrow - arrow-dataset - - - org.json - json - - - org.questdb - jar-jni - - - org.junit.jupiter - junit-jupiter - test - - - - - - build-jni - - true - - - - - org.questdb - rust-maven-plugin - 1.1.1 - - - lancedb-jni - - build - - - lancedb-jni - ${rust.release.build} - - ${project.build.directory}/classes/nativelib - true - - - - lancedb-jni-test - - test - - - lancedb-jni - false - -v - - - - - - - - - diff --git a/java/core/src/main/java/com/lancedb/lancedb/Connection.java b/java/core/src/main/java/com/lancedb/lancedb/Connection.java deleted file mode 100644 index c7ac3035..00000000 --- a/java/core/src/main/java/com/lancedb/lancedb/Connection.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.lancedb.lancedb; - -import io.questdb.jar.jni.JarJniLoader; - -import java.io.Closeable; -import java.util.List; -import java.util.Optional; - -/** Represents LanceDB database. */ -public class Connection implements Closeable { - static { - JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni"); - } - - private long nativeConnectionHandle; - - /** Connect to a LanceDB instance. */ - public static native Connection connect(String uri); - - /** - * Get the names of all tables in the database. The names are sorted in ascending order. - * - * @return the table names - */ - public List tableNames() { - return tableNames(Optional.empty(), Optional.empty()); - } - - /** - * Get the names of filtered tables in the database. The names are sorted in ascending order. - * - * @param limit The number of results to return. - * @return the table names - */ - public List tableNames(int limit) { - return tableNames(Optional.empty(), Optional.of(limit)); - } - - /** - * Get the names of filtered tables in the database. The names are sorted in ascending order. - * - * @param startAfter If present, only return names that come lexicographically after the supplied - * value. This can be combined with limit to implement pagination by setting this to the last - * table name from the previous page. - * @return the table names - */ - public List tableNames(String startAfter) { - return tableNames(Optional.of(startAfter), Optional.empty()); - } - - /** - * Get the names of filtered tables in the database. 
The names are sorted in ascending order. - * - * @param startAfter If present, only return names that come lexicographically after the supplied - * value. This can be combined with limit to implement pagination by setting this to the last - * table name from the previous page. - * @param limit The number of results to return. - * @return the table names - */ - public List tableNames(String startAfter, int limit) { - return tableNames(Optional.of(startAfter), Optional.of(limit)); - } - - /** - * Get the names of filtered tables in the database. The names are sorted in ascending order. - * - * @param startAfter If present, only return names that come lexicographically after the supplied - * value. This can be combined with limit to implement pagination by setting this to the last - * table name from the previous page. - * @param limit The number of results to return. - * @return the table names - */ - public native List tableNames(Optional startAfter, Optional limit); - - /** - * Closes this connection and releases any system resources associated with it. If the connection - * is already closed, then invoking this method has no effect. - */ - @Override - public void close() { - if (nativeConnectionHandle != 0) { - releaseNativeConnection(nativeConnectionHandle); - nativeConnectionHandle = 0; - } - } - - /** - * Native method to release the Lance connection resources associated with the given handle. - * - * @param handle The native handle to the connection resource. 
- */ - private native void releaseNativeConnection(long handle); - - private Connection() {} -} diff --git a/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java b/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java deleted file mode 100644 index fa3adf8e..00000000 --- a/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.lancedb.lancedb; - -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -import java.net.URL; -import java.nio.file.Path; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class ConnectionTest { - private static final String[] TABLE_NAMES = { - "dataset_version", "new_empty_dataset", "test", "write_stream" - }; - - @TempDir static Path tempDir; // Temporary directory for the tests - private static URL lanceDbURL; - - @BeforeAll - static void setUp() { - ClassLoader classLoader = ConnectionTest.class.getClassLoader(); - lanceDbURL = classLoader.getResource("example_db"); - } - - @Test - void emptyDB() { - String databaseUri = tempDir.resolve("emptyDB").toString(); - try (Connection conn = Connection.connect(databaseUri)) { - List tableNames = conn.tableNames(); - assertTrue(tableNames.isEmpty()); - } - } - - @Test - void tableNames() { - 
try (Connection conn = Connection.connect(lanceDbURL.toString())) { - List tableNames = conn.tableNames(); - assertEquals(4, tableNames.size()); - for (int i = 0; i < TABLE_NAMES.length; i++) { - assertEquals(TABLE_NAMES[i], tableNames.get(i)); - } - } - } - - @Test - void tableNamesStartAfter() { - try (Connection conn = Connection.connect(lanceDbURL.toString())) { - assertTableNamesStartAfter( - conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0); - assertTableNamesStartAfter( - conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]); - assertTableNamesStartAfter(conn, "z_dataset", 0); - } - } - - private void assertTableNamesStartAfter( - Connection conn, String startAfter, int expectedSize, String... 
expectedNames) { - List tableNames = conn.tableNames(startAfter); - assertEquals(expectedSize, tableNames.size()); - for (int i = 0; i < expectedNames.length; i++) { - assertEquals(expectedNames[i], tableNames.get(i)); - } - } - - @Test - void tableNamesLimit() { - try (Connection conn = Connection.connect(lanceDbURL.toString())) { - for (int i = 0; i <= TABLE_NAMES.length; i++) { - List tableNames = conn.tableNames(i); - assertEquals(i, tableNames.size()); - for (int j = 0; j < i; j++) { - assertEquals(TABLE_NAMES[j], tableNames.get(j)); - } - } - } - } - - @Test - void tableNamesStartAfterLimit() { - try (Connection conn = Connection.connect(lanceDbURL.toString())) { - List tableNames = conn.tableNames(TABLE_NAMES[0], 2); - assertEquals(2, tableNames.size()); - assertEquals(TABLE_NAMES[1], tableNames.get(0)); - assertEquals(TABLE_NAMES[2], tableNames.get(1)); - tableNames = conn.tableNames(TABLE_NAMES[1], 1); - assertEquals(1, tableNames.size()); - assertEquals(TABLE_NAMES[2], tableNames.get(0)); - tableNames = conn.tableNames(TABLE_NAMES[2], 2); - assertEquals(1, tableNames.size()); - assertEquals(TABLE_NAMES[3], tableNames.get(0)); - tableNames = conn.tableNames(TABLE_NAMES[3], 2); - assertEquals(0, tableNames.size()); - tableNames = conn.tableNames(TABLE_NAMES[0], 0); - assertEquals(0, tableNames.size()); - - // Limit larger than the number of remaining tables - tableNames = conn.tableNames(TABLE_NAMES[0], 10); - assertEquals(3, tableNames.size()); - assertEquals(TABLE_NAMES[1], tableNames.get(0)); - assertEquals(TABLE_NAMES[2], tableNames.get(1)); - assertEquals(TABLE_NAMES[3], tableNames.get(2)); - - // Start after a value not in the list - tableNames = conn.tableNames("non_existent_table", 2); - assertEquals(2, tableNames.size()); - assertEquals(TABLE_NAMES[2], tableNames.get(0)); - assertEquals(TABLE_NAMES[3], tableNames.get(1)); - - // Start after the last table with a limit - tableNames = conn.tableNames(TABLE_NAMES[3], 1); - assertEquals(0, 
tableNames.size()); - } - } -} diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest deleted file mode 100644 index f09f8e8b..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn deleted file mode 100644 index 40e927be..00000000 --- a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn +++ /dev/null @@ -1 +0,0 @@ -$d51afd07-e3cd-4c76-9b9b-787e13fd55b0²=id ÿÿÿÿÿÿÿÿÿ*int3208name ÿÿÿÿÿÿÿÿÿ*string08 \ No newline at end of file diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn deleted file mode 100644 index 77383fe7..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn deleted file mode 100644 index c0119b6e..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest 
b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest deleted file mode 100644 index d94ff721..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest deleted file mode 100644 index f8764e44..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest deleted file mode 100644 index f09f8e8b..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/data/60a9b599-f79f-48a8-bffa-b495762b622a.lance b/java/core/src/test/resources/example_db/dataset_version.lance/data/60a9b599-f79f-48a8-bffa-b495762b622a.lance deleted file mode 100644 index 7a80cb2c..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/data/60a9b599-f79f-48a8-bffa-b495762b622a.lance and /dev/null differ diff --git a/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance b/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance deleted file mode 100644 index c7d88b1c..00000000 Binary files a/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance and /dev/null differ diff --git a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_latest.manifest b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_latest.manifest 
deleted file mode 100644 index 4f5495c6..00000000 Binary files a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_latest.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn deleted file mode 100644 index 4ca22d68..00000000 --- a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn +++ /dev/null @@ -1 +0,0 @@ -$15648e72-076f-4ef1-8b90-10d305b95b3b²=id ÿÿÿÿÿÿÿÿÿ*int3208name ÿÿÿÿÿÿÿÿÿ*string08 \ No newline at end of file diff --git a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest deleted file mode 100644 index 4f5495c6..00000000 Binary files a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/test.lance/_latest.manifest b/java/core/src/test/resources/example_db/test.lance/_latest.manifest deleted file mode 100644 index d1d46b3e..00000000 Binary files a/java/core/src/test/resources/example_db/test.lance/_latest.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/test.lance/_transactions/0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn b/java/core/src/test/resources/example_db/test.lance/_transactions/0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn deleted file mode 100644 index 2df18204..00000000 --- a/java/core/src/test/resources/example_db/test.lance/_transactions/0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn +++ /dev/null @@ -1 +0,0 @@ -$a3689caf-4f6b-4afc-a3c7-97af75661843²oitem ÿÿÿÿÿÿÿÿÿ*string8price ÿÿÿÿÿÿÿÿÿ*double80vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:28 \ No newline at end of file diff --git 
a/java/core/src/test/resources/example_db/test.lance/_transactions/1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn b/java/core/src/test/resources/example_db/test.lance/_transactions/1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn deleted file mode 100644 index ab679a00..00000000 Binary files a/java/core/src/test/resources/example_db/test.lance/_transactions/1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn and /dev/null differ diff --git a/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest deleted file mode 100644 index b0c30b9d..00000000 Binary files a/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest b/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest deleted file mode 100644 index d1d46b3e..00000000 Binary files a/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/test.lance/data/cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance b/java/core/src/test/resources/example_db/test.lance/data/cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance deleted file mode 100644 index 94f50c6d..00000000 Binary files a/java/core/src/test/resources/example_db/test.lance/data/cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance and /dev/null differ diff --git a/java/core/src/test/resources/example_db/write_stream.lance/_latest.manifest b/java/core/src/test/resources/example_db/write_stream.lance/_latest.manifest deleted file mode 100644 index ac2cd0f6..00000000 Binary files a/java/core/src/test/resources/example_db/write_stream.lance/_latest.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/write_stream.lance/_transactions/0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn 
b/java/core/src/test/resources/example_db/write_stream.lance/_transactions/0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn deleted file mode 100644 index d673b7ff..00000000 Binary files a/java/core/src/test/resources/example_db/write_stream.lance/_transactions/0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn and /dev/null differ diff --git a/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest b/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest deleted file mode 100644 index ac2cd0f6..00000000 Binary files a/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest and /dev/null differ diff --git a/java/core/src/test/resources/example_db/write_stream.lance/data/665ff491-6dc5-4496-b292-166ed5c2a309.lance b/java/core/src/test/resources/example_db/write_stream.lance/data/665ff491-6dc5-4496-b292-166ed5c2a309.lance deleted file mode 100644 index 0e9dc8f3..00000000 Binary files a/java/core/src/test/resources/example_db/write_stream.lance/data/665ff491-6dc5-4496-b292-166ed5c2a309.lance and /dev/null differ diff --git a/java/lance-namespace/pom.xml b/java/lance-namespace/pom.xml deleted file mode 100644 index 41fad29b..00000000 --- a/java/lance-namespace/pom.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - 4.0.0 - - - com.lancedb - lancedb-parent - 0.23.0-beta.0 - ../pom.xml - - - lancedb-lance-namespace - ${project.artifactId} - LanceDB Java Integration with Lance Namespace - jar - - - - com.lancedb - lance-namespace-core - - - diff --git a/java/lancedb-core/pom.xml b/java/lancedb-core/pom.xml new file mode 100644 index 00000000..efebeccf --- /dev/null +++ b/java/lancedb-core/pom.xml @@ -0,0 +1,99 @@ + + + + 4.0.0 + + + com.lancedb + lancedb-parent + 0.23.0-beta.0 + ../pom.xml + + + lancedb-core + ${project.artifactId} + Utilities to work with LanceDB Cloud and Enterprise via Lance REST Namespace + jar + + + + org.lance + lance-core + + + + org.apache.arrow + arrow-vector + + + + org.apache.arrow + 
arrow-memory-netty + + + + org.junit.jupiter + junit-jupiter + test + + + + org.mockito + mockito-junit-jupiter + 5.18.0 + test + + + + org.slf4j + slf4j-api + 2.0.16 + test + + + + org.apache.logging.log4j + log4j-slf4j2-impl + 2.24.3 + test + + + + org.apache.logging.log4j + log4j-core + 2.24.3 + test + + + + org.apache.logging.log4j + log4j-api + 2.24.3 + test + + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.3.0 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + diff --git a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java b/java/lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java similarity index 55% rename from java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java rename to java/lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java index c258921f..1a6b8e32 100644 --- a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java +++ b/java/lancedb-core/src/main/java/com/lancedb/LanceDbRestNamespaceBuilder.java @@ -11,35 +11,58 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.lancedb.lancedb; +package com.lancedb; -import com.lancedb.lance.namespace.LanceRestNamespace; -import com.lancedb.lance.namespace.client.apache.ApiClient; +import org.lance.namespace.RestNamespace; import java.util.HashMap; import java.util.Map; import java.util.Optional; -/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */ -public class LanceDbRestNamespaces { +/** + * Util class to help construct a {@link RestNamespace} for LanceDB. + * + *

For LanceDB Cloud, use the simplified builder API: + * + *

{@code
+ * import org.lance.namespace.RestNamespace;
+ *
+ * // If your DB url is db://example-db, then your database here is example-db
+ * RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder()
+ *     .apiKey("your_lancedb_cloud_api_key")
+ *     .database("your_database_name")
+ *     .build();
+ * }
+ * + *

For LanceDB Enterprise deployments, use your custom endpoint: + * + *

{@code
+ * RestNamespace namespace = LanceDbRestNamespaceBuilder.newBuilder()
+ *     .apiKey("your_lancedb_enterprise_api_key")
+ *     .database("your_database_name")
+ *     .endpoint("<your_enterprise_endpoint>")
+ *     .build();
+ * }
+ */ +public class LanceDbRestNamespaceBuilder { private static final String DEFAULT_REGION = "us-east-1"; private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com"; private String apiKey; private String database; - private Optional hostOverride = Optional.empty(); + private Optional endpoint = Optional.empty(); private Optional region = Optional.empty(); private Map additionalConfig = new HashMap<>(); - private LanceDbRestNamespaces() {} + private LanceDbRestNamespaceBuilder() {} /** * Create a new builder instance. * - * @return A new LanceRestNamespaceBuilder + * @return A new RestNamespaceBuilder */ - public static LanceDbRestNamespaces builder() { - return new LanceDbRestNamespaces(); + public static LanceDbRestNamespaceBuilder newBuilder() { + return new LanceDbRestNamespaceBuilder(); } /** @@ -48,7 +71,7 @@ public class LanceDbRestNamespaces { * @param apiKey The LanceDB API key * @return This builder */ - public LanceDbRestNamespaces apiKey(String apiKey) { + public LanceDbRestNamespaceBuilder apiKey(String apiKey) { if (apiKey == null || apiKey.trim().isEmpty()) { throw new IllegalArgumentException("API key cannot be null or empty"); } @@ -62,7 +85,7 @@ public class LanceDbRestNamespaces { * @param database The database name * @return This builder */ - public LanceDbRestNamespaces database(String database) { + public LanceDbRestNamespaceBuilder database(String database) { if (database == null || database.trim().isEmpty()) { throw new IllegalArgumentException("Database cannot be null or empty"); } @@ -71,25 +94,25 @@ public class LanceDbRestNamespaces { } /** - * Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL + * Set a custom endpoint URL (optional). When set, this overrides the default LanceDB Cloud URL * construction. Use this for LanceDB Enterprise deployments. 
* - * @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80") + * @param endpoint The complete base URL for your LanceDB Enterprise deployment * @return This builder */ - public LanceDbRestNamespaces hostOverride(String hostOverride) { - this.hostOverride = Optional.ofNullable(hostOverride); + public LanceDbRestNamespaceBuilder endpoint(String endpoint) { + this.endpoint = Optional.ofNullable(endpoint); return this; } /** * Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is - * ignored when hostOverride is set. + * ignored when endpoint is set. * * @param region The AWS region (e.g., "us-east-1", "eu-west-1") * @return This builder */ - public LanceDbRestNamespaces region(String region) { + public LanceDbRestNamespaceBuilder region(String region) { this.region = Optional.ofNullable(region); return this; } @@ -101,18 +124,18 @@ public class LanceDbRestNamespaces { * @param value The configuration value * @return This builder */ - public LanceDbRestNamespaces config(String key, String value) { + public LanceDbRestNamespaceBuilder config(String key, String value) { this.additionalConfig.put(key, value); return this; } /** - * Build the LanceRestNamespace instance. + * Build the Lance RestNamespace instance. 
* - * @return A configured LanceRestNamespace + * @return A configured Lance RestNamespace * @throws IllegalStateException if required parameters are missing */ - public LanceRestNamespace build() { + public RestNamespace build() { // Validate required fields if (apiKey == null) { throw new IllegalStateException("API key is required"); @@ -123,24 +146,20 @@ public class LanceDbRestNamespaces { // Build configuration map Map config = new HashMap<>(additionalConfig); - config.put("headers.x-lancedb-database", database); - config.put("headers.x-api-key", apiKey); + config.put("header.x-lancedb-database", database); + config.put("header.x-api-key", apiKey); // Determine base URL - String baseUrl; - if (hostOverride.isPresent()) { - baseUrl = hostOverride.get(); - config.put("host_override", hostOverride.get()); + String uri; + if (endpoint.isPresent()) { + uri = endpoint.get(); } else { String effectiveRegion = region.orElse(DEFAULT_REGION); - baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion); - config.put("region", effectiveRegion); + uri = String.format(CLOUD_URL_PATTERN, database, effectiveRegion); } - - // Create and configure ApiClient - ApiClient apiClient = new ApiClient(); - apiClient.setBasePath(baseUrl); - - return new LanceRestNamespace(apiClient, config); + config.put("uri", uri); + RestNamespace ns = new RestNamespace(); + ns.initialize(config, null); + return ns; } } diff --git a/java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java b/java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java new file mode 100644 index 00000000..f6543028 --- /dev/null +++ b/java/lancedb-core/src/test/java/com/lancedb/LanceDbRestNamespaceBuilderTest.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.lancedb; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** Unit tests for LanceDbRestNamespaceBuilder. */ +public class LanceDbRestNamespaceBuilderTest { + + @Test + public void testBuilderRequiresApiKey() { + LanceDbRestNamespaceBuilder builder = + LanceDbRestNamespaceBuilder.newBuilder().database("test-db"); + + IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build); + assertEquals("API key is required", exception.getMessage()); + } + + @Test + public void testBuilderRequiresDatabase() { + LanceDbRestNamespaceBuilder builder = + LanceDbRestNamespaceBuilder.newBuilder().apiKey("test-api-key"); + + IllegalStateException exception = assertThrows(IllegalStateException.class, builder::build); + assertEquals("Database is required", exception.getMessage()); + } + + @Test + public void testApiKeyCannotBeNull() { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().apiKey(null)); + assertEquals("API key cannot be null or empty", exception.getMessage()); + } + + @Test + public void testApiKeyCannotBeEmpty() { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().apiKey(" ")); + assertEquals("API key cannot be null or empty", exception.getMessage()); + } + + @Test + public void testDatabaseCannotBeNull() { + IllegalArgumentException exception = + assertThrows( + 
IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().database(null)); + assertEquals("Database cannot be null or empty", exception.getMessage()); + } + + @Test + public void testDatabaseCannotBeEmpty() { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> LanceDbRestNamespaceBuilder.newBuilder().database(" ")); + assertEquals("Database cannot be null or empty", exception.getMessage()); + } + + @Test + public void testBuilderFluentApi() { + // Verify the builder returns itself for chaining + LanceDbRestNamespaceBuilder builder = LanceDbRestNamespaceBuilder.newBuilder(); + + assertSame(builder, builder.apiKey("test-key")); + assertSame(builder, builder.database("test-db")); + assertSame(builder, builder.endpoint("http://localhost:8080")); + assertSame(builder, builder.region("eu-west-1")); + assertSame(builder, builder.config("custom-key", "custom-value")); + } + + @Test + public void testNewBuilderCreatesNewInstance() { + LanceDbRestNamespaceBuilder builder1 = LanceDbRestNamespaceBuilder.newBuilder(); + LanceDbRestNamespaceBuilder builder2 = LanceDbRestNamespaceBuilder.newBuilder(); + + assertNotSame(builder1, builder2); + } +} diff --git a/java/lancedb-core/src/test/resources/log4j2.xml b/java/lancedb-core/src/test/resources/log4j2.xml new file mode 100644 index 00000000..00931691 --- /dev/null +++ b/java/lancedb-core/src/test/resources/log4j2.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + diff --git a/java/pom.xml b/java/pom.xml index 1eb946f0..0eeefb6d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 0.0.1 + 1.0.0-rc.2 false 2.30.0 1.7 @@ -51,8 +51,7 @@ - core - lance-namespace + lancedb-core @@ -64,9 +63,9 @@ - com.lancedb - lance-namespace-core - ${lance-namespace.verison} + org.lance + lance-core + ${lance-core.version} org.apache.arrow @@ -88,21 +87,11 @@ arrow-dataset ${arrow.version} - - org.questdb - jar-jni - 1.1.1 - org.junit.jupiter 
junit-jupiter 5.10.1 - - org.json - json - 20210307 -