feat(java): integrate lance-namespace to lancedb Java SDK (#2524)

This commit is contained in:
Jack Ye
2025-07-21 14:21:21 -07:00
committed by GitHub
parent 88283110f4
commit 75edb8756c
7 changed files with 503 additions and 4 deletions

View File

@@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip

37
java/README.md Normal file
View File

@@ -0,0 +1,37 @@
# LanceDB Java SDK
## Configuration and Initialization
### LanceDB Cloud
For LanceDB Cloud, use the simplified builder API:
```java
import com.lancedb.lance.namespace.LanceRestNamespace;
// If your DB url is db://example-db, then your database here is example-db
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
.apiKey("your_lancedb_cloud_api_key")
.database("your_database_name")
.build();
```
### LanceDB Enterprise
For Enterprise deployments, use your VPC endpoint:
```java
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
.apiKey("your_lancedb_enterprise_api_key")
.database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
.hostOverride("http://<vpc_endpoint_dns_name>:80")
.build();
```
## Development
Build:
```shell
./mvnw install
```

View File

@@ -13,13 +13,19 @@
</parent>
<artifactId>lancedb-core</artifactId>
<name>LanceDB Core</name>
<name>${project.artifactId}</name>
<description>LanceDB Core</description>
<packaging>jar</packaging>
<properties>
<rust.release.build>false</rust.release.build>
</properties>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
<version>0.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>

View File

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-lance-namespace</artifactId>
<name>${project.artifactId}</name>
<description>LanceDB Java Integration with Lance Namespace</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,146 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
import com.lancedb.lance.namespace.LanceRestNamespace;
import com.lancedb.lance.namespace.client.apache.ApiClient;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */
public class LanceDbRestNamespaces {
private static final String DEFAULT_REGION = "us-east-1";
private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
private String apiKey;
private String database;
private Optional<String> hostOverride = Optional.empty();
private Optional<String> region = Optional.empty();
private Map<String, String> additionalConfig = new HashMap<>();
private LanceDbRestNamespaces() {}
/**
* Create a new builder instance.
*
* @return A new LanceRestNamespaceBuilder
*/
public static LanceDbRestNamespaces builder() {
return new LanceDbRestNamespaces();
}
/**
* Set the API key (required).
*
* @param apiKey The LanceDB API key
* @return This builder
*/
public LanceDbRestNamespaces apiKey(String apiKey) {
if (apiKey == null || apiKey.trim().isEmpty()) {
throw new IllegalArgumentException("API key cannot be null or empty");
}
this.apiKey = apiKey;
return this;
}
/**
* Set the database name (required).
*
* @param database The database name
* @return This builder
*/
public LanceDbRestNamespaces database(String database) {
if (database == null || database.trim().isEmpty()) {
throw new IllegalArgumentException("Database cannot be null or empty");
}
this.database = database;
return this;
}
/**
* Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL
* construction. Use this for LanceDB Enterprise deployments.
*
* @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
* @return This builder
*/
public LanceDbRestNamespaces hostOverride(String hostOverride) {
this.hostOverride = Optional.ofNullable(hostOverride);
return this;
}
/**
* Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is
* ignored when hostOverride is set.
*
* @param region The AWS region (e.g., "us-east-1", "eu-west-1")
* @return This builder
*/
public LanceDbRestNamespaces region(String region) {
this.region = Optional.ofNullable(region);
return this;
}
/**
* Add additional configuration parameters.
*
* @param key The configuration key
* @param value The configuration value
* @return This builder
*/
public LanceDbRestNamespaces config(String key, String value) {
this.additionalConfig.put(key, value);
return this;
}
/**
* Build the LanceRestNamespace instance.
*
* @return A configured LanceRestNamespace
* @throws IllegalStateException if required parameters are missing
*/
public LanceRestNamespace build() {
// Validate required fields
if (apiKey == null) {
throw new IllegalStateException("API key is required");
}
if (database == null) {
throw new IllegalStateException("Database is required");
}
// Build configuration map
Map<String, String> config = new HashMap<>(additionalConfig);
config.put("headers.x-lancedb-database", database);
config.put("headers.x-api-key", apiKey);
// Determine base URL
String baseUrl;
if (hostOverride.isPresent()) {
baseUrl = hostOverride.get();
config.put("host_override", hostOverride.get());
} else {
String effectiveRegion = region.orElse(DEFAULT_REGION);
baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
config.put("region", effectiveRegion);
}
// Create and configure ApiClient
ApiClient apiClient = new ApiClient();
apiClient.setBasePath(baseUrl);
return new LanceRestNamespace(apiClient, config);
}
}

259
java/mvnw vendored Executable file
View File

@@ -0,0 +1,259 @@
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Apache Maven Wrapper startup batch script, version 3.3.2
#
# Optional ENV vars
# -----------------
# JAVA_HOME - location of a JDK home dir, required when download maven via java source
# MVNW_REPOURL - repo url base for downloading maven distribution
# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
# ----------------------------------------------------------------------------
set -euf
[ "${MVNW_VERBOSE-}" != debug ] || set -x
# OS specific support.
native_path() { printf %s\\n "$1"; }
case "$(uname)" in
CYGWIN* | MINGW*)
[ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
native_path() { cygpath --path --windows "$1"; }
;;
esac
# set JAVACMD and JAVACCMD
set_java_home() {
# For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
if [ -n "${JAVA_HOME-}" ]; then
if [ -x "$JAVA_HOME/jre/sh/java" ]; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
JAVACCMD="$JAVA_HOME/jre/sh/javac"
else
JAVACMD="$JAVA_HOME/bin/java"
JAVACCMD="$JAVA_HOME/bin/javac"
if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
return 1
fi
fi
else
JAVACMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v java
)" || :
JAVACCMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v javac
)" || :
if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
return 1
fi
fi
}
# hash string like Java String::hashCode
hash_string() {
str="${1:-}" h=0
while [ -n "$str" ]; do
char="${str%"${str#?}"}"
h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
str="${str#?}"
done
printf %x\\n $h
}
verbose() { :; }
[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
die() {
printf %s\\n "$1" >&2
exit 1
}
trim() {
# MWRAPPER-139:
# Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds.
# Needed for removing poorly interpreted newline sequences when running in more
# exotic environments such as mingw bash on Windows.
printf "%s" "${1}" | tr -d '[:space:]'
}
# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
while IFS="=" read -r key value; do
case "${key-}" in
distributionUrl) distributionUrl=$(trim "${value-}") ;;
distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
esac
done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
case "${distributionUrl##*/}" in
maven-mvnd-*bin.*)
MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
*AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
:Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
:Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
:Linux*x86_64*) distributionPlatform=linux-amd64 ;;
*)
echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
distributionPlatform=linux-amd64
;;
esac
distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
;;
maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
esac
# apply MVNW_REPOURL and calculate MAVEN_HOME
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
distributionUrlName="${distributionUrl##*/}"
distributionUrlNameMain="${distributionUrlName%.*}"
distributionUrlNameMain="${distributionUrlNameMain%-bin}"
MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
exec_maven() {
unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
}
if [ -d "$MAVEN_HOME" ]; then
verbose "found existing MAVEN_HOME at $MAVEN_HOME"
exec_maven "$@"
fi
case "${distributionUrl-}" in
*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
esac
# prepare tmp dir
if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
trap clean HUP INT TERM EXIT
else
die "cannot create temp dir"
fi
mkdir -p -- "${MAVEN_HOME%/*}"
# Download and Install Apache Maven
verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
verbose "Downloading from: $distributionUrl"
verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
# select .zip or .tar.gz
if ! command -v unzip >/dev/null; then
distributionUrl="${distributionUrl%.zip}.tar.gz"
distributionUrlName="${distributionUrl##*/}"
fi
# verbose opt
__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
# normalize http auth
case "${MVNW_PASSWORD:+has-password}" in
'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
esac
if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
verbose "Found wget ... using wget"
wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
verbose "Found curl ... using curl"
curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
elif set_java_home; then
verbose "Falling back to use Java to download"
javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
cat >"$javaSource" <<-END
public class Downloader extends java.net.Authenticator
{
protected java.net.PasswordAuthentication getPasswordAuthentication()
{
return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
}
public static void main( String[] args ) throws Exception
{
setDefault( new Downloader() );
java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
}
}
END
# For Cygwin/MinGW, switch paths to Windows format before running javac and java
verbose " - Compiling Downloader.java ..."
"$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
verbose " - Running Downloader.java ..."
"$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
fi
# If specified, validate the SHA-256 sum of the Maven distribution zip file
if [ -n "${distributionSha256Sum-}" ]; then
distributionSha256Result=false
if [ "$MVN_CMD" = mvnd.sh ]; then
echo "Checksum validation is not supported for maven-mvnd." >&2
echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
elif command -v sha256sum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
elif command -v shasum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
else
echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
fi
if [ $distributionSha256Result = false ]; then
echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
exit 1
fi
fi
# unzip and move
if command -v unzip >/dev/null; then
unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
else
tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
fi
printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
clean || :
exec_maven "$@"

View File

@@ -8,9 +8,8 @@
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>
<description>LanceDB vector database Java API</description>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
<url>http://lancedb.com/</url>
<developers>
@@ -29,6 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-namespace.verison>0.0.1</lance-namespace.verison>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -52,6 +52,7 @@
<modules>
<module>core</module>
<module>lance-namespace</module>
</modules>
<scm>
@@ -62,6 +63,11 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
<version>${lance-namespace.verison}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>