Files
lancedb/docs/src/java/java.md
2025-12-17 06:59:29 +00:00

16 KiB

Java SDK

The LanceDB Java SDK provides a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API.

!!! note
    The Java SDK currently supports only remote LanceDB databases, i.e. LanceDB Cloud and Enterprise deployments. Local database support is a work in progress; see LANCEDB-2848 for the latest status.

Installation

Add the following dependency to your pom.xml:

<dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
    <version>0.23.1-beta.1</version>
</dependency>

Quick Start

Connecting to LanceDB Cloud

import com.lancedb.LanceDbNamespaceClientBuilder;
import org.lance.namespace.LanceNamespace;

// The database name is the part of the DB URL after "db://".
// If your DB url is db://example-db, then your database here is example-db
// For LanceDB Cloud only the API key and the database name are set on the builder.
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
    .apiKey("your_lancedb_cloud_api_key")
    .database("your_database_name")
    .build();

Connecting to LanceDB Enterprise

For LanceDB Enterprise deployments with a custom endpoint:

// Same builder as for LanceDB Cloud, with endpoint(...) additionally set to the
// URL of your Enterprise deployment.
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
    .apiKey("your_lancedb_enterprise_api_key")
    .database("your_database_name")
    .endpoint("<your_enterprise_endpoint>")
    .build();

Configuration Options

| Method | Description | Required |
|--------|-------------|----------|
| `apiKey(String)` | LanceDB API key | Yes |
| `database(String)` | Database name | Yes |
| `endpoint(String)` | Custom endpoint URL for Enterprise deployments | No |
| `region(String)` | AWS region (default: "us-east-1") | No |
| `config(String, String)` | Additional configuration parameters | No |

Metadata Operations

Creating a Namespace

Namespaces organize tables hierarchically. Create a namespace before creating tables within it:

import org.lance.namespace.model.CreateNamespaceRequest;
import org.lance.namespace.model.CreateNamespaceResponse;

// Create a child namespace.
// The id is the namespace path; a single element names one namespace level.
CreateNamespaceRequest createRequest = new CreateNamespaceRequest();
createRequest.setId(Arrays.asList("my_namespace"));

CreateNamespaceResponse createResponse = namespaceClient.createNamespace(createRequest);

You can also create nested namespaces:

// Create a nested namespace: parent/child.
// Each element of the id is one level of the path, outermost first.
CreateNamespaceRequest nestedRequest = new CreateNamespaceRequest();
nestedRequest.setId(Arrays.asList("parent_namespace", "child_namespace"));

CreateNamespaceResponse nestedResponse = namespaceClient.createNamespace(nestedRequest);

Describing a Namespace

import org.lance.namespace.model.DescribeNamespaceRequest;
import org.lance.namespace.model.DescribeNamespaceResponse;

// Look up an existing namespace by its path and print its properties
DescribeNamespaceRequest describeRequest = new DescribeNamespaceRequest();
describeRequest.setId(Arrays.asList("my_namespace"));

DescribeNamespaceResponse description = namespaceClient.describeNamespace(describeRequest);
System.out.println("Namespace properties: " + description.getProperties());

Listing Namespaces

import org.lance.namespace.model.ListNamespacesRequest;
import org.lance.namespace.model.ListNamespacesResponse;

// List all namespaces at root level: an empty id addresses the root
ListNamespacesRequest rootRequest = new ListNamespacesRequest();
rootRequest.setId(Arrays.asList());  // Empty for root

ListNamespacesResponse rootResponse = namespaceClient.listNamespaces(rootRequest);
for (String namespaceName : rootResponse.getNamespaces()) {
    System.out.println("Namespace: " + namespaceName);
}

// List child namespaces under a parent by passing the parent's path as the id
ListNamespacesRequest nestedRequest = new ListNamespacesRequest();
nestedRequest.setId(Arrays.asList("parent_namespace"));

ListNamespacesResponse nestedResponse = namespaceClient.listNamespaces(nestedRequest);

Listing Tables

import org.lance.namespace.model.ListTablesRequest;
import org.lance.namespace.model.ListTablesResponse;

// List tables in a namespace; the id is the path of the namespace to inspect
ListTablesRequest tablesRequest = new ListTablesRequest();
tablesRequest.setId(Arrays.asList("my_namespace"));

ListTablesResponse tablesResponse = namespaceClient.listTables(tablesRequest);
for (String tableName : tablesResponse.getTables()) {
    System.out.println("Table: " + tableName);
}

Dropping a Namespace

import org.lance.namespace.model.DropNamespaceRequest;
import org.lance.namespace.model.DropNamespaceResponse;

// Drop the namespace identified by its path
DropNamespaceRequest dropRequest = new DropNamespaceRequest();
dropRequest.setId(Arrays.asList("my_namespace"));

DropNamespaceResponse dropResponse = namespaceClient.dropNamespace(dropRequest);

Describing a Table

import org.lance.namespace.model.DescribeTableRequest;
import org.lance.namespace.model.DescribeTableResponse;

// A table id is the namespace path followed by the table name
DescribeTableRequest describeRequest = new DescribeTableRequest();
describeRequest.setId(Arrays.asList("my_namespace", "my_table"));

// Print the table's current version and the fields of its schema
DescribeTableResponse description = namespaceClient.describeTable(describeRequest);
System.out.println("Table version: " + description.getVersion());
System.out.println("Schema fields: " + description.getSchema().getFields());

Dropping a Table

import org.lance.namespace.model.DropTableRequest;
import org.lance.namespace.model.DropTableResponse;

// Drop the table addressed by namespace path + table name
DropTableRequest dropRequest = new DropTableRequest();
dropRequest.setId(Arrays.asList("my_namespace", "my_table"));

DropTableResponse dropResponse = namespaceClient.dropTable(dropRequest);

Writing Data

Creating a Table

Tables are created within a namespace by providing the initial data serialized in the Apache Arrow IPC stream format:

import org.lance.namespace.LanceNamespace;
import org.lance.namespace.model.CreateTableRequest;
import org.lance.namespace.model.CreateTableResponse;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.ipc.ArrowStreamWriter;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

import java.io.ByteArrayOutputStream;
import java.nio.channels.Channels;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

// Shape of the example data. The schema, the fill loop and the value counts
// below must all agree on these two numbers, so they are named once here.
final int numRows = 3;
final int embeddingDim = 128;

// Create schema with id, name, and embedding fields
Schema schema = new Schema(Arrays.asList(
    new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null),
    new Field("name", FieldType.nullable(new ArrowType.Utf8()), null),
    // Fixed-size list of single-precision floats, one list of embeddingDim values per row
    new Field("embedding",
        FieldType.nullable(new ArrowType.FixedSizeList(embeddingDim)),
        Arrays.asList(new Field("item",
            FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
            null)))
));

try (BufferAllocator allocator = new RootAllocator();
     VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {

    // Populate data
    root.setRowCount(numRows);
    IntVector idVector = (IntVector) root.getVector("id");
    VarCharVector nameVector = (VarCharVector) root.getVector("name");
    FixedSizeListVector embeddingVector = (FixedSizeListVector) root.getVector("embedding");
    // The list vector's child holds all embedding values back to back
    Float4Vector embeddingData = (Float4Vector) embeddingVector.getDataVector();

    for (int i = 0; i < numRows; i++) {
        idVector.setSafe(i, i + 1);
        // The column is declared as Utf8, so encode explicitly instead of
        // relying on the platform default charset
        nameVector.setSafe(i, ("item_" + i).getBytes(StandardCharsets.UTF_8));
        embeddingVector.setNotNull(i);
        // Row i occupies child slots [i * embeddingDim, (i + 1) * embeddingDim)
        for (int j = 0; j < embeddingDim; j++) {
            embeddingData.setSafe(i * embeddingDim + j, (float) i);
        }
    }
    idVector.setValueCount(numRows);
    nameVector.setValueCount(numRows);
    embeddingData.setValueCount(numRows * embeddingDim);
    embeddingVector.setValueCount(numRows);

    // Serialize to Arrow IPC format (streaming writer, single record batch)
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(out))) {
        writer.start();
        writer.writeBatch();
        writer.end();
    }
    byte[] tableData = out.toByteArray();

    // Create table in a namespace; the id is the namespace path followed by the table name
    CreateTableRequest request = new CreateTableRequest();
    request.setId(Arrays.asList("my_namespace", "my_table"));
    CreateTableResponse response = namespaceClient.createTable(request, tableData);
}

Insert

import org.lance.namespace.model.InsertIntoTableRequest;
import org.lance.namespace.model.InsertIntoTableResponse;

// Prepare data in Arrow IPC format (similar to create table example).
// prepareArrowData() stands in for your own serialization code.
byte[] insertData = prepareArrowData();

InsertIntoTableRequest insertRequest = new InsertIntoTableRequest();
insertRequest.setMode(InsertIntoTableRequest.ModeEnum.APPEND);
insertRequest.setId(Arrays.asList("my_namespace", "my_table"));

InsertIntoTableResponse insertResponse = namespaceClient.insertIntoTable(insertRequest, insertData);
System.out.println("New version: " + insertResponse.getVersion());

Update

Update rows matching a predicate condition:

import org.lance.namespace.model.UpdateTableRequest;
import org.lance.namespace.model.UpdateTableResponse;

UpdateTableRequest updateRequest = new UpdateTableRequest();
updateRequest.setId(Arrays.asList("my_namespace", "my_table"));

// Predicate to select rows to update
updateRequest.setPredicate("id = 1");

// Set new values using SQL expressions as [column_name, expression] pairs.
// The inner quotes make 'updated_name' a SQL string literal.
updateRequest.setUpdates(Arrays.asList(Arrays.asList("name", "'updated_name'")));

UpdateTableResponse updateResponse = namespaceClient.updateTable(updateRequest);
System.out.println("Updated rows: " + updateResponse.getUpdatedRows());

Delete

Delete rows matching a predicate condition:

import org.lance.namespace.model.DeleteFromTableRequest;
import org.lance.namespace.model.DeleteFromTableResponse;

DeleteFromTableRequest deleteRequest = new DeleteFromTableRequest();

// Predicate to select rows to delete
deleteRequest.setPredicate("id > 100");
deleteRequest.setId(Arrays.asList("my_namespace", "my_table"));

DeleteFromTableResponse deleteResponse = namespaceClient.deleteFromTable(deleteRequest);
System.out.println("New version: " + deleteResponse.getVersion());

Merge Insert (Upsert)

Merge insert allows you to update existing rows and insert new rows in a single operation based on a key column:

import org.lance.namespace.model.MergeInsertIntoTableRequest;
import org.lance.namespace.model.MergeInsertIntoTableResponse;

// Prepare data with rows to update (id=2,3) and new rows (id=4)
byte[] mergeData = prepareArrowData();  // Contains rows with id=2,3,4

MergeInsertIntoTableRequest mergeRequest = new MergeInsertIntoTableRequest();
mergeRequest.setId(Arrays.asList("my_namespace", "my_table"));

// Match on the "id" column
mergeRequest.setOn("id");

// Upsert behavior: update all columns when a matching row is found,
// insert new rows when no match is found
mergeRequest.setWhenMatchedUpdateAll(true);
mergeRequest.setWhenNotMatchedInsertAll(true);

MergeInsertIntoTableResponse mergeResponse =
    namespaceClient.mergeInsertIntoTable(mergeRequest, mergeData);

System.out.println("Updated rows: " + mergeResponse.getNumUpdatedRows());
System.out.println("Inserted rows: " + mergeResponse.getNumInsertedRows());

Querying Data

Counting Rows

import org.lance.namespace.model.CountTableRowsRequest;

// Count the rows of the table addressed by namespace path + table name
CountTableRowsRequest countRequest = new CountTableRowsRequest();
countRequest.setId(Arrays.asList("my_namespace", "my_table"));

Long totalRows = namespaceClient.countTableRows(countRequest);
System.out.println("Row count: " + totalRows);

Vector Search

import org.lance.namespace.model.QueryTableRequest;
import org.lance.namespace.model.QueryTableRequestVector;

// Requires java.util.List and java.util.ArrayList in addition to the imports above
QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(10);  // Return top 10 results

// Set the query vector: 128 components, all 1.0
List<Float> queryVector = new ArrayList<>();
while (queryVector.size() < 128) {
    queryVector.add(1.0f);
}
QueryTableRequestVector searchVector = new QueryTableRequestVector();
searchVector.setSingleVector(queryVector);
query.setVector(searchVector);

// Specify columns to return
query.setColumns(Arrays.asList("id", "name", "embedding"));

// Execute query - returns Arrow IPC format
byte[] searchResult = namespaceClient.queryTable(query);

Full Text Search

import org.lance.namespace.model.QueryTableRequest;
import org.lance.namespace.model.QueryTableRequestFullTextQuery;
import org.lance.namespace.model.StringFtsQuery;

QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(10);

// Set full text search query: the search string and the column(s) to search in
StringFtsQuery textQuery = new StringFtsQuery();
textQuery.setQuery("search terms");
textQuery.setColumns(Arrays.asList("text_column"));

// Wrap the string query in the request's full-text-query holder
QueryTableRequestFullTextQuery fullTextQuery = new QueryTableRequestFullTextQuery();
fullTextQuery.setStringQuery(textQuery);
query.setFullTextQuery(fullTextQuery);

// Specify columns to return
query.setColumns(Arrays.asList("id", "text_column"));

byte[] searchResult = namespaceClient.queryTable(query);

Query with Filter

QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));

// Filter expression, at most 10 results
query.setFilter("id > 50");
query.setK(10);

// Only the id and name columns are returned
query.setColumns(Arrays.asList("id", "name"));

byte[] filteredResult = namespaceClient.queryTable(query);

Query with Prefilter

QueryTableRequest query = new QueryTableRequest();
query.setId(Arrays.asList("my_namespace", "my_table"));
query.setK(5);

// Apply filter before vector search
query.setFilter("category = 'electronics'");
query.setPrefilter(true);

// Set query vector (queryVector is the List<Float> built in the vector search example)
QueryTableRequestVector searchVector = new QueryTableRequestVector();
searchVector.setSingleVector(queryVector);
query.setVector(searchVector);

byte[] prefilteredResult = namespaceClient.queryTable(query);

Reading Query Results

Query results are returned in Apache Arrow IPC file format. Here's how to read them:

import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;

// Helper class to read Arrow data from byte array
/**
 * Read-only {@link SeekableByteChannel} backed by an in-memory byte array.
 *
 * <p>The array is not copied; the channel reads directly from the array passed
 * to the constructor. {@link #write} and {@link #truncate} are unsupported.
 */
class ByteArraySeekableByteChannel implements SeekableByteChannel {
    private final byte[] data;
    private long position = 0;
    private boolean isOpen = true;

    /**
     * @param data the bytes this channel exposes
     */
    public ByteArraySeekableByteChannel(byte[] data) {
        this.data = data;
    }

    /**
     * Copies up to {@code dst.remaining()} bytes from the current position into
     * {@code dst} and advances the position by the number of bytes copied.
     *
     * @return the number of bytes copied, or -1 if the position is at or past the end
     */
    @Override
    public int read(ByteBuffer dst) {
        // Keep this in long arithmetic: position may legally be set far beyond the
        // end of the array, and narrowing the difference to int would wrap around
        // and report bytes as available when there are none.
        long available = data.length - position;
        if (available <= 0) return -1;
        // available is in (0, data.length] here, so the minimum fits in an int
        int toRead = (int) Math.min((long) dst.remaining(), available);
        // position < data.length here, so narrowing it is safe
        dst.put(data, (int) position, toRead);
        position += toRead;
        return toRead;
    }

    @Override public long position() { return position; }

    /**
     * Moves the read position. Positions beyond the end are allowed; subsequent
     * reads then return -1.
     *
     * @throws IllegalArgumentException if {@code newPosition} is negative
     */
    @Override
    public SeekableByteChannel position(long newPosition) {
        if (newPosition < 0) {
            throw new IllegalArgumentException("Position must be non-negative: " + newPosition);
        }
        position = newPosition;
        return this;
    }

    @Override public long size() { return data.length; }
    @Override public boolean isOpen() { return isOpen; }
    @Override public void close() { isOpen = false; }
    @Override public int write(ByteBuffer src) { throw new UnsupportedOperationException(); }
    @Override public SeekableByteChannel truncate(long size) { throw new UnsupportedOperationException(); }
}

// Read query results
byte[] queryResult = namespaceClient.queryTable(query);

try (BufferAllocator allocator = new RootAllocator();
     ArrowFileReader reader = new ArrowFileReader(
         new ByteArraySeekableByteChannel(queryResult), allocator)) {

    // The reader exposes one VectorSchemaRoot; each loadNextBatch() call
    // refills it with the next record batch and returns false when none are left
    VectorSchemaRoot root = reader.getVectorSchemaRoot();

    while (reader.loadNextBatch()) {
        // Access data
        IntVector idVector = (IntVector) root.getVector("id");
        VarCharVector nameVector = (VarCharVector) root.getVector("name");

        for (int row = 0; row < root.getRowCount(); row++) {
            int id = idVector.get(row);
            // Decode explicitly as UTF-8 rather than with the platform default charset
            String name = new String(nameVector.get(row), StandardCharsets.UTF_8);
            System.out.println("Row " + row + ": id=" + id + ", name=" + name);
        }
    }
}