feat: support shallow clone (#2653)

Support shallow cloning of a dataset at a specific location to create a new
dataset, using the shallow_clone feature in Lance. Also introduce a remote
`clone` API so that remote tables support this functionality.
This commit is contained in:
Jack Ye
2025-09-21 21:28:40 -07:00
committed by GitHub
parent 2261eb95a0
commit ff71d7e552
12 changed files with 1612 additions and 9 deletions

View File

@@ -203,3 +203,106 @@ describe("given a connection", () => {
});
});
});
describe("clone table functionality", () => {
  let tmpDir: tmp.DirResult;
  let db: Connection;

  // Two rows that every test writes as the first version of the source table.
  const initialRows = () => [
    { id: 1, text: "hello", vector: [1.0, 2.0] },
    { id: 2, text: "world", vector: [3.0, 4.0] },
  ];

  // One extra row; appending it produces a second version of the source table.
  const extraRows = () => [{ id: 3, text: "test", vector: [5.0, 6.0] }];

  // Location of the table named "source" inside the temporary database directory.
  const sourceUri = () => `${tmpDir.name}/source.lance`;

  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    db = await connect(tmpDir.name);
  });

  afterEach(() => tmpDir.removeCallback());

  it("should clone a table with latest version (default behavior)", async () => {
    // Build a source table that has two versions (2 rows, then 3 rows).
    const source = await db.createTable("source", initialRows());
    await source.add(extraRows());

    // With no version or tag requested, the clone should see all 3 rows.
    const clone = await db.cloneTable("cloned", sourceUri());
    expect(await clone.countRows()).toBe(3);

    const names = await db.tableNames();
    expect(names.includes("cloned")).toBe(true);
  });

  it("should clone a table from a specific version", async () => {
    const source = await db.createTable("source", initialRows());

    // Remember the version that holds only the first two rows.
    const pinnedVersion = await source.version();

    // Move the source forward so it diverges from the pinned version.
    await source.add(extraRows());
    expect(await source.countRows()).toBe(3);

    // Cloning the pinned version must not pick up the later row.
    const clone = await db.cloneTable("cloned", sourceUri(), {
      sourceVersion: pinnedVersion,
    });
    expect(await clone.countRows()).toBe(2);
  });

  it("should clone a table from a tagged version", async () => {
    const source = await db.createTable("source", initialRows());

    // Tag the two-row version before the table changes.
    const tagManager = await source.tags();
    const taggedVersion = await source.version();
    await tagManager.create("v1.0", taggedVersion);

    // Move the source forward past the tag.
    await source.add(extraRows());
    expect(await source.countRows()).toBe(3);

    // Cloning by tag must resolve to the two-row version.
    const clone = await db.cloneTable("cloned", sourceUri(), {
      sourceTag: "v1.0",
    });
    expect(await clone.countRows()).toBe(2);
  });

  it("should fail when attempting deep clone", async () => {
    await db.createTable("source", initialRows());

    // Requesting a non-shallow clone is expected to be rejected.
    const attempt = db.cloneTable("cloned", sourceUri(), { isShallow: false });
    await expect(attempt).rejects.toThrow("Deep clone is not yet implemented");
  });
});

View File

@@ -268,6 +268,33 @@ export abstract class Connection {
* @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
*/
abstract dropAllTables(namespace?: string[]): Promise<void>;
/**
 * Clone a table from a source table, creating a new table in this connection.
 *
 * A shallow clone creates a new table that shares the underlying data files
 * with the source table but has its own independent manifest. This allows
 * both the source and cloned tables to evolve independently while initially
 * sharing the same data, deletion, and index files.
 *
 * When neither `sourceVersion` nor `sourceTag` is supplied, the local
 * connection clones the latest version of the source table.
 *
 * NOTE(review): the behavior when both `sourceVersion` and `sourceTag` are
 * supplied is not visible from this declaration — confirm against the
 * underlying implementation before relying on it.
 *
 * @param {string} targetTableName - The name of the target table to create.
 * @param {string} sourceUri - The URI of the source table to clone from.
 * @param {object} options - Clone options. May be omitted entirely.
 * @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
 * @param {number} options.sourceVersion - The version of the source table to clone.
 * @param {string} options.sourceTag - The tag of the source table to clone.
 * @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
 *   The local connection currently rejects `false` with
 *   "Deep clone is not yet implemented".
 * @returns {Promise<Table>} A promise that resolves to the newly created table.
 */
abstract cloneTable(
targetTableName: string,
sourceUri: string,
options?: {
targetNamespace?: string[];
sourceVersion?: number;
sourceTag?: string;
isShallow?: boolean;
},
): Promise<Table>;
}
/** @hideconstructor */
@@ -332,6 +359,28 @@ export class LocalConnection extends Connection {
return new LocalTable(innerTable);
}
/**
 * Clone a table by delegating to the native connection.
 *
 * Missing options are replaced with explicit values before the call:
 * an empty namespace, `null` for the version and tag, and `true` for
 * `isShallow`.
 *
 * @param targetTableName - The name of the table to create.
 * @param sourceUri - The URI of the table to clone from.
 * @param options - Optional clone settings.
 * @returns The cloned table wrapped in a `LocalTable`.
 */
async cloneTable(
  targetTableName: string,
  sourceUri: string,
  options?: {
    targetNamespace?: string[];
    sourceVersion?: number;
    sourceTag?: string;
    isShallow?: boolean;
  },
): Promise<Table> {
  // `??` (not destructuring defaults) so that both `undefined` and `null`
  // fall back to the default value.
  const namespace = options?.targetNamespace ?? [];
  const version = options?.sourceVersion ?? null;
  const tag = options?.sourceTag ?? null;
  const shallow = options?.isShallow ?? true;

  const nativeTable = await this.inner.cloneTable(
    targetTableName,
    sourceUri,
    namespace,
    version,
    tag,
    shallow,
  );
  return new LocalTable(nativeTable);
}
private getStorageOptions(
options?: Partial<CreateTableOptions>,
): Record<string, string> | undefined {

View File

@@ -213,6 +213,36 @@ impl Connection {
Ok(Table::new(tbl))
}
#[napi(catch_unwind)]
pub async fn clone_table(
&self,
target_table_name: String,
source_uri: String,
target_namespace: Vec<String>,
source_version: Option<i64>,
source_tag: Option<String>,
is_shallow: bool,
) -> napi::Result<Table> {
let mut builder = self
.get_inner()?
.clone_table(&target_table_name, &source_uri);
builder = builder.target_namespace(target_namespace);
if let Some(version) = source_version {
builder = builder.source_version(version as u64);
}
if let Some(tag) = source_tag {
builder = builder.source_tag(tag);
}
builder = builder.is_shallow(is_shallow);
let tbl = builder.execute().await.default_error()?;
Ok(Table::new(tbl))
}
/// Drop table with the name. Or raise an error if the table does not exist.
#[napi(catch_unwind)]
pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {