docs: user guide for merge insert (#2083)

Closes #2062
This commit is contained in:
Will Jones
2025-01-31 10:03:21 -08:00
committed by GitHub
parent 555fa26147
commit dba85f4d6f
12 changed files with 474 additions and 32 deletions

View File

@@ -3,7 +3,7 @@
import { expect, test } from "@jest/globals";
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
import { VectorQuery } from "@lancedb/lancedb";
import type { VectorQuery } from "@lancedb/lancedb";
// --8<-- [end:import]
import { withTempDirectory } from "./util.ts";

View File

@@ -117,26 +117,24 @@ test("basic table examples", async () => {
// --8<-- [end:add_data]
}
{
// --8<-- [start:add_columns]
await tbl.addColumns([
{ name: "double_price", valueSql: "cast((price * 2) as Float)" },
]);
// --8<-- [end:add_columns]
// --8<-- [start:alter_columns]
await tbl.alterColumns([
{
path: "double_price",
rename: "dbl_price",
dataType: "float",
nullable: true,
},
]);
// --8<-- [end:alter_columns]
// --8<-- [start:drop_columns]
await tbl.dropColumns(["dbl_price"]);
// --8<-- [end:drop_columns]
}
// --8<-- [start:add_columns]
await tbl.addColumns([
{ name: "double_price", valueSql: "cast((price * 2) as Float)" },
]);
// --8<-- [end:add_columns]
// --8<-- [start:alter_columns]
await tbl.alterColumns([
{
path: "double_price",
rename: "dbl_price",
dataType: "float",
nullable: true,
},
]);
// --8<-- [end:alter_columns]
// --8<-- [start:drop_columns]
await tbl.dropColumns(["dbl_price"]);
// --8<-- [end:drop_columns]
{
// --8<-- [start:vector_search]

View File

@@ -0,0 +1,52 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"vcs": {
"enabled": false,
"clientKind": "git",
"useIgnoreFile": false
},
"files": {
"ignoreUnknown": false,
"ignore": []
},
"formatter": {
"enabled": true,
"indentStyle": "space"
},
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
},
"javascript": {
"formatter": {
"quoteStyle": "double"
}
},
"overrides": [
{
"include": ["*"],
"linter": {
"rules": {
"style": {
"noNonNullAssertion": "off"
}
}
}
},
{
"include": ["merge_insert.test.ts"],
"linter": {
"rules": {
"style": {
"useNamingConvention": "off"
}
}
}
}
]
}

View File

@@ -1,4 +1,7 @@
import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
import {
type FeatureExtractionPipeline,
pipeline,
} from "@huggingface/transformers";
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";

View File

@@ -0,0 +1,68 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
import * as lancedb from "@lancedb/lancedb";
// Runs three merge-insert scenarios (upsert, insert-if-not-exists, replace
// range) against small in-memory tables and checks the resulting row counts.
// NOTE(review): the `--8<-- [start:...]` / `[end:...]` comments look like
// snippet markers consumed by the documentation build — confirm, and keep them
// and the code between them intact when editing.
test("basic upsert", async () => {
// All three tables below are created on this single "memory://" connection.
const db = await lancedb.connect("memory://");
// Scenario 1: the table starts with ids 0 and 1 and the new data carries ids
// 1 and 2, merged on "id". The expected total of 3 rows means id 1 was matched
// against the existing row instead of being added as a duplicate.
// --8<-- [start:upsert_basic]
const table = await db.createTable("users", [
{ id: 0, name: "Alice" },
{ id: 1, name: "Bob" },
]);
const newUsers = [
{ id: 1, name: "Bobby" },
{ id: 2, name: "Charlie" },
];
await table
.mergeInsert("id")
.whenMatchedUpdateAll()
.whenNotMatchedInsertAll()
.execute(newUsers);
await table.countRows(); // 3
// --8<-- [end:upsert_basic]
// Verifies the count quoted in the trailing comment of the snippet above.
expect(await table.countRows()).toBe(3);
// Scenario 2: only `whenNotMatchedInsertAll` is configured, merged on
// "domain". "google.com" is already present and "facebook.com" is not, so the
// expected total grows from 2 to 3 rows.
// --8<-- [start:insert_if_not_exists]
const table2 = await db.createTable("domains", [
{ domain: "google.com", name: "Google" },
{ domain: "github.com", name: "GitHub" },
]);
const newDomains = [
{ domain: "google.com", name: "Google" },
{ domain: "facebook.com", name: "Facebook" },
];
await table2
.mergeInsert("domain")
.whenNotMatchedInsertAll()
.execute(newDomains);
await table2.countRows(); // 3
// --8<-- [end:insert_if_not_exists]
// Verifies the count quoted in the trailing comment of the snippet above.
expect(await table2.countRows()).toBe(3);
// Scenario 3: the table holds two chunks for each of doc_id 0 and 1; the new
// data holds a single chunk for doc_id 1. Rows are merged on the composite key
// ["doc_id", "chunk_id"], and the by-source delete is restricted with
// `where: "doc_id = 1"`. The expected count for doc_id 1 drops from 2 to 1.
// --8<-- [start:replace_range]
const table3 = await db.createTable("chunks", [
{ doc_id: 0, chunk_id: 0, text: "Hello" },
{ doc_id: 0, chunk_id: 1, text: "World" },
{ doc_id: 1, chunk_id: 0, text: "Foo" },
{ doc_id: 1, chunk_id: 1, text: "Bar" },
]);
const newChunks = [{ doc_id: 1, chunk_id: 0, text: "Baz" }];
await table3
.mergeInsert(["doc_id", "chunk_id"])
.whenMatchedUpdateAll()
.whenNotMatchedInsertAll()
.whenNotMatchedBySourceDelete({ where: "doc_id = 1" })
.execute(newChunks);
await table3.countRows("doc_id = 1"); // 1
// --8<-- [end:replace_range]
// Only rows matching the "doc_id = 1" filter are counted here.
expect(await table3.countRows("doc_id = 1")).toBe(1);
});

View File

@@ -6,7 +6,7 @@ import { withTempDirectory } from "./util.ts";
import * as lancedb from "@lancedb/lancedb";
import "@lancedb/lancedb/embedding/transformers";
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
import type { EmbeddingFunction } from "@lancedb/lancedb/embedding";
import { Utf8 } from "apache-arrow";
test("full text search", async () => {
@@ -58,6 +58,6 @@ test("full text search", async () => {
const query = "How many bones are in the human body?";
const actual = await tbl.search(query).limit(1).toArray();
expect(actual[0]["text"]).toBe("The human body has 206 bones.");
expect(actual[0].text).toBe("The human body has 206 bones.");
});
}, 100_000);

View File

@@ -1,8 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as fs from "fs";
import { tmpdir } from "os";
import * as path from "path";
import * as fs from "node:fs";
import { tmpdir } from "node:os";
import * as path from "node:path";
export async function withTempDirectory(
fn: (tempDir: string) => Promise<void>,