feat: support binary vector and IVF_FLAT in TypeScript (#2221)

resolve #2218

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
BubbleCal
2025-03-22 01:57:08 +08:00
committed by GitHub
parent 2bfdef2624
commit bdb6c09c3b
11 changed files with 406 additions and 5 deletions

View File

@@ -4,9 +4,12 @@ import { expect, test } from "@jest/globals";
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
// --8<-- [end:import]
// --8<-- [start:import_bin_util]
import { Field, FixedSizeList, Int32, Schema, Uint8 } from "apache-arrow";
// --8<-- [end:import_bin_util]
import { withTempDirectory } from "./util.ts";
test("full text search", async () => {
test("vector search", async () => {
await withTempDirectory(async (databaseDir) => {
{
const db = await lancedb.connect(databaseDir);
@@ -14,8 +17,6 @@ test("full text search", async () => {
const data = Array.from({ length: 10_000 }, (_, i) => ({
vector: Array(128).fill(i),
id: `${i}`,
content: "",
longId: `${i}`,
}));
await db.createTable("my_vectors", data);
@@ -52,5 +53,41 @@ test("full text search", async () => {
expect(r.distance).toBeGreaterThanOrEqual(0.1);
expect(r.distance).toBeLessThan(0.2);
}
{
  // Binary vector example: ingest bit-packed vectors, build an IVF_FLAT index
  // with hamming distance on them, then run a nearest-neighbour query.
  // --8<-- [start:ingest_binary_data]
  const schema = new Schema([
    new Field("id", new Int32(), true),
    new Field("vec", new FixedSizeList(32, new Field("item", new Uint8()))),
  ]);
  const data = lancedb.makeArrowTable(
    Array.from({ length: 1_000 }, (_, i) => ({
      // the 256 bits are stored in 32 bytes;
      // if your data is already in this format, you can skip the packBits step
      id: i,
      vec: lancedb.packBits(Array(256).fill(i % 2)),
    })),
    { schema },
  );
  const tbl = await db.createTable("binary_table", data);
  await tbl.createIndex("vec", {
    config: lancedb.Index.ivfFlat({
      numPartitions: 10,
      distanceType: "hamming",
    }),
  });
  // --8<-- [end:ingest_binary_data]
  // --8<-- [start:search_binary_data]
  // a random 32-byte query; each byte is drawn from the full 0..255 range
  const query = Array.from({ length: 32 }, () =>
    Math.floor(Math.random() * 256),
  );
  const results = await tbl.query().nearestTo(query).limit(10).toArrow();
  // --8<-- [end:search_binary_data]
  expect(results.numRows).toBe(10);
}
});
});