feat: support to build FTS without positions (#1621)

This commit is contained in:
BubbleCal
2024-09-10 22:51:32 +08:00
committed by GitHub
parent a405847f9b
commit 2bde5401eb
11 changed files with 150 additions and 25 deletions

View File

@@ -844,6 +844,38 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
expect(results[0].text).toBe(data[0].text);
});
test("full text search without positions", async () => {
  // Two rows whose text differs in the first word only.
  const rows = [
    { text: "hello world", vector: [0.1, 0.2, 0.3] },
    { text: "goodbye world", vector: [0.4, 0.5, 0.6] },
  ];
  const conn = await connect(tmpDir.name);
  const tbl = await conn.createTable("test", rows);

  // Build the FTS index with token positions explicitly disabled.
  const ftsConfig = Index.fts({ withPositions: false });
  await tbl.createIndex("text", { config: ftsConfig });

  // A plain term query is expected to rank the "hello world" row first.
  const hits = await tbl.search("hello").toArray();
  expect(hits[0].text).toBe(rows[0].text);
});
test("full text search phrase query", async () => {
  const rows = [
    { text: "hello world", vector: [0.1, 0.2, 0.3] },
    { text: "goodbye world", vector: [0.4, 0.5, 0.6] },
  ];
  const conn = await connect(tmpDir.name);
  const tbl = await conn.createTable("test", rows);

  // No options are passed, so the index is built with its default settings.
  await tbl.createIndex("text", { config: Index.fts() });

  // "world" appears in both rows, so a single-term query should return both.
  const termHits = await tbl.search("world").toArray();
  expect(termHits.length).toBe(2);

  // The quoted query is expected to match only the "hello world" row.
  const phraseHits = await tbl.search('"hello world"').toArray();
  expect(phraseHits.length).toBe(1);
});
test.each([
[0.4, 0.5, 0.599], // number[]
Float32Array.of(0.4, 0.5, 0.599), // Float32Array

View File

@@ -113,6 +113,19 @@ export interface IvfPqOptions {
sampleRate?: number;
}
/**
* Options to create a full text search index.
*/
export interface FtsOptions {
/**
* Whether to build the index with token positions.
*
* True by default.
*
* If set to false, the index will not store the positions of the tokens in
* the text. This makes the index smaller and faster to build, but the index
* will not support phrase queries.
*/
withPositions?: boolean;
}
export class Index {
private readonly inner: LanceDbIndex;
private constructor(inner: LanceDbIndex) {
@@ -211,8 +224,8 @@ export class Index {
*
* For now, the full text search index only supports English. Phrase search requires the index to be built with positions (the default).
*/
static fts() {
return new Index(LanceDbIndex.fts());
static fts(options?: Partial<FtsOptions>) {
return new Index(LanceDbIndex.fts(options?.withPositions));
}
}

View File

@@ -92,9 +92,13 @@ impl Index {
}
/// Creates a full text search index configuration.
///
/// `with_position` is optional: when it is `Some`, the value is forwarded to
/// `FtsIndexBuilder::with_position`; when it is `None`, the builder is left
/// exactly as `FtsIndexBuilder::default()` produced it.
#[napi(factory)]
pub fn fts() -> Self {
pub fn fts(with_position: Option<bool>) -> Self {
let mut opts = FtsIndexBuilder::default();
// Only override the builder's default when the caller supplied a value.
if let Some(with_position) = with_position {
opts = opts.with_position(with_position);
}
Self {
inner: Mutex::new(Some(LanceDbIndex::FTS(FtsIndexBuilder::default()))),
inner: Mutex::new(Some(LanceDbIndex::FTS(opts))),
}
}
}