fix: bugs for new FTS APIs (#2314)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
  - Enhanced full-text search capabilities with support for phrase
    queries, fuzzy matching, boosting, and multi-column matching.
  - Search methods now accept full-text query objects directly, improving
    query flexibility and precision.
  - Python and JavaScript SDKs updated to handle full-text queries
    seamlessly, including async search support.

- **Tests**
  - Added comprehensive tests covering fuzzy search, phrase search, and
    boosted queries to ensure robust full-text search functionality.

- **Documentation**
  - Updated query class documentation to reflect new constructor options
    and removal of deprecated methods for clarity and simplicity.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
BubbleCal
2025-04-15 11:51:35 +08:00
committed by GitHub
parent a6fa69ab89
commit 2248aa9508
15 changed files with 397 additions and 415 deletions

View File

@@ -11,6 +11,7 @@ import {
} from "./arrow";
import { type IvfPqOptions } from "./indices";
import {
JsFullTextQuery,
RecordBatchIterator as NativeBatchIterator,
Query as NativeQuery,
Table as NativeTable,
@@ -177,9 +178,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
columns: columns,
});
} else {
// If query is a FullTextQuery object, convert it to a dict
const queryObj = query.toDict();
inner.fullTextSearch(queryObj);
inner.fullTextSearch({ query: query.inner });
}
});
return this;
@@ -743,8 +742,7 @@ export class Query extends QueryBase<NativeQuery> {
columns: columns,
});
} else {
const queryObj = query.toDict();
inner.fullTextSearch(queryObj);
inner.fullTextSearch({ query: query.inner });
}
});
return this;
@@ -772,130 +770,141 @@ export enum FullTextQueryType {
* including methods to retrieve the query type and convert the query to a dictionary format.
*/
export interface FullTextQuery {
/**
* The inner query object.
* This is the underlying query object used by the database engine.
* @ignore
*/
inner: JsFullTextQuery;
/**
* The type of the full-text query.
*/
queryType(): FullTextQueryType;
/**
* Converts the query to a plain, string-keyed dictionary.
*/
toDict(): Record<string, unknown>;
}
/**
* Type guard that reports whether `obj` is a {@link FullTextQuery}.
*
* Only the `inner` property is inspected: `obj` qualifies when it is neither
* `null` nor `undefined` and `obj.inner` is a `JsFullTextQuery` instance.
* The presence of `queryType()` is not checked.
*
* @param obj - Any value to test.
* @returns `true` when `obj.inner` is a `JsFullTextQuery` instance, otherwise `false`.
*/
// biome-ignore lint/suspicious/noExplicitAny: we want any here
export function instanceOfFullTextQuery(obj: any): obj is FullTextQuery {
return obj != null && obj.inner instanceof JsFullTextQuery;
}
/**
* Full-text query over a single column whose native representation is
* built by `JsFullTextQuery.matchQuery` and exposed through `inner`.
*/
export class MatchQuery implements FullTextQuery {
/** @ignore */
public readonly inner: JsFullTextQuery;
/**
* Creates an instance of MatchQuery.
*
* @param query - The text query to search for.
* @param column - The name of the column to search within.
* @param boost - (Optional) The boost factor to influence the relevance score of this query. Default is `1.0`.
* @param fuzziness - (Optional) The allowed edit distance for fuzzy matching. Default is `0`.
* @param maxExpansions - (Optional) The maximum number of terms to consider for fuzzy matching. Default is `50`.
* @param options - Optional parameters for the match query.
* - `boost`: The boost factor for the query (default is 1.0).
* - `fuzziness`: The fuzziness level for the query (default is 0).
* - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
*/
constructor(
private query: string,
private column: string,
private boost: number = 1.0,
private fuzziness: number = 0,
private maxExpansions: number = 50,
) {}
query: string,
column: string,
options?: {
boost?: number;
fuzziness?: number;
maxExpansions?: number;
},
) {
// Only an omitted (`undefined`) fuzziness is replaced by the default of 0;
// any other value is forwarded to the native constructor unchanged.
let fuzziness = options?.fuzziness;
if (fuzziness === undefined) {
fuzziness = 0;
}
this.inner = JsFullTextQuery.matchQuery(
query,
column,
options?.boost ?? 1.0,
fuzziness,
options?.maxExpansions ?? 50,
);
}
/**
* The type of this query.
*
* @returns Always `FullTextQueryType.Match`.
*/
queryType(): FullTextQueryType {
return FullTextQueryType.Match;
}
/**
* Converts the query to a dictionary keyed by the query type, then by column name.
*
* @returns A record holding `query`, `boost`, `fuzziness` and `max_expansions` for the column.
*/
toDict(): Record<string, unknown> {
return {
[this.queryType()]: {
[this.column]: {
query: this.query,
boost: this.boost,
fuzziness: this.fuzziness,
// biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs
max_expansions: this.maxExpansions,
},
},
};
}
}
/**
* Full-text phrase query over a single column whose native representation is
* built by `JsFullTextQuery.phraseQuery` and exposed through `inner`.
*/
export class PhraseQuery implements FullTextQuery {
/** @ignore */
public readonly inner: JsFullTextQuery;
/**
* Creates an instance of `PhraseQuery`.
*
* @param query - The phrase to search for in the specified column.
* @param column - The name of the column to search within.
*/
constructor(
private query: string,
private column: string,
) {}
constructor(query: string, column: string) {
this.inner = JsFullTextQuery.phraseQuery(query, column);
}
/**
* The type of this query.
*
* @returns Always `FullTextQueryType.MatchPhrase`.
*/
queryType(): FullTextQueryType {
return FullTextQueryType.MatchPhrase;
}
/**
* Converts the query to a dictionary keyed by the query type, mapping the column name to the phrase.
*
* @returns A record of the form `{ [queryType]: { [column]: query } }`.
*/
toDict(): Record<string, unknown> {
return {
[this.queryType()]: {
[this.column]: this.query,
},
};
}
}
/**
* Full-text query combining a positive and a negative sub-query. Its native
* representation is built by `JsFullTextQuery.boostQuery` from the `inner`
* objects of both sub-queries.
*/
export class BoostQuery implements FullTextQuery {
/** @ignore */
public readonly inner: JsFullTextQuery;
/**
* Creates an instance of BoostQuery.
* The boost query returns documents that match the positive query,
* but penalizes those that match the negative query.
* The penalty is controlled by the `negativeBoost` parameter.
*
* @param positive - The positive query that boosts the relevance score.
* @param negative - The negative query that reduces the relevance score.
* @param negativeBoost - The factor by which the negative query reduces the score.
* @param options - Optional parameters for the boost query.
* - `negativeBoost`: The boost factor for the negative query (default is 0.0).
*/
constructor(
private positive: FullTextQuery,
private negative: FullTextQuery,
private negativeBoost: number,
) {}
positive: FullTextQuery,
negative: FullTextQuery,
options?: {
negativeBoost?: number;
},
) {
this.inner = JsFullTextQuery.boostQuery(
positive.inner,
negative.inner,
// Forwarded as-is and may be `undefined`; no default is applied here.
// NOTE(review): the documented default presumably comes from the native layer — confirm.
options?.negativeBoost,
);
}
/**
* The type of this query.
*
* @returns Always `FullTextQueryType.Boost`.
*/
queryType(): FullTextQueryType {
return FullTextQueryType.Boost;
}
/**
* Converts the query to a dictionary keyed by the query type, nesting the
* dictionaries of the positive and negative sub-queries.
*
* @returns A record holding `positive`, `negative` and `negative_boost`.
*/
toDict(): Record<string, unknown> {
return {
[this.queryType()]: {
positive: this.positive.toDict(),
negative: this.negative.toDict(),
// biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs
negative_boost: this.negativeBoost,
},
};
}
}
/**
* Full-text query over several columns whose native representation is
* built by `JsFullTextQuery.multiMatchQuery` and exposed through `inner`.
*/
export class MultiMatchQuery implements FullTextQuery {
/** @ignore */
public readonly inner: JsFullTextQuery;
/**
* Creates an instance of MultiMatchQuery.
*
* @param query - The text query to search for across multiple columns.
* @param columns - An array of column names to search within.
* @param boosts - (Optional) An array of boost factors corresponding to each column. Default is an array of 1.0 for each column.
*
* The `boosts` array should have the same length as `columns`. If not provided, all columns will have a default boost of 1.0.
* If the length of `boosts` is less than `columns`, it will be padded with 1.0s.
* @param options - Optional parameters for the multi-match query.
* - `boosts`: An array of boost factors for each column (default is 1.0 for all).
*/
constructor(
private query: string,
private columns: string[],
private boosts: number[] = columns.map(() => 1.0),
) {}
query: string,
columns: string[],
options?: {
boosts?: number[];
},
) {
this.inner = JsFullTextQuery.multiMatchQuery(
query,
columns,
// Forwarded as-is and may be `undefined`; no defaulting or padding happens here.
// NOTE(review): the documented 1.0 defaults are presumably applied by the native layer — confirm.
options?.boosts,
);
}
/**
* The type of this query.
*
* @returns Always `FullTextQueryType.MultiMatch`.
*/
queryType(): FullTextQueryType {
return FullTextQueryType.MultiMatch;
}
/**
* Converts the query to a dictionary keyed by the query type.
*
* @returns A record holding `query`, `columns` and `boost` (the per-column boosts).
*/
toDict(): Record<string, unknown> {
return {
[this.queryType()]: {
query: this.query,
columns: this.columns,
boost: this.boosts,
},
};
}
}

View File

@@ -22,7 +22,12 @@ import {
OptimizeStats,
Table as _NativeTable,
} from "./native";
import { Query, VectorQuery } from "./query";
import {
FullTextQuery,
Query,
VectorQuery,
instanceOfFullTextQuery,
} from "./query";
import { sanitizeType } from "./sanitize";
import { IntoSql, toSQL } from "./util";
export { IndexConfig } from "./native";
@@ -294,7 +299,7 @@ export abstract class Table {
* if the query is a string and no embedding function is defined, it will be treated as a full text search query
*/
abstract search(
query: string | IntoVector,
query: string | IntoVector | FullTextQuery,
queryType?: string,
ftsColumns?: string | string[],
): VectorQuery | Query;
@@ -565,11 +570,11 @@ export class LocalTable extends Table {
}
search(
query: string | IntoVector,
query: string | IntoVector | FullTextQuery,
queryType: string = "auto",
ftsColumns?: string | string[],
): VectorQuery | Query {
if (typeof query !== "string") {
if (typeof query !== "string" && !instanceOfFullTextQuery(query)) {
if (queryType === "fts") {
throw new Error("Cannot perform full text search on a vector query");
}
@@ -585,7 +590,10 @@ export class LocalTable extends Table {
// The query type is auto or vector
// fall back to full text search if the query is a FullTextQuery object, or if it is a string and no embedding functions are defined
if (queryType === "auto" && getRegistry().length() === 0) {
if (
queryType === "auto" &&
(getRegistry().length() === 0 || instanceOfFullTextQuery(query))
) {
return this.query().fullTextSearch(query, {
columns: ftsColumns,
});