feat: update to lance 0.25.3b1 (#2294)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **Chores**
  - Updated dependency versions for improved performance and
    compatibility.

- **New Features**
  - Added support for structured full-text search with expanded query
    types (e.g., match, phrase, boost, multi-match) and flexible input
    formats.
  - Introduced a new method to check server support for structured
    full-text search features.
  - Enhanced the query system with new classes and interfaces for handling
    various full-text queries.
  - Expanded the functionality of existing methods to accept more complex
    query structures, including updates to method signatures.

- **Bug Fixes**
  - Improved error handling and reporting for full-text search queries.

- **Refactor**
  - Enhanced query processing with streamlined input handling and improved
    error reporting, ensuring more robust and consistent search results
    across platforms.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
Co-authored-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
Weston Pace
2025-04-01 06:36:42 -07:00
committed by GitHub
parent e59f9382a0
commit 625bab3f21
25 changed files with 1442 additions and 183 deletions

View File

@@ -47,6 +47,12 @@ export {
QueryExecutionOptions,
FullTextSearchOptions,
RecordBatchIterator,
FullTextQuery,
MatchQuery,
PhraseQuery,
BoostQuery,
MultiMatchQuery,
FullTextQueryType,
} from "./query";
export {

View File

@@ -17,6 +17,7 @@ import {
VectorQuery as NativeVectorQuery,
} from "./native";
import { Reranker } from "./rerankers";
export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
private promisedInner?: Promise<NativeBatchIterator>;
private inner?: NativeBatchIterator;
@@ -152,7 +153,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
}
fullTextSearch(
query: string,
query: string | FullTextQuery,
options?: Partial<FullTextSearchOptions>,
): this {
let columns: string[] | null = null;
@@ -164,9 +165,18 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
}
}
this.doCall((inner: NativeQueryType) =>
inner.fullTextSearch(query, columns),
);
this.doCall((inner: NativeQueryType) => {
if (typeof query === "string") {
inner.fullTextSearch({
query: query,
columns: columns,
});
} else {
// If query is a FullTextQuery object, convert it to a dict
const queryObj = query.toDict();
inner.fullTextSearch(queryObj);
}
});
return this;
}
@@ -718,8 +728,167 @@ export class Query extends QueryBase<NativeQuery> {
}
}
nearestToText(query: string, columns?: string[]): Query {
this.doCall((inner) => inner.fullTextSearch(query, columns));
/**
 * Adds a full-text search to this query.
 *
 * @param query - Either the raw search text or a structured `FullTextQuery`.
 * @param columns - Columns to search; only forwarded when `query` is a string.
 * @returns This query, so calls can be chained.
 */
nearestToText(query: string | FullTextQuery, columns?: string[]): Query {
  this.doCall((inner) => {
    // Plain text is sent together with the optional column list; a query
    // object is sent as the dictionary produced by its `toDict()`.
    const request =
      typeof query === "string" ? { query, columns } : query.toDict();
    inner.fullTextSearch(request);
  });
  return this;
}
}
/**
* Enum representing the types of full-text queries supported.
*
* Each value is the string that the matching query class uses as the
* top-level key of the dictionary returned by its `toDict()` method.
*
* - `Match`: Performs a full-text search for terms in the query string.
* - `MatchPhrase`: Searches for an exact phrase match in the text.
* - `Boost`: Combines a positive query with a negative query whose matches
*   have their relevance score reduced by a negative boost factor.
* - `MultiMatch`: Searches across multiple columns for the query terms.
*/
export enum FullTextQueryType {
Match = "match",
MatchPhrase = "match_phrase",
Boost = "boost",
MultiMatch = "multi_match",
}
/**
* Represents a full-text query interface.
* This interface defines the structure and behavior for full-text queries,
* including methods to retrieve the query type and convert the query to a dictionary format.
*
* Implementations in this file return a dictionary with a single top-level key
* (the value of `queryType()`) that maps to the query's parameters.
*/
export interface FullTextQuery {
/** Returns the kind of full-text query this object represents. */
queryType(): FullTextQueryType;
/** Converts the query into a plain dictionary representation. */
toDict(): Record<string, unknown>;
}
/**
 * A full-text query that searches a single column for the terms of a text query.
 */
export class MatchQuery implements FullTextQuery {
  private query: string;
  private column: string;
  private boost: number;
  private fuzziness: number;
  private maxExpansions: number;

  /**
   * Creates an instance of MatchQuery.
   *
   * @param query - The text query to search for.
   * @param column - The name of the column to search within.
   * @param boost - (Optional) The boost factor to influence the relevance score of this query. Default is `1.0`.
   * @param fuzziness - (Optional) The allowed edit distance for fuzzy matching. Default is `0`.
   * @param maxExpansions - (Optional) The maximum number of terms to consider for fuzzy matching. Default is `50`.
   */
  constructor(
    query: string,
    column: string,
    boost: number = 1.0,
    fuzziness: number = 0,
    maxExpansions: number = 50,
  ) {
    this.query = query;
    this.column = column;
    this.boost = boost;
    this.fuzziness = fuzziness;
    this.maxExpansions = maxExpansions;
  }

  /** Always `FullTextQueryType.Match`. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.Match;
  }

  /**
   * Serializes the query as `{ match: { <column>: { query, boost, fuzziness, max_expansions } } }`.
   */
  toDict(): Record<string, unknown> {
    const params = {
      query: this.query,
      boost: this.boost,
      fuzziness: this.fuzziness,
      // biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs
      max_expansions: this.maxExpansions,
    };
    const perColumn = { [this.column]: params };
    return { [this.queryType()]: perColumn };
  }
}
/**
 * A full-text query that searches a single column for an exact phrase.
 */
export class PhraseQuery implements FullTextQuery {
  private query: string;
  private column: string;

  /**
   * Creates an instance of `PhraseQuery`.
   *
   * @param query - The phrase to search for in the specified column.
   * @param column - The name of the column to search within.
   */
  constructor(query: string, column: string) {
    this.query = query;
    this.column = column;
  }

  /** Always `FullTextQueryType.MatchPhrase`. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.MatchPhrase;
  }

  /** Serializes the query as `{ match_phrase: { <column>: <phrase> } }`. */
  toDict(): Record<string, unknown> {
    const phraseByColumn = { [this.column]: this.query };
    return { [this.queryType()]: phraseByColumn };
  }
}
/**
 * A full-text query that pairs a positive query with a negative query
 * and a factor applied to the negative one.
 */
export class BoostQuery implements FullTextQuery {
  private positive: FullTextQuery;
  private negative: FullTextQuery;
  private negativeBoost: number;

  /**
   * Creates an instance of BoostQuery.
   *
   * @param positive - The positive query that boosts the relevance score.
   * @param negative - The negative query that reduces the relevance score.
   * @param negativeBoost - The factor by which the negative query reduces the score.
   */
  constructor(
    positive: FullTextQuery,
    negative: FullTextQuery,
    negativeBoost: number,
  ) {
    this.positive = positive;
    this.negative = negative;
    this.negativeBoost = negativeBoost;
  }

  /** Always `FullTextQueryType.Boost`. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.Boost;
  }

  /**
   * Serializes the query as `{ boost: { positive, negative, negative_boost } }`,
   * where both sub-queries are serialized through their own `toDict()`.
   */
  toDict(): Record<string, unknown> {
    const body = {
      positive: this.positive.toDict(),
      negative: this.negative.toDict(),
      // biome-ignore lint/style/useNamingConvention: use underscore for consistency with the other APIs
      negative_boost: this.negativeBoost,
    };
    return { [this.queryType()]: body };
  }
}
/**
 * A full-text query that searches several columns for the same text query,
 * with one boost factor per column.
 */
export class MultiMatchQuery implements FullTextQuery {
  /**
   * Creates an instance of MultiMatchQuery.
   *
   * @param query - The text query to search for across multiple columns.
   * @param columns - An array of column names to search within.
   * @param boosts - (Optional) An array of boost factors corresponding to each column. Default is an array of 1.0 for each column.
   *
   * The `boosts` array should have the same length as `columns`. If not provided, all columns will have a default boost of 1.0.
   * If the length of `boosts` is less than `columns`, it will be padded with 1.0s.
   * A `boosts` array longer than `columns` is passed through unchanged.
   */
  constructor(
    private query: string,
    private columns: string[],
    private boosts: number[] = columns.map(() => 1.0),
  ) {
    const missing = columns.length - boosts.length;
    if (missing > 0) {
      // Pad into a fresh array so the caller's `boosts` is never mutated.
      this.boosts = [...boosts, ...new Array<number>(missing).fill(1.0)];
    }
  }

  /** Always `FullTextQueryType.MultiMatch`. */
  queryType(): FullTextQueryType {
    return FullTextQueryType.MultiMatch;
  }

  /** Serializes the query as `{ multi_match: { query, columns, boost } }`. */
  toDict(): Record<string, unknown> {
    return {
      [this.queryType()]: {
        query: this.query,
        columns: this.columns,
        boost: this.boosts,
      },
    };
  }
}

View File

@@ -3,7 +3,7 @@
use std::sync::Arc;
use lancedb::index::scalar::FullTextSearchQuery;
use lancedb::index::scalar::{FtsQuery, FullTextSearchQuery, MatchQuery, PhraseQuery};
use lancedb::query::ExecutableQuery;
use lancedb::query::Query as LanceDbQuery;
use lancedb::query::QueryBase;
@@ -18,7 +18,7 @@ use crate::error::NapiErrorExt;
use crate::iterator::RecordBatchIterator;
use crate::rerankers::Reranker;
use crate::rerankers::RerankerCallbacks;
use crate::util::parse_distance_type;
use crate::util::{parse_distance_type, parse_fts_query};
#[napi]
pub struct Query {
@@ -38,9 +38,53 @@ impl Query {
}
#[napi]
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
let query = FullTextSearchQuery::new(query).columns(columns);
pub fn full_text_search(&mut self, query: napi::JsUnknown) -> napi::Result<()> {
let query = unsafe { query.cast::<napi::JsObject>() };
let query = if let Some(query_text) = query.get::<_, String>("query").transpose() {
let mut query_text = query_text?;
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
let is_phrase =
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
if is_phrase {
// Remove the surrounding quotes for phrase queries
query_text = query_text[1..query_text.len() - 1].to_string();
}
let query: FtsQuery = match (is_phrase, is_multi_match) {
(false, _) => MatchQuery::new(query_text).into(),
(true, false) => PhraseQuery::new(query_text).into(),
(true, true) => {
return Err(napi::Error::from_reason(
"Phrase queries cannot be used with multiple columns.",
));
}
};
let mut query = FullTextSearchQuery::new_query(query);
if let Some(cols) = columns {
if !cols.is_empty() {
query = query.with_columns(&cols).map_err(|e| {
napi::Error::from_reason(format!(
"Failed to set full text search columns: {}",
e
))
})?;
}
}
query
} else if let Some(query) = query.get::<_, napi::JsObject>("query")? {
let query = parse_fts_query(&query)?;
FullTextSearchQuery::new_query(query)
} else {
return Err(napi::Error::from_reason(
"Invalid full text search query object".to_string(),
));
};
self.inner = self.inner.clone().full_text_search(query);
Ok(())
}
#[napi]
@@ -195,9 +239,53 @@ impl VectorQuery {
}
#[napi]
pub fn full_text_search(&mut self, query: String, columns: Option<Vec<String>>) {
let query = FullTextSearchQuery::new(query).columns(columns);
pub fn full_text_search(&mut self, query: napi::JsUnknown) -> napi::Result<()> {
let query = unsafe { query.cast::<napi::JsObject>() };
let query = if let Some(query_text) = query.get::<_, String>("query").transpose() {
let mut query_text = query_text?;
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
let is_phrase =
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');
let is_multi_match = columns.as_ref().map(|cols| cols.len() > 1).unwrap_or(false);
if is_phrase {
// Remove the surrounding quotes for phrase queries
query_text = query_text[1..query_text.len() - 1].to_string();
}
let query: FtsQuery = match (is_phrase, is_multi_match) {
(false, _) => MatchQuery::new(query_text).into(),
(true, false) => PhraseQuery::new(query_text).into(),
(true, true) => {
return Err(napi::Error::from_reason(
"Phrase queries cannot be used with multiple columns.",
));
}
};
let mut query = FullTextSearchQuery::new_query(query);
if let Some(cols) = columns {
if !cols.is_empty() {
query = query.with_columns(&cols).map_err(|e| {
napi::Error::from_reason(format!(
"Failed to set full text search columns: {}",
e
))
})?;
}
}
query
} else if let Some(query) = query.get::<_, napi::JsObject>("query")? {
let query = parse_fts_query(&query)?;
FullTextSearchQuery::new_query(query)
} else {
return Err(napi::Error::from_reason(
"Invalid full text search query object".to_string(),
));
};
self.inner = self.inner.clone().full_text_search(query);
Ok(())
}
#[napi]

View File

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lancedb::index::scalar::{BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, PhraseQuery};
use lancedb::DistanceType;
pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<DistanceType> {
@@ -15,3 +16,144 @@ pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<Dista
))),
}
}
pub fn parse_fts_query(query: &napi::JsObject) -> napi::Result<FtsQuery> {
let query_type = query
.get_property_names()?
.get_element::<napi::JsString>(0)?;
let query_type = query_type.into_utf8()?.into_owned()?;
let query_value =
query
.get::<_, napi::JsObject>(&query_type)?
.ok_or(napi::Error::from_reason(format!(
"query value {} not found",
query_type
)))?;
match query_type.as_str() {
"match" => {
let column = query_value
.get_property_names()?
.get_element::<napi::JsString>(0)?
.into_utf8()?
.into_owned()?;
let params =
query_value
.get::<_, napi::JsObject>(&column)?
.ok_or(napi::Error::from_reason(format!(
"column {} not found",
column
)))?;
let query = params
.get::<_, napi::JsString>("query")?
.ok_or(napi::Error::from_reason("query not found"))?
.into_utf8()?
.into_owned()?;
let boost = params
.get::<_, napi::JsNumber>("boost")?
.ok_or(napi::Error::from_reason("boost not found"))?
.get_double()? as f32;
let fuzziness = params
.get::<_, napi::JsNumber>("fuzziness")?
.map(|f| f.get_uint32())
.transpose()?;
let max_expansions = params
.get::<_, napi::JsNumber>("max_expansions")?
.ok_or(napi::Error::from_reason("max_expansions not found"))?
.get_uint32()? as usize;
let query = MatchQuery::new(query)
.with_column(Some(column))
.with_boost(boost)
.with_fuzziness(fuzziness)
.with_max_expansions(max_expansions);
Ok(query.into())
}
"match_phrase" => {
let column = query_value
.get_property_names()?
.get_element::<napi::JsString>(0)?
.into_utf8()?
.into_owned()?;
let query = query_value
.get::<_, napi::JsString>(&column)?
.ok_or(napi::Error::from_reason(format!(
"column {} not found",
column
)))?
.into_utf8()?
.into_owned()?;
let query = PhraseQuery::new(query).with_column(Some(column));
Ok(query.into())
}
"boost" => {
let positive = query_value
.get::<_, napi::JsObject>("positive")?
.ok_or(napi::Error::from_reason("positive not found"))?;
let negative = query_value
.get::<_, napi::JsObject>("negative")?
.ok_or(napi::Error::from_reason("negative not found"))?;
let negative_boost = query_value
.get::<_, napi::JsNumber>("negative_boost")?
.ok_or(napi::Error::from_reason("negative_boost not found"))?
.get_double()? as f32;
let positive = parse_fts_query(&positive)?;
let negative = parse_fts_query(&negative)?;
let query = BoostQuery::new(positive, negative, Some(negative_boost));
Ok(query.into())
}
"multi_match" => {
let query = query_value
.get::<_, napi::JsString>("query")?
.ok_or(napi::Error::from_reason("query not found"))?
.into_utf8()?
.into_owned()?;
let columns_array = query_value
.get::<_, napi::JsTypedArray>("columns")?
.ok_or(napi::Error::from_reason("columns not found"))?;
let columns_num = columns_array.get_array_length()?;
let mut columns = Vec::with_capacity(columns_num as usize);
for i in 0..columns_num {
let column = columns_array
.get_element::<napi::JsString>(i)?
.into_utf8()?
.into_owned()?;
columns.push(column);
}
let boost_array = query_value
.get::<_, napi::JsTypedArray>("boost")?
.ok_or(napi::Error::from_reason("boost not found"))?;
if boost_array.get_array_length()? != columns_num {
return Err(napi::Error::from_reason(format!(
"boost array length ({}) does not match columns length ({})",
boost_array.get_array_length()?,
columns_num
)));
}
let mut boost = Vec::with_capacity(columns_num as usize);
for i in 0..columns_num {
let b = boost_array.get_element::<napi::JsNumber>(i)?.get_double()? as f32;
boost.push(b);
}
let query =
MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| {
napi::Error::from_reason(format!("Error creating MultiMatchQuery: {}", e))
})?;
Ok(query.into())
}
_ => Err(napi::Error::from_reason(format!(
"Unsupported query type: {}",
query_type
))),
}
}