feat: support to query/index FTS on RemoteTable/AsyncTable (#1537)

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
BubbleCal
2024-08-16 12:01:05 +08:00
committed by GitHub
parent 20faa4424b
commit 0fa50775d6
11 changed files with 229 additions and 102 deletions

View File

@@ -15,17 +15,20 @@
use arrow::array::make_array;
use arrow::array::ArrayData;
use arrow::pyarrow::FromPyArrow;
use lancedb::index::scalar::FullTextSearchQuery;
use lancedb::query::QueryExecutionOptions;
use lancedb::query::{
ExecutableQuery, Query as LanceDbQuery, QueryBase, Select, VectorQuery as LanceDbVectorQuery,
};
use pyo3::exceptions::PyRuntimeError;
use pyo3::pyclass;
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
use pyo3::pymethods;
use pyo3::types::PyDict;
use pyo3::Bound;
use pyo3::PyAny;
use pyo3::PyRef;
use pyo3::PyResult;
use pyo3::{pyclass, PyErr};
use pyo3_asyncio_0_21::tokio::future_into_py;
use crate::arrow::RecordBatchStream;
@@ -68,6 +71,24 @@ impl Query {
Ok(VectorQuery { inner })
}
/// Attaches a full-text-search clause to this query.
///
/// `query` is a Python dict that is read for two keys:
///
/// * `"query"` (required): the search text, extracted as a `String`.
/// * `"columns"` (optional): extracted as a `Vec<String>` when present and
///   forwarded to `FullTextSearchQuery::columns`; `None` is forwarded when
///   the key is absent.
///
/// # Errors
///
/// Returns a `PyRuntimeError` when the `"query"` key is missing, and
/// propagates any extraction error when a value cannot be converted to the
/// expected Rust type.
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<()> {
    let query_text = query
        .get_item("query")?
        // Build the error lazily so the success path does not construct
        // (and immediately drop) a `PyErr`.
        .ok_or_else(|| {
            PyErr::new::<PyRuntimeError, _>("Query text is required for nearest_to_text")
        })?
        .extract::<String>()?;
    let columns = query
        .get_item("columns")?
        .map(|columns| columns.extract::<Vec<String>>())
        // Option<Result<_>> -> Result<Option<_>> so `?` surfaces a bad value.
        .transpose()?;

    let fts_query = FullTextSearchQuery::new(query_text).columns(columns);
    // NOTE(review): the clone is presumably needed because `full_text_search`
    // takes the query by value while only `&mut self` is available here;
    // confirm against `QueryBase`.
    self.inner = self.inner.clone().full_text_search(fts_query);
    Ok(())
}
pub fn execute(
self_: PyRef<'_, Self>,
max_batch_length: Option<u32>,