mirror of https://github.com/lancedb/lancedb.git
synced 2025-12-27 23:12:58 +00:00

Compare commits: python-v0. ... python-v0. (3 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 273ba18426 | |
| | 8b94308cf2 | |
| | 0b7b27481e | |
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.3-beta.3"
+current_version = "0.22.3-beta.4"
 parse = """(?x)
 (?P<major>0|[1-9]\\d*)\\.
 (?P<minor>0|[1-9]\\d*)\\.
Cargo.lock (generated): 6 lines changed
@@ -4684,7 +4684,7 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.22.3-beta.3"
+version = "0.22.3-beta.4"
 dependencies = [
 "ahash",
 "anyhow",

@@ -4781,7 +4781,7 @@ dependencies = [

 [[package]]
 name = "lancedb-nodejs"
-version = "0.22.3-beta.3"
+version = "0.22.3-beta.4"
 dependencies = [
 "arrow-array",
 "arrow-ipc",

@@ -4801,7 +4801,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.25.3-beta.3"
+version = "0.25.3-beta.4"
 dependencies = [
 "arrow",
 "async-trait",
@@ -8,7 +8,7 @@
 <parent>
 <groupId>com.lancedb</groupId>
 <artifactId>lancedb-parent</artifactId>
-<version>0.22.3-beta.3</version>
+<version>0.22.3-beta.4</version>
 <relativePath>../pom.xml</relativePath>
 </parent>

@@ -8,7 +8,7 @@
 <parent>
 <groupId>com.lancedb</groupId>
 <artifactId>lancedb-parent</artifactId>
-<version>0.22.3-beta.3</version>
+<version>0.22.3-beta.4</version>
 <relativePath>../pom.xml</relativePath>
 </parent>

@@ -6,7 +6,7 @@

 <groupId>com.lancedb</groupId>
 <artifactId>lancedb-parent</artifactId>
-<version>0.22.3-beta.3</version>
+<version>0.22.3-beta.4</version>
 <packaging>pom</packaging>
 <name>${project.artifactId}</name>
 <description>LanceDB Java SDK Parent POM</description>
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.3-beta.3"
+version = "0.22.3-beta.4"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-darwin-arm64",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["darwin"],
 "cpu": ["arm64"],
 "main": "lancedb.darwin-arm64.node",

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-darwin-x64",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["darwin"],
 "cpu": ["x64"],
 "main": "lancedb.darwin-x64.node",

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-arm64-gnu",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["linux"],
 "cpu": ["arm64"],
 "main": "lancedb.linux-arm64-gnu.node",

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-arm64-musl",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["linux"],
 "cpu": ["arm64"],
 "main": "lancedb.linux-arm64-musl.node",

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-x64-gnu",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["linux"],
 "cpu": ["x64"],
 "main": "lancedb.linux-x64-gnu.node",

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-linux-x64-musl",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["linux"],
 "cpu": ["x64"],
 "main": "lancedb.linux-x64-musl.node",

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-win32-arm64-msvc",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": [
 "win32"
 ],

@@ -1,6 +1,6 @@
 {
 "name": "@lancedb/lancedb-win32-x64-msvc",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "os": ["win32"],
 "cpu": ["x64"],
 "main": "lancedb.win32-x64-msvc.node",
nodejs/package-lock.json (generated): 4 lines changed
@@ -1,12 +1,12 @@
 {
 "name": "@lancedb/lancedb",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "lockfileVersion": 3,
 "requires": true,
 "packages": {
 "": {
 "name": "@lancedb/lancedb",
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "cpu": [
 "x64",
 "arm64"

@@ -11,7 +11,7 @@
 "ann"
 ],
 "private": false,
-"version": "0.22.3-beta.3",
+"version": "0.22.3-beta.4",
 "main": "dist/index.js",
 "exports": {
 ".": "./dist/index.js",
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.3-beta.4"
+current_version = "0.25.3-beta.5"
 parse = """(?x)
 (?P<major>0|[1-9]\\d*)\\.
 (?P<minor>0|[1-9]\\d*)\\.
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.3-beta.4"
+version = "0.25.3-beta.5"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -339,3 +339,4 @@ class AsyncPermutationBuilder:
 def async_permutation_builder(
     table: Table, dest_table_name: str
 ) -> AsyncPermutationBuilder: ...
+def fts_query_to_json(query: Any) -> str: ...
@@ -37,7 +37,7 @@ from .rerankers.base import Reranker
 from .rerankers.rrf import RRFReranker
 from .rerankers.util import check_reranker_result
 from .util import flatten_columns
+from lancedb._lancedb import fts_query_to_json
 from typing_extensions import Annotated

 if TYPE_CHECKING:

@@ -124,6 +124,24 @@ class FullTextQuery(ABC):
         """
         pass

+    def to_json(self) -> str:
+        """
+        Convert the query to a JSON string.
+
+        Returns
+        -------
+        str
+            A JSON string representation of the query.
+
+        Examples
+        --------
+        >>> from lancedb.query import MatchQuery
+        >>> query = MatchQuery("puppy", "text", fuzziness=2)
+        >>> query.to_json()
+        '{"match":{"column":"text","terms":"puppy","boost":1.0,"fuzziness":2,"max_expansions":50,"operator":"Or","prefix_length":0}}'
+        """
+        return fts_query_to_json(self)
+
     def __and__(self, other: "FullTextQuery") -> "FullTextQuery":
         """
         Combine two queries with a logical AND operation.

@@ -288,6 +306,8 @@ class BooleanQuery(FullTextQuery):
     ----------
     queries : list[tuple(Occur, FullTextQuery)]
         The list of queries with their occurrence requirements.
+        Each tuple contains an Occur value (MUST, SHOULD, or MUST_NOT)
+        and a FullTextQuery to apply.
     """

     queries: list[tuple[Occur, FullTextQuery]]
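Note: the hunks above add a `to_json()` method on `FullTextQuery` that delegates to the new `fts_query_to_json` binding. A minimal usage sketch, assuming only the classes that appear in this diff (`MatchQuery`, `BooleanQuery`, `Occur`); the exact key layout is the one asserted in the tests further down:

```python
import json

from lancedb.query import BooleanQuery, MatchQuery, Occur

# Build a boolean FTS query and serialize it to its JSON wire format.
query = BooleanQuery(
    [
        (Occur.MUST, MatchQuery("puppy", "text")),
        (Occur.MUST_NOT, MatchQuery("training", "text")),
    ]
)
payload = query.to_json()

# The result is plain JSON, so it round-trips through the standard library
# and can be logged, stored, or compared structurally.
parsed = json.loads(payload)
assert set(parsed["boolean"].keys()) == {"should", "must", "must_not"}
```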
@@ -20,7 +20,14 @@ from unittest import mock
 import lancedb as ldb
 from lancedb.db import DBConnection
 from lancedb.index import FTS
-from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery
+from lancedb.query import (
+    BoostQuery,
+    MatchQuery,
+    MultiMatchQuery,
+    PhraseQuery,
+    BooleanQuery,
+    Occur,
+)
 import numpy as np
 import pyarrow as pa
 import pandas as pd

@@ -727,3 +734,146 @@ def test_fts_ngram(mem_db: DBConnection):
     results = table.search("la", query_type="fts").limit(10).to_list()
     assert len(results) == 2
     assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
+
+
+def test_fts_query_to_json():
+    """Test that FTS query to_json() produces valid JSON strings with exact format."""
+
+    # Test MatchQuery - basic
+    match_query = MatchQuery("hello world", "text")
+    json_str = match_query.to_json()
+    expected = (
+        '{"match":{"column":"text","terms":"hello world","boost":1.0,'
+        '"fuzziness":0,"max_expansions":50,"operator":"Or","prefix_length":0}}'
+    )
+    assert json_str == expected
+
+    # Test MatchQuery with options
+    match_query = MatchQuery("puppy", "text", fuzziness=2, boost=1.5, prefix_length=3)
+    json_str = match_query.to_json()
+    expected = (
+        '{"match":{"column":"text","terms":"puppy","boost":1.5,"fuzziness":2,'
+        '"max_expansions":50,"operator":"Or","prefix_length":3}}'
+    )
+    assert json_str == expected
+
+    # Test PhraseQuery
+    phrase_query = PhraseQuery("quick brown fox", "title")
+    json_str = phrase_query.to_json()
+    expected = '{"phrase":{"column":"title","terms":"quick brown fox","slop":0}}'
+    assert json_str == expected
+
+    # Test PhraseQuery with slop
+    phrase_query = PhraseQuery("quick brown", "title", slop=2)
+    json_str = phrase_query.to_json()
+    expected = '{"phrase":{"column":"title","terms":"quick brown","slop":2}}'
+    assert json_str == expected
+
+    # Test BooleanQuery with MUST
+    must_query = BooleanQuery(
+        [
+            (Occur.MUST, MatchQuery("puppy", "text")),
+            (Occur.MUST, MatchQuery("runs", "text")),
+        ]
+    )
+    json_str = must_query.to_json()
+    expected = (
+        '{"boolean":{"should":[],"must":[{"match":{"column":"text","terms":"puppy",'
+        '"boost":1.0,"fuzziness":0,"max_expansions":50,"operator":"Or",'
+        '"prefix_length":0}},{"match":{"column":"text","terms":"runs","boost":1.0,'
+        '"fuzziness":0,"max_expansions":50,"operator":"Or","prefix_length":0}}],'
+        '"must_not":[]}}'
+    )
+    assert json_str == expected
+
+    # Test BooleanQuery with SHOULD
+    should_query = BooleanQuery(
+        [
+            (Occur.SHOULD, MatchQuery("cat", "text")),
+            (Occur.SHOULD, MatchQuery("dog", "text")),
+        ]
+    )
+    json_str = should_query.to_json()
+    expected = (
+        '{"boolean":{"should":[{"match":{"column":"text","terms":"cat","boost":1.0,'
+        '"fuzziness":0,"max_expansions":50,"operator":"Or","prefix_length":0}},'
+        '{"match":{"column":"text","terms":"dog","boost":1.0,"fuzziness":0,'
+        '"max_expansions":50,"operator":"Or","prefix_length":0}}],"must":[],'
+        '"must_not":[]}}'
+    )
+    assert json_str == expected
+
+    # Test BooleanQuery with MUST_NOT
+    must_not_query = BooleanQuery(
+        [
+            (Occur.MUST, MatchQuery("puppy", "text")),
+            (Occur.MUST_NOT, MatchQuery("training", "text")),
+        ]
+    )
+    json_str = must_not_query.to_json()
+    expected = (
+        '{"boolean":{"should":[],"must":[{"match":{"column":"text","terms":"puppy",'
+        '"boost":1.0,"fuzziness":0,"max_expansions":50,"operator":"Or",'
+        '"prefix_length":0}}],"must_not":[{"match":{"column":"text",'
+        '"terms":"training","boost":1.0,"fuzziness":0,"max_expansions":50,'
+        '"operator":"Or","prefix_length":0}}]}}'
+    )
+    assert json_str == expected
+
+    # Test BoostQuery
+    positive = MatchQuery("puppy", "text")
+    negative = MatchQuery("training", "text")
+    boost_query = BoostQuery(positive, negative, negative_boost=0.3)
+    json_str = boost_query.to_json()
+    expected = (
+        '{"boost":{"positive":{"match":{"column":"text","terms":"puppy",'
+        '"boost":1.0,"fuzziness":0,"max_expansions":50,"operator":"Or",'
+        '"prefix_length":0}},"negative":{"match":{"column":"text",'
+        '"terms":"training","boost":1.0,"fuzziness":0,"max_expansions":50,'
+        '"operator":"Or","prefix_length":0}},"negative_boost":0.3}}'
+    )
+    assert json_str == expected
+
+    # Test MultiMatchQuery
+    multi_match = MultiMatchQuery("python", ["tags", "title"])
+    json_str = multi_match.to_json()
+    expected = (
+        '{"multi_match":{"query":"python","columns":["tags","title"],'
+        '"boost":[1.0,1.0]}}'
+    )
+    assert json_str == expected
+
+    # Test complex nested BooleanQuery
+    inner1 = BooleanQuery(
+        [
+            (Occur.MUST, MatchQuery("python", "tags")),
+            (Occur.MUST, MatchQuery("tutorial", "title")),
+        ]
+    )
+    inner2 = BooleanQuery(
+        [
+            (Occur.MUST, MatchQuery("rust", "tags")),
+            (Occur.MUST, MatchQuery("guide", "title")),
+        ]
+    )
+    complex_query = BooleanQuery(
+        [
+            (Occur.SHOULD, inner1),
+            (Occur.SHOULD, inner2),
+        ]
+    )
+    json_str = complex_query.to_json()
+    expected = (
+        '{"boolean":{"should":[{"boolean":{"should":[],"must":[{"match":'
+        '{"column":"tags","terms":"python","boost":1.0,"fuzziness":0,'
+        '"max_expansions":50,"operator":"Or","prefix_length":0}},{"match":'
+        '{"column":"title","terms":"tutorial","boost":1.0,"fuzziness":0,'
+        '"max_expansions":50,"operator":"Or","prefix_length":0}}],"must_not":[]}}'
+        ',{"boolean":{"should":[],"must":[{"match":{"column":"tags",'
+        '"terms":"rust","boost":1.0,"fuzziness":0,"max_expansions":50,'
+        '"operator":"Or","prefix_length":0}},{"match":{"column":"title",'
+        '"terms":"guide","boost":1.0,"fuzziness":0,"max_expansions":50,'
+        '"operator":"Or","prefix_length":0}}],"must_not":[]}}],"must":[],'
+        '"must_not":[]}}'
+    )
+    assert json_str == expected
@@ -55,6 +55,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(connect, m)?)?;
     m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
     m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
+    m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?;
     m.add("__version__", env!("CARGO_PKG_VERSION"))?;
     Ok(())
 }

@@ -23,6 +23,7 @@ use lancedb::query::{
 };
 use lancedb::table::AnyQuery;
 use pyo3::prelude::{PyAnyMethods, PyDictMethods};
+use pyo3::pyfunction;
 use pyo3::pymethods;
 use pyo3::types::PyList;
 use pyo3::types::{PyDict, PyString};

@@ -982,3 +983,15 @@ impl HybridQuery {
         req
     }
 }
+
+/// Convert a Python FTS query to JSON string
+#[pyfunction]
+pub fn fts_query_to_json(query_obj: &Bound<'_, PyAny>) -> PyResult<String> {
+    let wrapped: PyLanceDB<FtsQuery> = query_obj.extract()?;
+    lancedb::table::datafusion::udtf::fts::to_json(&wrapped.0).map_err(|e| {
+        PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
+            "Failed to serialize FTS query to JSON: {}",
+            e
+        ))
+    })
+}
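Note: these hunks expose the serializer to Python as `lancedb._lancedb.fts_query_to_json`, registered on the extension module and implemented on top of `lancedb::table::datafusion::udtf::fts::to_json`. The public entry point is the `FullTextQuery.to_json()` wrapper above; a hedged sketch of calling the raw binding directly, only to illustrate the wiring:

```python
from lancedb._lancedb import fts_query_to_json
from lancedb.query import MatchQuery

# The binding takes an FTS query object and returns its JSON string;
# per the Rust hunk above, serialization failures surface as ValueError.
print(fts_query_to_json(MatchQuery("puppy", "text", fuzziness=2)))
```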
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.3-beta.3"
+version = "0.22.3-beta.4"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -667,6 +667,12 @@ pub struct QueryRequest {

     /// Configure how query results are normalized when doing hybrid search
     pub norm: Option<NormalizeMethod>,
+
+    /// If set to true, disables automatic projection of scoring columns (_score, _distance).
+    /// When disabled, these columns are only included if explicitly requested in the projection.
+    ///
+    /// By default, this is false (scoring columns are auto-projected for backward compatibility).
+    pub disable_scoring_autoprojection: bool,
 }

 impl Default for QueryRequest {

@@ -682,6 +688,7 @@ impl Default for QueryRequest {
             prefilter: true,
             reranker: None,
             norm: None,
+            disable_scoring_autoprojection: false,
         }
     }
 }
@@ -2391,6 +2391,10 @@ impl BaseTable for NativeTable {
             scanner.distance_metric(distance_type.into());
         }

+        if query.base.disable_scoring_autoprojection {
+            scanner.disable_scoring_autoprojection();
+        }
+
         Ok(scanner.create_plan().await?)
     }

@@ -2,6 +2,9 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 //! This module contains adapters to allow LanceDB tables to be used as DataFusion table providers.
+
+pub mod udtf;
+
 use std::{collections::HashMap, sync::Arc};

 use arrow_array::RecordBatch;

@@ -21,6 +24,8 @@ use crate::{
     query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select},
     Result,
 };
+use arrow_schema::{DataType, Field};
+use lance_index::scalar::FullTextSearchQuery;

 /// Datafusion attempts to maintain batch metadata
 ///

@@ -135,19 +140,38 @@ impl ExecutionPlan for MetadataEraserExec {
 pub struct BaseTableAdapter {
     table: Arc<dyn BaseTable>,
     schema: Arc<ArrowSchema>,
+    fts_query: Option<FullTextSearchQuery>,
 }

 impl BaseTableAdapter {
     pub async fn try_new(table: Arc<dyn BaseTable>) -> Result<Self> {
-        let schema = Arc::new(
-            table
-                .schema()
-                .await?
-                .as_ref()
-                .clone()
-                .with_metadata(HashMap::default()),
-        );
-        Ok(Self { table, schema })
+        let schema = table
+            .schema()
+            .await?
+            .as_ref()
+            .clone()
+            .with_metadata(HashMap::default());
+
+        Ok(Self {
+            table,
+            schema: Arc::new(schema),
+            fts_query: None,
+        })
+    }
+
+    /// Create a new adapter with an FTS query applied.
+    pub fn with_fts_query(&self, fts_query: FullTextSearchQuery) -> Self {
+        // Add _score column to the schema
+        let score_field = Field::new("_score", DataType::Float32, true);
+        let mut fields = self.schema.fields().to_vec();
+        fields.push(Arc::new(score_field));
+        let schema = Arc::new(ArrowSchema::new(fields));
+
+        Self {
+            table: self.table.clone(),
+            schema,
+            fts_query: Some(fts_query),
+        }
     }
 }

@@ -172,7 +196,15 @@ impl TableProvider for BaseTableAdapter {
         filters: &[Expr],
         limit: Option<usize>,
     ) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
-        let mut query = QueryRequest::default();
+        // For FTS queries, disable auto-projection of _score to match DataFusion expectations
+        let disable_scoring = self.fts_query.is_some() && projection.is_some();
+
+        let mut query = QueryRequest {
+            full_text_search: self.fts_query.clone(),
+            disable_scoring_autoprojection: disable_scoring,
+            ..Default::default()
+        };
+
         if let Some(projection) = projection {
             let field_names = projection
                 .iter()
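Note: `disable_scoring_autoprojection` is only toggled on the DataFusion scan path built by `BaseTableAdapter::with_fts_query`; it defaults to `false`, so existing search paths keep auto-projecting scoring columns. A rough illustration of that default, assuming a table with an FTS index on "text" (setup elided; the search style mirrors the Python tests above):

```python
# Assumes `table` is an existing LanceDB table with an FTS index on "text".
results = table.search("puppy", query_type="fts").limit(5).to_list()

# With the default (disable_scoring_autoprojection = false), the relevance
# score column is still auto-projected into the results.
assert all("_score" in row for row in results)
```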
rust/lancedb/src/table/datafusion/udtf.rs (new file): 6 lines

@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+//! User-Defined Table Functions (UDTFs) for DataFusion integration
+
+pub mod fts;
rust/lancedb/src/table/datafusion/udtf/fts.rs (new file): 2028 lines
File diff suppressed because it is too large.