Compare commits

...

3 Commits

Author SHA1 Message Date
Lance Release
56aa133ee6 Bump version: 0.19.0-beta.5 → 0.19.0-beta.6 2025-04-08 06:16:30 +00:00
Lance Release
27d9e5c596 Bump version: 0.22.0-beta.5 → 0.22.0-beta.6 2025-04-08 06:16:14 +00:00
BubbleCal
ec8271931f feat: support to create FTS index on list of strings (#2317)
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Chores**
- Updated internal library dependencies to the latest beta version for
improved system stability.
- **Tests**
- Added automated tests to validate full-text search functionality on
list-based text fields.
- **Refactor**
- Enhanced the search processing logic to provide robust support for
list-type text data, ensuring more reliable results.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-04-08 14:12:35 +08:00
23 changed files with 110 additions and 49 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.19.0-beta.5"
current_version = "0.19.0-beta.6"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

34
Cargo.lock generated
View File

@@ -2720,7 +2720,7 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"rand 0.8.5",
]
@@ -3708,7 +3708,7 @@ dependencies = [
[[package]]
name = "lance"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow",
"arrow-arith",
@@ -3768,7 +3768,7 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -3786,7 +3786,7 @@ dependencies = [
[[package]]
name = "lance-core"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -3823,7 +3823,7 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow",
"arrow-array",
@@ -3851,7 +3851,7 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow",
"arrow-array",
@@ -3867,7 +3867,7 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrayref",
"arrow",
@@ -3907,7 +3907,7 @@ dependencies = [
[[package]]
name = "lance-file"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -3942,7 +3942,7 @@ dependencies = [
[[package]]
name = "lance-index"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow",
"arrow-array",
@@ -3996,7 +3996,7 @@ dependencies = [
[[package]]
name = "lance-io"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow",
"arrow-arith",
@@ -4035,7 +4035,7 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow-array",
"arrow-ord",
@@ -4059,7 +4059,7 @@ dependencies = [
[[package]]
name = "lance-table"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow",
"arrow-array",
@@ -4099,7 +4099,7 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "0.25.3"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.2#e0d3179bcc6e6ce5f9c5fcfeb4398789a7005467"
source = "git+https://github.com/lancedb/lance?tag=v0.25.3-beta.4#236c8f986ab9e2d478d0754fab6e8d2643c31247"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4110,7 +4110,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.19.0-beta.4"
version = "0.19.0-beta.5"
dependencies = [
"arrow",
"arrow-array",
@@ -4197,7 +4197,7 @@ dependencies = [
[[package]]
name = "lancedb-node"
version = "0.19.0-beta.4"
version = "0.19.0-beta.5"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4222,7 +4222,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.19.0-beta.4"
version = "0.19.0-beta.5"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4240,7 +4240,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.22.0-beta.4"
version = "0.22.0-beta.5"
dependencies = [
"arrow",
"env_logger",

View File

@@ -23,14 +23,14 @@ rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.25.3", "features" = [
"dynamodb",
], tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-io = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-index = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-linalg = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-table = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-testing = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-datafusion = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
lance-encoding = { version = "=0.25.3", tag = "v0.25.3-beta.2", git = "https://github.com/lancedb/lance" }
], tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-io = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-index = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-linalg = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-table = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-testing = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-datafusion = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
lance-encoding = { version = "=0.25.3", tag = "v0.25.3-beta.4", git = "https://github.com/lancedb/lance" }
# Note that this one does not include pyarrow
arrow = { version = "54.1", optional = false }
arrow-array = "54.1"

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.19.0-beta.5</version>
<version>0.19.0-beta.6</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.19.0-beta.5</version>
<version>0.19.0-beta.6</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -89,10 +89,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.5",
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.5",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.5",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.5",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.5"
"@lancedb/vectordb-darwin-x64": "0.19.0-beta.6",
"@lancedb/vectordb-darwin-arm64": "0.19.0-beta.6",
"@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.6",
"@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.6",
"@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.6"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.19.0-beta.5"
version = "0.19.0-beta.6"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1304,6 +1304,27 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(results[0].text).toBe(data[0].text);
});
test("full text index on list", async () => {
  const db = await connect(tmpDir.name);

  // Every row's `text` column is a list of strings rather than a single string.
  const rows = [
    { text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
    { text: ["lance database"], vector: [0.4, 0.5, 0.6] },
    { text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
    { text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
    { text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
  ];
  const table = await db.createTable("test", rows);

  // Build the full-text-search index directly on the list column.
  await table.createIndex("text", { config: Index.fts() });

  // Single term: the first three rows are the ones with "lance" in their list.
  const termHits = await table.search("lance").toArray();
  expect(termHits.length).toBe(3);

  // Quoted query: only the first two rows hold "lance database" as one entry.
  const phraseHits = await table.search('"lance database"').toArray();
  expect(phraseHits.length).toBe(2);
});
test("full text search without positions", async () => {
const db = await connect(tmpDir.name);
const data = [

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.19.0-beta.5",
"version": "0.19.0-beta.6",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.0-beta.5"
current_version = "0.22.0-beta.6"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.22.0-beta.5"
version = "0.22.0-beta.6"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -22,6 +22,7 @@ from lancedb.db import DBConnection
from lancedb.index import FTS
from lancedb.query import BoostQuery, MatchQuery, MultiMatchQuery, PhraseQuery
import numpy as np
import pyarrow as pa
import pandas as pd
import pytest
from utils import exception_output
@@ -626,3 +627,32 @@ def test_language(mem_db: DBConnection):
# Stop words -> no results
results = table.search("la", query_type="fts").limit(5).to_list()
assert len(results) == 0
def test_fts_on_list(mem_db: DBConnection):
    """Check term and phrase search over an FTS index built on a list-of-strings column."""
    # Each row's "text" value is a list of strings, not a single string.
    texts = [
        ["lance database", "the", "search"],
        ["lance database"],
        ["lance", "search"],
        ["database", "search"],
        ["unrelated", "doc"],
    ]
    vectors = [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
        [10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0],
    ]
    data = pa.table({"text": texts, "vector": vectors})

    table = mem_db.create_table("test", data=data)
    table.create_fts_index("text", use_tantivy=False)

    # Term query: the first three rows are the ones containing "lance".
    term_hits = table.search("lance").limit(5).to_list()
    assert len(term_hits) == 3

    # Phrase query: only the first two rows contain "lance database" as one entry.
    phrase_hits = (
        table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
    )
    assert len(phrase_hits) == 2

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.19.0-beta.5"
version = "0.19.0-beta.6"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.19.0-beta.5"
version = "0.19.0-beta.6"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -158,7 +158,17 @@ pub fn supported_label_list_data_type(dtype: &DataType) -> bool {
}
/// Reports whether a column of type `dtype` can be used for a full-text-search (FTS) index.
///
/// Delegates to `supported_fts_data_type_impl` with `in_list = false`, i.e. the check
/// starts outside of any list.
// NOTE(review): this diff hunk lost its +/- markers. The `matches!` line looks like the
// removed pre-change body and the call below it like the replacement — confirm against
// the commit before treating both lines as live code.
pub fn supported_fts_data_type(dtype: &DataType) -> bool {
matches!(dtype, DataType::Utf8 | DataType::LargeUtf8)
supported_fts_data_type_impl(dtype, false)
}
/// Recursive worker behind `supported_fts_data_type`.
///
/// Accepts `Utf8` / `LargeUtf8` at any depth, and a `List` / `LargeList` whose element
/// type is itself accepted. `in_list` is `true` once the check has descended into a list,
/// which makes a list nested inside another list fall through to `false`.
fn supported_fts_data_type_impl(dtype: &DataType, in_list: bool) -> bool {
    match dtype {
        DataType::Utf8 | DataType::LargeUtf8 => true,
        // Only one level of list nesting is allowed, hence the guard.
        DataType::List(item) | DataType::LargeList(item) if !in_list => {
            supported_fts_data_type_impl(item.data_type(), true)
        }
        _ => false,
    }
}
pub fn supported_vector_data_type(dtype: &DataType) -> bool {