feat!: upgrade lance to v0.28.0 (#2404)

this introduces some breaking changes in terms of rust API of creating
FTS index, and the default index params changed

Signed-off-by: BubbleCal <bubble-cal@outlook.com>

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Updated default settings for full-text search (FTS) index creation:
stemming, stop word removal, and ASCII folding are now enabled by
default, while token position storage is disabled by default.

- **Refactor**
- Simplified and streamlined the configuration and handling of FTS index
parameters for improved maintainability and consistency across
interfaces.
- Enhanced serialization and request construction for FTS index
parameters to reduce manual handling and improve code clarity.
- Improved test coverage by explicitly enabling positional indexing in
FTS tests to support phrase queries.

- **Chores**
- Upgraded all internal dependencies related to FTS indexing to the
latest version for enhanced compatibility and performance.
- Updated package versions for Node.js, Python, and Rust components to
the latest beta releases.
- Improved CI workflows by adding Rust toolchain setup with formatting
and linting tools.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
BubbleCal
2025-05-30 06:19:24 +08:00
committed by GitHub
parent d0bc671cac
commit 5c7f63388d
21 changed files with 484 additions and 479 deletions

View File

@@ -156,6 +156,9 @@ async def test_vector_search_async():
# --8<-- [end:search_result_async_as_list]
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)
def test_fts_fuzzy_query():
uri = "data/fuzzy-example"
db = lancedb.connect(uri)
@@ -189,6 +192,9 @@ def test_fts_fuzzy_query():
}
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)
def test_fts_boost_query():
uri = "data/boost-example"
db = lancedb.connect(uri)
@@ -234,6 +240,9 @@ def test_fts_boost_query():
)
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)
def test_fts_native():
# --8<-- [start:basic_fts]
uri = "data/sample-lancedb"
@@ -282,6 +291,9 @@ def test_fts_native():
# --8<-- [end:fts_incremental_index]
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)
@pytest.mark.asyncio
async def test_fts_native_async():
# --8<-- [start:basic_fts_async]

View File

@@ -287,7 +287,7 @@ def test_search_fts_phrase_query(table):
assert False
except Exception:
pass
table.create_fts_index("text", use_tantivy=False, replace=True)
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
results = table.search("puppy").limit(100).to_list()
phrase_results = table.search('"puppy runs"').limit(100).to_list()
assert len(results) > len(phrase_results)
@@ -312,7 +312,7 @@ async def test_search_fts_phrase_query_async(async_table):
assert False
except Exception:
pass
await async_table.create_index("text", config=FTS())
await async_table.create_index("text", config=FTS(with_position=True))
results = await async_table.query().nearest_to_text("puppy").limit(100).to_list()
phrase_results = (
await async_table.query().nearest_to_text('"puppy runs"').limit(100).to_list()
@@ -649,7 +649,7 @@ def test_fts_on_list(mem_db: DBConnection):
}
)
table = mem_db.create_table("test", data=data)
table.create_fts_index("text", use_tantivy=False)
table.create_fts_index("text", use_tantivy=False, with_position=True)
res = table.search("lance").limit(5).to_list()
assert len(res) == 3