mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-28 03:20:39 +00:00
Compare commits
1 Commits
python-v0.
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e7ba99a31 |
2037
Cargo.lock
generated
2037
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=4.0.0-beta.2", default-features = false, "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=4.0.0-beta.2", default-features = false, "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=4.0.0-beta.2", default-features = false, "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=4.0.0-beta.2", "tag" = "v4.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>3.1.0-beta.2</lance-core.version>
|
||||
<lance-core.version>4.0.0-beta.2</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.30.0-beta.3"
|
||||
current_version = "0.30.0-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.30.0-beta.3"
|
||||
version = "0.30.0-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -59,7 +59,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance>=1.0.0b14,<3.0.0",
|
||||
"pylance>=1.0.0b14",
|
||||
"requests",
|
||||
"datafusion<52",
|
||||
]
|
||||
|
||||
@@ -1462,7 +1462,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
self._phrase_query = False
|
||||
self.ordering_field_name = ordering_field_name
|
||||
self._reranker = None
|
||||
self._fast_search = None
|
||||
if isinstance(fts_columns, str):
|
||||
fts_columns = [fts_columns]
|
||||
self._fts_columns = fts_columns
|
||||
@@ -1484,19 +1483,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
self._phrase_query = phrase_query
|
||||
return self
|
||||
|
||||
def fast_search(self) -> LanceFtsQueryBuilder:
|
||||
"""
|
||||
Skip a flat search of unindexed data. This will improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceFtsQueryBuilder
|
||||
The LanceFtsQueryBuilder object.
|
||||
"""
|
||||
self._fast_search = True
|
||||
return self
|
||||
|
||||
def to_query_object(self) -> Query:
|
||||
return Query(
|
||||
columns=self._columns,
|
||||
@@ -1508,7 +1494,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
query=self._query, columns=self._fts_columns
|
||||
),
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
)
|
||||
|
||||
def output_schema(self) -> pa.Schema:
|
||||
|
||||
@@ -882,105 +882,3 @@ def test_fts_query_to_json():
|
||||
'"must_not":[]}}'
|
||||
)
|
||||
assert json_str == expected
|
||||
|
||||
|
||||
def test_fts_fast_search(table):
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
|
||||
# Insert some unindexed data
|
||||
table.add(
|
||||
[
|
||||
{
|
||||
"text": "xyz",
|
||||
"vector": [0 for _ in range(128)],
|
||||
"id": 101,
|
||||
"text2": "xyz",
|
||||
"nested": {"text": "xyz"},
|
||||
"count": 10,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
# Without fast_search, the query object should not have fast_search set
|
||||
builder = table.search("xyz", query_type="fts").limit(10)
|
||||
query = builder.to_query_object()
|
||||
assert query.fast_search is None
|
||||
|
||||
# With fast_search, the query object should have fast_search=True
|
||||
builder = table.search("xyz", query_type="fts").fast_search().limit(10)
|
||||
query = builder.to_query_object()
|
||||
assert query.fast_search is True
|
||||
|
||||
# fast_search should be chainable with other methods
|
||||
builder = (
|
||||
table.search("xyz", query_type="fts").fast_search().select(["text"]).limit(5)
|
||||
)
|
||||
query = builder.to_query_object()
|
||||
assert query.fast_search is True
|
||||
assert query.limit == 5
|
||||
assert query.columns == ["text"]
|
||||
|
||||
# Verify it executes without error and skips unindexed data
|
||||
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||
assert len(results) == 0
|
||||
|
||||
# Update index and verify it returns results
|
||||
table.optimize()
|
||||
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||
assert len(results) > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fts_fast_search_async(async_table):
|
||||
await async_table.create_index("text", config=FTS())
|
||||
|
||||
# Insert some unindexed data
|
||||
await async_table.add(
|
||||
[
|
||||
{
|
||||
"text": "xyz",
|
||||
"vector": [0 for _ in range(128)],
|
||||
"id": 101,
|
||||
"text2": "xyz",
|
||||
"nested": {"text": "xyz"},
|
||||
"count": 10,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
# Without fast_search, should return results
|
||||
results = await async_table.query().nearest_to_text("xyz").limit(5).to_list()
|
||||
assert len(results) > 0
|
||||
|
||||
# With fast_search, should return no results data unindexed
|
||||
fast_results = (
|
||||
await async_table.query()
|
||||
.nearest_to_text("xyz")
|
||||
.fast_search()
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(fast_results) == 0
|
||||
|
||||
# Update index and verify it returns results
|
||||
await async_table.optimize()
|
||||
|
||||
fast_results = (
|
||||
await async_table.query()
|
||||
.nearest_to_text("xyz")
|
||||
.fast_search()
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(fast_results) > 0
|
||||
|
||||
# fast_search should be chainable with other methods
|
||||
results = (
|
||||
await async_table.query()
|
||||
.nearest_to_text("xyz")
|
||||
.fast_search()
|
||||
.select(["text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) > 0
|
||||
|
||||
Reference in New Issue
Block a user