mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-23 22:20:40 +00:00
Compare commits
1 Commits
python-v0.
...
jack/clipp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f94349ef59 |
4
.github/workflows/rust.yml
vendored
4
.github/workflows/rust.yml
vendored
@@ -100,9 +100,7 @@ jobs:
|
|||||||
lfs: true
|
lfs: true
|
||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: sudo apt install -y protobuf-compiler libssl-dev
|
||||||
sudo apt update
|
|
||||||
sudo apt install -y protobuf-compiler libssl-dev
|
|
||||||
- uses: rui314/setup-mold@v1
|
- uses: rui314/setup-mold@v1
|
||||||
- name: Make Swap
|
- name: Make Swap
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
467
Cargo.lock
generated
467
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
44
Cargo.toml
44
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.91.0"
|
rust-version = "1.91.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-core = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-core = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datagen = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datagen = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-file = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-file = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-io = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-io = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-index = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-index = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-linalg = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-linalg = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace-impls = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace-impls = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-table = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-table = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-testing = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-testing = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datafusion = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datafusion = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-encoding = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-encoding = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-arrow = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-arrow = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
ahash = "0.8"
|
ahash = "0.8"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "57.2", optional = false }
|
arrow = { version = "57.2", optional = false }
|
||||||
@@ -40,15 +40,13 @@ arrow-schema = "57.2"
|
|||||||
arrow-select = "57.2"
|
arrow-select = "57.2"
|
||||||
arrow-cast = "57.2"
|
arrow-cast = "57.2"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
datafusion = { version = "52.1", default-features = false }
|
datafusion = { version = "51.0", default-features = false }
|
||||||
datafusion-catalog = "52.1"
|
datafusion-catalog = "51.0"
|
||||||
datafusion-common = { version = "52.1", default-features = false }
|
datafusion-common = { version = "51.0", default-features = false }
|
||||||
datafusion-execution = "52.1"
|
datafusion-execution = "51.0"
|
||||||
datafusion-expr = "52.1"
|
datafusion-expr = "51.0"
|
||||||
datafusion-functions = "52.1"
|
datafusion-physical-plan = "51.0"
|
||||||
datafusion-physical-plan = "52.1"
|
datafusion-physical-expr = "51.0"
|
||||||
datafusion-physical-expr = "52.1"
|
|
||||||
datafusion-sql = "52.1"
|
|
||||||
env_logger = "0.11"
|
env_logger = "0.11"
|
||||||
half = { "version" = "2.7.1", default-features = false, features = [
|
half = { "version" = "2.7.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.30.0-beta.3"
|
current_version = "0.30.0-beta.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.30.0-beta.3"
|
version = "0.30.0-beta.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ tests = [
|
|||||||
"polars>=0.19, <=1.3.0",
|
"polars>=0.19, <=1.3.0",
|
||||||
"tantivy",
|
"tantivy",
|
||||||
"pyarrow-stubs",
|
"pyarrow-stubs",
|
||||||
"pylance>=1.0.0b14,<3.0.0",
|
"pylance>=1.0.0b14",
|
||||||
"requests",
|
"requests",
|
||||||
"datafusion<52",
|
"datafusion<52",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1462,7 +1462,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
self._phrase_query = False
|
self._phrase_query = False
|
||||||
self.ordering_field_name = ordering_field_name
|
self.ordering_field_name = ordering_field_name
|
||||||
self._reranker = None
|
self._reranker = None
|
||||||
self._fast_search = None
|
|
||||||
if isinstance(fts_columns, str):
|
if isinstance(fts_columns, str):
|
||||||
fts_columns = [fts_columns]
|
fts_columns = [fts_columns]
|
||||||
self._fts_columns = fts_columns
|
self._fts_columns = fts_columns
|
||||||
@@ -1484,19 +1483,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
self._phrase_query = phrase_query
|
self._phrase_query = phrase_query
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def fast_search(self) -> LanceFtsQueryBuilder:
|
|
||||||
"""
|
|
||||||
Skip a flat search of unindexed data. This will improve
|
|
||||||
search performance but search results will not include unindexed data.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
LanceFtsQueryBuilder
|
|
||||||
The LanceFtsQueryBuilder object.
|
|
||||||
"""
|
|
||||||
self._fast_search = True
|
|
||||||
return self
|
|
||||||
|
|
||||||
def to_query_object(self) -> Query:
|
def to_query_object(self) -> Query:
|
||||||
return Query(
|
return Query(
|
||||||
columns=self._columns,
|
columns=self._columns,
|
||||||
@@ -1508,7 +1494,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
query=self._query, columns=self._fts_columns
|
query=self._query, columns=self._fts_columns
|
||||||
),
|
),
|
||||||
offset=self._offset,
|
offset=self._offset,
|
||||||
fast_search=self._fast_search,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def output_schema(self) -> pa.Schema:
|
def output_schema(self) -> pa.Schema:
|
||||||
|
|||||||
@@ -882,105 +882,3 @@ def test_fts_query_to_json():
|
|||||||
'"must_not":[]}}'
|
'"must_not":[]}}'
|
||||||
)
|
)
|
||||||
assert json_str == expected
|
assert json_str == expected
|
||||||
|
|
||||||
|
|
||||||
def test_fts_fast_search(table):
|
|
||||||
table.create_fts_index("text", use_tantivy=False)
|
|
||||||
|
|
||||||
# Insert some unindexed data
|
|
||||||
table.add(
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"text": "xyz",
|
|
||||||
"vector": [0 for _ in range(128)],
|
|
||||||
"id": 101,
|
|
||||||
"text2": "xyz",
|
|
||||||
"nested": {"text": "xyz"},
|
|
||||||
"count": 10,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Without fast_search, the query object should not have fast_search set
|
|
||||||
builder = table.search("xyz", query_type="fts").limit(10)
|
|
||||||
query = builder.to_query_object()
|
|
||||||
assert query.fast_search is None
|
|
||||||
|
|
||||||
# With fast_search, the query object should have fast_search=True
|
|
||||||
builder = table.search("xyz", query_type="fts").fast_search().limit(10)
|
|
||||||
query = builder.to_query_object()
|
|
||||||
assert query.fast_search is True
|
|
||||||
|
|
||||||
# fast_search should be chainable with other methods
|
|
||||||
builder = (
|
|
||||||
table.search("xyz", query_type="fts").fast_search().select(["text"]).limit(5)
|
|
||||||
)
|
|
||||||
query = builder.to_query_object()
|
|
||||||
assert query.fast_search is True
|
|
||||||
assert query.limit == 5
|
|
||||||
assert query.columns == ["text"]
|
|
||||||
|
|
||||||
# Verify it executes without error and skips unindexed data
|
|
||||||
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
|
||||||
assert len(results) == 0
|
|
||||||
|
|
||||||
# Update index and verify it returns results
|
|
||||||
table.optimize()
|
|
||||||
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
|
||||||
assert len(results) > 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_fts_fast_search_async(async_table):
|
|
||||||
await async_table.create_index("text", config=FTS())
|
|
||||||
|
|
||||||
# Insert some unindexed data
|
|
||||||
await async_table.add(
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"text": "xyz",
|
|
||||||
"vector": [0 for _ in range(128)],
|
|
||||||
"id": 101,
|
|
||||||
"text2": "xyz",
|
|
||||||
"nested": {"text": "xyz"},
|
|
||||||
"count": 10,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Without fast_search, should return results
|
|
||||||
results = await async_table.query().nearest_to_text("xyz").limit(5).to_list()
|
|
||||||
assert len(results) > 0
|
|
||||||
|
|
||||||
# With fast_search, should return no results data unindexed
|
|
||||||
fast_results = (
|
|
||||||
await async_table.query()
|
|
||||||
.nearest_to_text("xyz")
|
|
||||||
.fast_search()
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(fast_results) == 0
|
|
||||||
|
|
||||||
# Update index and verify it returns results
|
|
||||||
await async_table.optimize()
|
|
||||||
|
|
||||||
fast_results = (
|
|
||||||
await async_table.query()
|
|
||||||
.nearest_to_text("xyz")
|
|
||||||
.fast_search()
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(fast_results) > 0
|
|
||||||
|
|
||||||
# fast_search should be chainable with other methods
|
|
||||||
results = (
|
|
||||||
await async_table.query()
|
|
||||||
.nearest_to_text("xyz")
|
|
||||||
.fast_search()
|
|
||||||
.select(["text"])
|
|
||||||
.limit(5)
|
|
||||||
.to_list()
|
|
||||||
)
|
|
||||||
assert len(results) > 0
|
|
||||||
|
|||||||
@@ -25,9 +25,9 @@ datafusion-catalog.workspace = true
|
|||||||
datafusion-common.workspace = true
|
datafusion-common.workspace = true
|
||||||
datafusion-execution.workspace = true
|
datafusion-execution.workspace = true
|
||||||
datafusion-expr.workspace = true
|
datafusion-expr.workspace = true
|
||||||
datafusion-functions.workspace = true
|
datafusion-functions = "51.0"
|
||||||
datafusion-physical-expr.workspace = true
|
datafusion-physical-expr.workspace = true
|
||||||
datafusion-sql.workspace = true
|
datafusion-sql = "51.0"
|
||||||
datafusion-physical-plan.workspace = true
|
datafusion-physical-plan.workspace = true
|
||||||
datafusion.workspace = true
|
datafusion.workspace = true
|
||||||
object_store = { workspace = true }
|
object_store = { workspace = true }
|
||||||
|
|||||||
Reference in New Issue
Block a user