diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 77614d19f..0dc213857 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -111,7 +111,6 @@ jobs: - name: Install run: | pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings] - pip install tantivy pip install mlx - name: Doctest run: pytest --doctest-modules python/lancedb @@ -230,6 +229,5 @@ jobs: pip install "pydantic<2" pip install pyarrow==16 pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests] - pip install tantivy - name: Run tests run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index a7ace9da9..1c223b9b8 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -24,4 +24,4 @@ RUN python --version && \ rustc --version && \ protoc --version -RUN pip install --no-cache-dir tantivy lancedb +RUN pip install --no-cache-dir lancedb diff --git a/python/PYTHON_THIRD_PARTY_LICENSES.md b/python/PYTHON_THIRD_PARTY_LICENSES.md index 57e337d32..d4d1bc766 100644 --- a/python/PYTHON_THIRD_PARTY_LICENSES.md +++ b/python/PYTHON_THIRD_PARTY_LICENSES.md @@ -183,7 +183,6 @@ | stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data | | sympy | 1.14.0 | BSD License | https://sympy.org | | tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate | -| tantivy | 0.25.1 | UNKNOWN | UNKNOWN | | threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl | | timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models | | tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 | diff --git a/python/pyproject.toml b/python/pyproject.toml index 280f6cfec..5f0ab3eec 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -57,7 +57,6 @@ tests = [ "duckdb>=0.9.0", "pytz>=2023.3", "polars>=0.19, <=1.3.0", - "tantivy>=0.20.0", "pyarrow-stubs>=16.0", "pylance>=5.0.0b5", "requests>=2.31.0", diff --git a/python/python/lancedb/fts.py b/python/python/lancedb/fts.py deleted file mode 100644 index ab954116e..000000000 --- a/python/python/lancedb/fts.py +++ /dev/null @@ -1,201 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright The LanceDB Authors - -"""Full text search index using tantivy-py""" - -import os -from typing import List, Tuple, Optional - -import pyarrow as pa - -try: - import tantivy -except ImportError: - raise ImportError( - "Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501 - ) - -from .table import LanceTable - - -def create_index( - index_path: str, - text_fields: List[str], - ordering_fields: Optional[List[str]] = None, - tokenizer_name: str = "default", -) -> tantivy.Index: - """ - Create a new Index (not populated) - - Parameters - ---------- - index_path : str - Path to the index directory - text_fields : List[str] - List of text fields to index - ordering_fields: List[str] - List of unsigned type fields to order by at search time - tokenizer_name : str, default "default" - The tokenizer to use - - Returns - ------- - index : tantivy.Index - The index object (not yet populated) - """ - if ordering_fields is None: - ordering_fields = [] - # Declaring our schema. - schema_builder = tantivy.SchemaBuilder() - # special field that we'll populate with row_id - schema_builder.add_integer_field("doc_id", stored=True) - # data fields - for name in text_fields: - schema_builder.add_text_field(name, stored=True, tokenizer_name=tokenizer_name) - if ordering_fields: - for name in ordering_fields: - schema_builder.add_unsigned_field(name, fast=True) - schema = schema_builder.build() - os.makedirs(index_path, exist_ok=True) - index = tantivy.Index(schema, path=index_path) - return index - - -def populate_index( - index: tantivy.Index, - table: LanceTable, - fields: List[str], - writer_heap_size: Optional[int] = None, - ordering_fields: Optional[List[str]] = None, -) -> int: - """ - Populate an index with data from a LanceTable - - Parameters - ---------- - index : tantivy.Index - The index object - table : LanceTable - The table to index - fields : List[str] - List of fields to index - writer_heap_size : int - The writer heap size in bytes, defaults to 1GB - - Returns - ------- - int - The number of rows indexed - """ - if ordering_fields is None: - ordering_fields = [] - writer_heap_size = writer_heap_size or 1024 * 1024 * 1024 - # first check the fields exist and are string or large string type - nested = [] - - for name in fields: - try: - f = table.schema.field(name) # raises KeyError if not found - except KeyError: - f = resolve_path(table.schema, name) - nested.append(name) - - if not pa.types.is_string(f.type) and not pa.types.is_large_string(f.type): - raise TypeError(f"Field {name} is not a string type") - - # create a tantivy writer - writer = index.writer(heap_size=writer_heap_size) - # write data into index - dataset = table.to_lance() - row_id = 0 - - max_nested_level = 0 - if len(nested) > 0: - max_nested_level = max([len(name.split(".")) for name in nested]) - - for b in dataset.to_batches(columns=fields + ordering_fields): - if max_nested_level > 0: - b = pa.Table.from_batches([b]) - for _ in range(max_nested_level - 1): - b = b.flatten() - for i in range(b.num_rows): - doc = tantivy.Document() - for name in fields: - value = b[name][i].as_py() - if value is not None: - doc.add_text(name, value) - for name in ordering_fields: - value = b[name][i].as_py() - if value is not None: - doc.add_unsigned(name, value) - if not doc.is_empty: - doc.add_integer("doc_id", row_id) - writer.add_document(doc) - row_id += 1 - # commit changes - writer.commit() - return row_id - - -def resolve_path(schema, field_name: str) -> pa.Field: - """ - Resolve a nested field path to a list of field names - - Parameters - ---------- - field_name : str - The field name to resolve - - Returns - ------- - List[str] - The resolved path - """ - path = field_name.split(".") - field = schema.field(path.pop(0)) - for segment in path: - if pa.types.is_struct(field.type): - field = field.type.field(segment) - else: - raise KeyError(f"field {field_name} not found in schema {schema}") - return field - - -def search_index( - index: tantivy.Index, query: str, limit: int = 10, ordering_field=None -) -> Tuple[Tuple[int], Tuple[float]]: - """ - Search an index for a query - - Parameters - ---------- - index : tantivy.Index - The index object - query : str - The query string - limit : int - The maximum number of results to return - - Returns - ------- - ids_and_score: list[tuple[int], tuple[float]] - A tuple of two tuples, the first containing the document ids - and the second containing the scores - """ - searcher = index.searcher() - query = index.parse_query(query) - # get top results - if ordering_field: - results = searcher.search(query, limit, order_by_field=ordering_field) - else: - results = searcher.search(query, limit) - if results.count == 0: - return tuple(), tuple() - return tuple( - zip( - *[ - (searcher.doc(doc_address)["doc_id"][0], score) - for score, doc_address in results.hits - ] - ) - ) diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index b5298505c..b796fc40c 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -25,7 +25,6 @@ import deprecation import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pyarrow.fs as pa_fs import pydantic from lancedb.pydantic import PYDANTIC_VERSION @@ -1526,9 +1525,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): return self._table._output_schema(self.to_query_object()) def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table: - path, fs, exist = self._table._get_fts_index_path() - if exist: - return self.tantivy_to_arrow() + self._table._ensure_no_legacy_fts_index() query = self._query if self._phrase_query: @@ -1552,90 +1549,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): ): raise NotImplementedError("to_batches on an FTS query") - def tantivy_to_arrow(self) -> pa.Table: - try: - import tantivy - except ImportError: - raise ImportError( - "Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501 - ) - - from .fts import search_index - - # get the index path - path, fs, exist = self._table._get_fts_index_path() - - # check if the index exist - if not exist: - raise FileNotFoundError( - "Fts index does not exist. " - "Please first call table.create_fts_index(['']) to " - "create the fts index." - ) - - # Check that we are on local filesystem - if not isinstance(fs, pa_fs.LocalFileSystem): - raise NotImplementedError( - "Tantivy-based full text search " - "is only supported on the local filesystem" - ) - # open the index - index = tantivy.Index.open(path) - # get the scores and doc ids - query = self._query - if self._phrase_query: - query = query.replace('"', "'") - query = f'"{query}"' - limit = self._limit if self._limit is not None else 10 - row_ids, scores = search_index( - index, query, limit, ordering_field=self.ordering_field_name - ) - if len(row_ids) == 0: - empty_schema = pa.schema([pa.field("_score", pa.float32())]) - return pa.Table.from_batches([], schema=empty_schema) - scores = pa.array(scores) - output_tbl = self._table.to_lance().take(row_ids, columns=self._columns) - output_tbl = output_tbl.append_column("_score", scores) - # this needs to match vector search results which are uint64 - row_ids = pa.array(row_ids, type=pa.uint64()) - - if self._where is not None: - tmp_name = "__lancedb__duckdb__indexer__" - output_tbl = output_tbl.append_column( - tmp_name, pa.array(range(len(output_tbl))) - ) - try: - # TODO would be great to have Substrait generate pyarrow compute - # expressions or conversely have pyarrow support SQL expressions - # using Substrait - import duckdb - - indexer = duckdb.sql( - f"SELECT {tmp_name} FROM output_tbl WHERE {self._where}" - ).to_arrow_table()[tmp_name] - output_tbl = output_tbl.take(indexer).drop([tmp_name]) - row_ids = row_ids.take(indexer) - - except ImportError: - import tempfile - - import lance - - # TODO Use "memory://" instead once that's supported - with tempfile.TemporaryDirectory() as tmp: - ds = lance.write_dataset(output_tbl, tmp) - output_tbl = ds.to_table(filter=self._where) - indexer = output_tbl[tmp_name] - row_ids = row_ids.take(indexer) - output_tbl = output_tbl.drop([tmp_name]) - - if self._with_row_id: - output_tbl = output_tbl.append_column("_rowid", row_ids) - - if self._reranker is not None: - output_tbl = self._reranker.rerank_fts(self._query, output_tbl) - return output_tbl - def rerank(self, reranker: Reranker) -> LanceFtsQueryBuilder: """Rerank the results using the specified reranker. diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 204e9c5b7..d1321b69a 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -943,29 +943,26 @@ class Table(ABC): Parameters ---------- field_names: str or list of str - The name(s) of the field to index. - If ``use_tantivy`` is False (default), only a single field name - (str) is supported. To index multiple fields, create a separate - FTS index for each field. + The name of the field to index. Native FTS indexes can only be + created on a single field at a time. To search over multiple text + fields, create a separate FTS index for each field. replace: bool, default False If True, replace the existing index if it exists. Note that this is not yet an atomic operation; the index will be temporarily unavailable while the new index is being created. writer_heap_size: int, default 1GB - Only available with use_tantivy=True + Deprecated legacy Tantivy parameter. Any value other than the + default raises an error. ordering_field_names: - A list of unsigned type fields to index to optionally order - results on at search time. - only available with use_tantivy=True + Deprecated legacy Tantivy parameter. Setting this raises an error. tokenizer_name: str, default "default" - The tokenizer to use for the index. Can be "raw", "default" or the 2 letter - language code followed by "_stem". So for english it would be "en_stem". - For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html + A compatibility alias for native tokenizer configs. Can be "raw", + "default" or the 2 letter language code followed by "_stem". So + for english it would be "en_stem". use_tantivy: bool, default False - If True, use the legacy full-text search implementation based on tantivy. - If False, use the new full-text search implementation based on lance-index. + Deprecated legacy Tantivy parameter. Setting this to True raises an + error. with_position: bool, default False - Only available with use_tantivy=False If False, do not store the positions of the terms in the text. This can reduce the size of the index and improve indexing speed. But it will raise an exception for phrase queries. @@ -1746,6 +1743,16 @@ class Table(ABC): index_exists = fs.get_file_info(path).type != pa_fs.FileType.NotFound return (path, fs, index_exists) + def _ensure_no_legacy_fts_index(self): + path, _, exists = self._get_fts_index_path() + if exists: + raise ValueError( + "Legacy Tantivy FTS index detected at " + f"{path}. Tantivy-based FTS has been removed. " + "Delete the legacy index and recreate it with " + "table.create_fts_index(...)." + ) + @abstractmethod def uses_v2_manifest_paths(self) -> bool: """ @@ -2405,84 +2412,63 @@ class LanceTable(Table): prefix_only: bool = False, name: Optional[str] = None, ): - if not use_tantivy: - if not isinstance(field_names, str): - raise ValueError( - "Native FTS indexes can only be created on a single field " - "at a time. To search over multiple text fields, create a " - "separate FTS index for each field." - ) + self._ensure_no_legacy_fts_index() - if tokenizer_name is None: - tokenizer_configs = { - "base_tokenizer": base_tokenizer, - "language": language, - "with_position": with_position, - "max_token_length": max_token_length, - "lower_case": lower_case, - "stem": stem, - "remove_stop_words": remove_stop_words, - "ascii_folding": ascii_folding, - "ngram_min_length": ngram_min_length, - "ngram_max_length": ngram_max_length, - "prefix_only": prefix_only, - } - else: - tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name) - - config = FTS( - **tokenizer_configs, + if use_tantivy: + raise ValueError( + "Tantivy-based FTS has been removed. " + "Remove use_tantivy and recreate the index with native FTS." ) - - # delete the existing legacy index if it exists - if replace: - path, fs, exist = self._get_fts_index_path() - if exist: - fs.delete_dir(path) - - LOOP.run( - self._table.create_index( - field_names, - replace=replace, - config=config, - name=name, - ) + if ordering_field_names is not None: + raise ValueError( + "ordering_field_names was only supported by the removed " + "Tantivy-based FTS implementation." ) - return - - from .fts import create_index, populate_index - - if isinstance(field_names, str): - field_names = [field_names] - - if isinstance(ordering_field_names, str): - ordering_field_names = [ordering_field_names] - - path, fs, exist = self._get_fts_index_path() - if exist: - if not replace: - raise ValueError("Index already exists. Use replace=True to overwrite.") - fs.delete_dir(path) - - if not isinstance(fs, pa_fs.LocalFileSystem): - raise NotImplementedError( - "Full-text search is only supported on the local filesystem" + if writer_heap_size != 1024 * 1024 * 1024: + raise ValueError( + "writer_heap_size was only supported by the removed " + "Tantivy-based FTS implementation." + ) + if not isinstance(field_names, str): + raise ValueError( + "Native FTS indexes can only be created on a single field " + "at a time. To search over multiple text fields, create a " + "separate FTS index for each field." + ) + if "." in field_names: + raise ValueError( + "Native FTS indexes can only be created on top-level fields. " + f"Received nested field path: {field_names!r}." ) if tokenizer_name is None: - tokenizer_name = "default" - index = create_index( - path, - field_names, - ordering_fields=ordering_field_names, - tokenizer_name=tokenizer_name, + tokenizer_configs = { + "base_tokenizer": base_tokenizer, + "language": language, + "with_position": with_position, + "max_token_length": max_token_length, + "lower_case": lower_case, + "stem": stem, + "remove_stop_words": remove_stop_words, + "ascii_folding": ascii_folding, + "ngram_min_length": ngram_min_length, + "ngram_max_length": ngram_max_length, + "prefix_only": prefix_only, + } + else: + tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name) + + config = FTS( + **tokenizer_configs, ) - populate_index( - index, - self, - field_names, - ordering_fields=ordering_field_names, - writer_heap_size=writer_heap_size, + + LOOP.run( + self._table.create_index( + field_names, + replace=replace, + config=config, + name=name, + ) ) @staticmethod diff --git a/python/python/tests/docs/test_search.py b/python/python/tests/docs/test_search.py index d90651639..f34dfd9b7 100644 --- a/python/python/tests/docs/test_search.py +++ b/python/python/tests/docs/test_search.py @@ -180,7 +180,7 @@ def test_fts_fuzzy_query(): ), mode="overwrite", ) - table.create_fts_index("text", use_tantivy=False, replace=True) + table.create_fts_index("text", replace=True) results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas() assert len(results) == 4 @@ -230,7 +230,7 @@ def test_fts_boost_query(): ), mode="overwrite", ) - table.create_fts_index("desc", use_tantivy=False, replace=True) + table.create_fts_index("desc", replace=True) results = table.search( BoostQuery( @@ -265,7 +265,7 @@ def test_fts_boolean_query(tmp_path): ], mode="overwrite", ) - table.create_fts_index("text", use_tantivy=False, replace=True) + table.create_fts_index("text", replace=True) # SHOULD results = table.search( @@ -319,9 +319,7 @@ def test_fts_native(): ], ) - # passing `use_tantivy=False` to use lance FTS index - # `use_tantivy=True` by default - table.create_fts_index("text", use_tantivy=False) + table.create_fts_index("text") table.search("puppy").limit(10).select(["text"]).to_list() # [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}] # ... @@ -332,7 +330,6 @@ def test_fts_native(): # --8<-- [start:fts_config_folding] table.create_fts_index( "text", - use_tantivy=False, language="French", stem=True, ascii_folding=True, @@ -346,7 +343,7 @@ def test_fts_native(): table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list() # --8<-- [end:fts_postfiltering] # --8<-- [start:fts_with_position] - table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True) + table.create_fts_index("text", with_position=True, replace=True) # --8<-- [end:fts_with_position] # --8<-- [start:fts_incremental_index] table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}]) diff --git a/python/python/tests/test_db.py b/python/python/tests/test_db.py index 5f3fc60ec..af03e77cd 100644 --- a/python/python/tests/test_db.py +++ b/python/python/tests/test_db.py @@ -15,8 +15,7 @@ import pytest from lancedb.pydantic import LanceModel, Vector -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_basic(tmp_path, use_tantivy): +def test_basic(tmp_path): db = lancedb.connect(tmp_path) assert db.uri == str(tmp_path) @@ -49,7 +48,7 @@ def test_basic(tmp_path, use_tantivy): assert len(rs) == 1 assert rs["item"].iloc[0] == "foo" - table.create_fts_index("item", use_tantivy=use_tantivy) + table.create_fts_index("item") rs = table.search("bar", query_type="fts").to_pandas() assert len(rs) == 1 assert rs["item"].iloc[0] == "bar" diff --git a/python/python/tests/test_fts.py b/python/python/tests/test_fts.py index 031f28cfa..57f2db85d 100644 --- a/python/python/tests/test_fts.py +++ b/python/python/tests/test_fts.py @@ -36,9 +36,6 @@ import pytest import pytest_asyncio from utils import exception_output -pytest.importorskip("lancedb.fts") -tantivy = pytest.importorskip("tantivy") - @pytest.fixture def table(tmp_path) -> ldb.table.LanceTable: @@ -144,58 +141,53 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable: return table -def test_create_index(tmp_path): - index = ldb.fts.create_index(str(tmp_path / "index"), ["text"]) - assert isinstance(index, tantivy.Index) - assert os.path.exists(str(tmp_path / "index")) +@pytest.mark.parametrize( + ("kwargs", "match"), + [ + ( + {"use_tantivy": True}, + "Tantivy-based FTS has been removed", + ), + ( + {"ordering_field_names": ["count"]}, + "ordering_field_names was only supported", + ), + ( + {"writer_heap_size": 128}, + "writer_heap_size was only supported", + ), + ], +) +def test_reject_removed_tantivy_parameters(table, kwargs, match): + with pytest.raises(ValueError, match=match): + table.create_fts_index("text", **kwargs) -def test_create_index_with_stemming(tmp_path, table): - index = ldb.fts.create_index( - str(tmp_path / "index"), ["text"], tokenizer_name="en_stem" - ) - assert isinstance(index, tantivy.Index) - assert os.path.exists(str(tmp_path / "index")) +def test_reject_legacy_tantivy_index(table): + path, _, _ = table._get_fts_index_path() + os.makedirs(path, exist_ok=True) - # Check stemming by running tokenizer on non empty table - table.create_fts_index("text", tokenizer_name="en_stem", use_tantivy=True) + with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"): + table.search("puppy").limit(5).to_list() + + with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"): + table.create_fts_index("text") -@pytest.mark.parametrize("use_tantivy", [True, False]) @pytest.mark.parametrize("with_position", [True, False]) -def test_create_inverted_index(table, use_tantivy, with_position): - if use_tantivy and not with_position: - pytest.skip("we don't support building a tantivy index without position") +def test_create_inverted_index(table, with_position): table.create_fts_index( "text", - use_tantivy=use_tantivy, with_position=with_position, name="custom_fts_index", ) - if not use_tantivy: - indices = table.list_indices() - fts_indices = [i for i in indices if i.index_type == "FTS"] - assert any(i.name == "custom_fts_index" for i in fts_indices) + indices = table.list_indices() + fts_indices = [i for i in indices if i.index_type == "FTS"] + assert any(i.name == "custom_fts_index" for i in fts_indices) -def test_populate_index(tmp_path, table): - index = ldb.fts.create_index(str(tmp_path / "index"), ["text"]) - assert ldb.fts.populate_index(index, table, ["text"]) == len(table) - - -def test_search_index(tmp_path, table): - index = ldb.fts.create_index(str(tmp_path / "index"), ["text"]) - ldb.fts.populate_index(index, table, ["text"]) - index.reload() - results = ldb.fts.search_index(index, query="puppy", limit=5) - assert len(results) == 2 - assert len(results[0]) == 5 # row_ids - assert len(results[1]) == 5 # _score - - -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_search_fts(table, use_tantivy): - table.create_fts_index("text", use_tantivy=use_tantivy) +def test_search_fts(table): + table.create_fts_index("text") results = table.search("puppy").select(["id", "text"]).limit(5).to_list() assert len(results) == 5 assert len(results[0]) == 3 # id, text, _score @@ -204,53 +196,52 @@ def test_search_fts(table, use_tantivy): results = table.search("puppy").select(["id", "text"]).to_list() assert len(results) == 10 - if not use_tantivy: - # Test with a query - results = ( - table.search(MatchQuery("puppy", "text")) - .select(["id", "text"]) - .limit(5) - .to_list() - ) - assert len(results) == 5 + # Test with a query + results = ( + table.search(MatchQuery("puppy", "text")) + .select(["id", "text"]) + .limit(5) + .to_list() + ) + assert len(results) == 5 - # Test boost query - results = ( - table.search( - BoostQuery( - MatchQuery("puppy", "text"), - MatchQuery("runs", "text"), - ) + # Test boost query + results = ( + table.search( + BoostQuery( + MatchQuery("puppy", "text"), + MatchQuery("runs", "text"), ) - .select(["id", "text"]) - .limit(5) - .to_list() ) - assert len(results) == 5 + .select(["id", "text"]) + .limit(5) + .to_list() + ) + assert len(results) == 5 - # Test multi match query - table.create_fts_index("text2", use_tantivy=use_tantivy) - results = ( - table.search(MultiMatchQuery("puppy", ["text", "text2"])) - .select(["id", "text"]) - .limit(5) - .to_list() - ) - assert len(results) == 5 - assert len(results[0]) == 3 # id, text, _score + # Test multi match query + table.create_fts_index("text2") + results = ( + table.search(MultiMatchQuery("puppy", ["text", "text2"])) + .select(["id", "text"]) + .limit(5) + .to_list() + ) + assert len(results) == 5 + assert len(results[0]) == 3 # id, text, _score - # Test boolean query - results = ( - table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text")) - .select(["id", "text"]) - .limit(5) - .to_list() - ) - assert len(results) == 5 - assert len(results[0]) == 3 # id, text, _score - for r in results: - assert "puppy" in r["text"] - assert "runs" in r["text"] + # Test boolean query + results = ( + table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text")) + .select(["id", "text"]) + .limit(5) + .to_list() + ) + assert len(results) == 5 + assert len(results[0]) == 3 # id, text, _score + for r in results: + assert "puppy" in r["text"] + assert "runs" in r["text"] @pytest.mark.asyncio @@ -318,13 +309,13 @@ async def test_fts_select_async(async_table): def test_search_fts_phrase_query(table): - table.create_fts_index("text", use_tantivy=False, with_position=False) + table.create_fts_index("text", with_position=False) try: phrase_results = table.search('"puppy runs"').limit(100).to_list() assert False except Exception: pass - table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True) + table.create_fts_index("text", with_position=True, replace=True) results = table.search("puppy").limit(100).to_list() # Test with quotation marks @@ -375,8 +366,8 @@ async def test_search_fts_phrase_query_async(async_table): def test_search_fts_specify_column(table): - table.create_fts_index("text", use_tantivy=False) - table.create_fts_index("text2", use_tantivy=False) + table.create_fts_index("text") + table.create_fts_index("text2") results = table.search("puppy", fts_columns="text").limit(5).to_list() assert len(results) == 5 @@ -470,42 +461,8 @@ async def test_search_fts_specify_column_async(async_table): pass -def test_search_ordering_field_index_table(tmp_path, table): - table.create_fts_index("text", ordering_field_names=["count"], use_tantivy=True) - rows = ( - table.search("puppy", ordering_field_name="count") - .limit(20) - .select(["text", "count"]) - .to_list() - ) - for r in rows: - assert "puppy" in r["text"] - assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows - - -def test_search_ordering_field_index(tmp_path, table): - index = ldb.fts.create_index( - str(tmp_path / "index"), ["text"], ordering_fields=["count"] - ) - - ldb.fts.populate_index(index, table, ["text"], ordering_fields=["count"]) - index.reload() - results = ldb.fts.search_index( - index, query="puppy", limit=5, ordering_field="count" - ) - assert len(results) == 2 - assert len(results[0]) == 5 # row_ids - assert len(results[1]) == 5 # _distance - rows = table.to_lance().take(results[0]).to_pylist() - - for r in rows: - assert "puppy" in r["text"] - assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows - - -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_create_index_from_table(tmp_path, table, use_tantivy): - table.create_fts_index("text", use_tantivy=use_tantivy) +def test_create_index_from_table(tmp_path, table): + table.create_fts_index("text") df = table.search("puppy").limit(5).select(["text"]).to_pandas() assert len(df) <= 5 assert "text" in df.columns @@ -525,36 +482,24 @@ def test_create_index_from_table(tmp_path, table, use_tantivy): ) with pytest.raises(Exception, match="already exists"): - table.create_fts_index("text", use_tantivy=use_tantivy) + table.create_fts_index("text") - table.create_fts_index("text", replace=True, use_tantivy=use_tantivy) + table.create_fts_index("text", replace=True) assert len(table.search("gorilla").limit(1).to_pandas()) == 1 def test_create_index_multiple_columns(tmp_path, table): - table.create_fts_index(["text", "text2"], use_tantivy=True) - df = table.search("puppy").limit(5).to_pandas() - assert len(df) == 5 - assert "text" in df.columns - assert "text2" in df.columns - - -def test_empty_rs(tmp_path, table, mocker): - table.create_fts_index(["text", "text2"], use_tantivy=True) - mocker.patch("lancedb.fts.search_index", return_value=([], [])) - df = table.search("puppy").limit(5).to_pandas() - assert len(df) == 0 + with pytest.raises(ValueError, match="Native FTS indexes can only be created"): + table.create_fts_index(["text", "text2"]) def test_nested_schema(tmp_path, table): - table.create_fts_index("nested.text", use_tantivy=True) - rs = table.search("puppy").limit(5).to_list() - assert len(rs) == 5 + with pytest.raises(ValueError, match="top-level fields"): + table.create_fts_index("nested.text") -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_search_index_with_filter(table, use_tantivy): - table.create_fts_index("text", use_tantivy=use_tantivy) +def test_search_index_with_filter(table): + table.create_fts_index("text") orig_import = __import__ def import_mock(name, *args): @@ -584,8 +529,7 @@ def test_search_index_with_filter(table, use_tantivy): assert r["_rowid"] is not None -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_null_input(table, use_tantivy): +def test_null_input(table): table.add( [ { @@ -598,14 +542,13 @@ def test_null_input(table, use_tantivy): } ] ) - table.create_fts_index("text", use_tantivy=use_tantivy) + table.create_fts_index("text") def test_syntax(table): # https://github.com/lancedb/lancedb/issues/769 - table.create_fts_index("text", use_tantivy=True) - with pytest.raises(ValueError, match="Syntax Error"): - table.search("they could have been dogs OR").limit(10).to_list() + table.create_fts_index("text") + table.search("they could have been dogs OR").limit(10).to_list() # these should work @@ -616,6 +559,7 @@ def test_syntax(table): ).to_list() # phrase queries + table.create_fts_index("text", with_position=True, replace=True) table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list() table.search('"they could have been dogs OR cats"').limit(10).to_list() table.search('''"the cats OR dogs were not really 'pets' at all"''').limit( @@ -639,7 +583,7 @@ def test_language(mem_db: DBConnection): table = mem_db.create_table("test", data=data) with pytest.raises(ValueError) as e: - table.create_fts_index("text", use_tantivy=False, language="klingon") + table.create_fts_index("text", language="klingon") assert exception_output(e) == ( "ValueError: LanceDB does not support the requested language: 'klingon'\n" @@ -650,7 +594,6 @@ def test_language(mem_db: DBConnection): table.create_fts_index( "text", - use_tantivy=False, language="French", stem=True, ascii_folding=True, @@ -690,7 +633,7 @@ def test_fts_on_list(mem_db: DBConnection): } ) table = mem_db.create_table("test", data=data) - table.create_fts_index("text", use_tantivy=False, with_position=True) + table.create_fts_index("text", with_position=True) res = table.search("lance").limit(5).to_list() assert len(res) == 3 @@ -702,7 +645,7 @@ def test_fts_on_list(mem_db: DBConnection): def test_fts_ngram(mem_db: DBConnection): data = pa.table({"text": ["hello world", "lance database", "lance is cool"]}) table = mem_db.create_table("test", data=data) - table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram") + table.create_fts_index("text", base_tokenizer="ngram") results = table.search("lan", query_type="fts").limit(10).to_list() assert len(results) == 2 @@ -721,7 +664,6 @@ def test_fts_ngram(mem_db: DBConnection): # test setting min_ngram_length and prefix_only table.create_fts_index( "text", - use_tantivy=False, base_tokenizer="ngram", replace=True, ngram_min_length=2, @@ -886,7 +828,7 @@ def test_fts_query_to_json(): def test_fts_fast_search(table): - table.create_fts_index("text", use_tantivy=False) + table.create_fts_index("text") # Insert some unindexed data table.add( diff --git a/python/python/tests/test_hybrid_query.py b/python/python/tests/test_hybrid_query.py index a9d89c0f0..d0712f16c 100644 --- a/python/python/tests/test_hybrid_query.py +++ b/python/python/tests/test_hybrid_query.py @@ -28,7 +28,7 @@ def sync_table(tmpdir_factory) -> Table: } ) table = db.create_table("test", data) - table.create_fts_index("text", with_position=False, use_tantivy=False) + table.create_fts_index("text", with_position=False) return table @@ -192,7 +192,7 @@ def table_with_id(tmpdir_factory) -> Table: } ) table = db.create_table("test_with_id", data) - table.create_fts_index("text", with_position=False, use_tantivy=False) + table.create_fts_index("text", with_position=False) return table diff --git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index a7153b010..9ac585df5 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -1385,7 +1385,7 @@ def test_query_timeout(tmp_path): } ) table = db.create_table("test", data) - table.create_fts_index("text", use_tantivy=False) + table.create_fts_index("text") with pytest.raises(Exception, match="Query timeout"): table.search().where("text = 'a'").to_list(timeout=timedelta(0)) diff --git a/python/python/tests/test_rerankers.py b/python/python/tests/test_rerankers.py index cfbaca204..3d028cb3a 100644 --- a/python/python/tests/test_rerankers.py +++ b/python/python/tests/test_rerankers.py @@ -26,11 +26,8 @@ from lancedb.rerankers import ( ) from lancedb.table import LanceTable -# Tests rely on FTS index -pytest.importorskip("lancedb.fts") - -def get_test_table(tmp_path, use_tantivy): +def get_test_table(tmp_path): db = lancedb.connect(tmp_path) # Create a LanceDB table schema with a vector and a text column emb = EmbeddingFunctionRegistry.get_instance().get("test").create() @@ -98,7 +95,7 @@ def get_test_table(tmp_path, use_tantivy): ) # Create a fts index - table.create_fts_index("text", use_tantivy=use_tantivy, replace=True) + table.create_fts_index("text", replace=True) return table, MyTable @@ -208,8 +205,8 @@ def _run_test_reranker(reranker, table, query, query_vector, schema): assert len(result) == 20 and result == result_arrow -def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy): - table, schema = get_test_table(tmp_path, use_tantivy) +def _run_test_hybrid_reranker(reranker, tmp_path): + table, schema = get_test_table(tmp_path) # The default reranker result1 = ( table.search( @@ -285,8 +282,7 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy): ) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_linear_combination(tmp_path, use_tantivy): +def test_linear_combination(tmp_path): reranker = LinearCombinationReranker() vector_results = pa.Table.from_pydict( @@ -313,22 +309,20 @@ def test_linear_combination(tmp_path, use_tantivy): assert "_score" not in combined_results.column_names assert "_relevance_score" in combined_results.column_names - _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy) + _run_test_hybrid_reranker(reranker, tmp_path) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_rrf_reranker(tmp_path, use_tantivy): +def test_rrf_reranker(tmp_path): reranker = RRFReranker() - _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy) + _run_test_hybrid_reranker(reranker, tmp_path) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_mrr_reranker(tmp_path, use_tantivy): +def test_mrr_reranker(tmp_path): reranker = MRRReranker() - _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy) + _run_test_hybrid_reranker(reranker, tmp_path) # Test multi-vector part - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) query = "single player experience" rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True) rs2 = ( @@ -363,7 +357,7 @@ def test_rrf_reranker_distance(): table = db.create_table("test", data) table.create_index(num_partitions=1, num_sub_vectors=2) - table.create_fts_index("text", use_tantivy=False) + table.create_fts_index("text") reranker = RRFReranker(return_score="all") @@ -422,35 +416,31 @@ def test_rrf_reranker_distance(): @pytest.mark.skipif( os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set" ) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_cohere_reranker(tmp_path, use_tantivy): +def test_cohere_reranker(tmp_path): pytest.importorskip("cohere") reranker = CohereReranker() - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) _run_test_reranker(reranker, table, "single player experience", None, schema) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_cross_encoder_reranker(tmp_path, use_tantivy): +def test_cross_encoder_reranker(tmp_path): pytest.importorskip("sentence_transformers") reranker = CrossEncoderReranker() - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) _run_test_reranker(reranker, table, "single player experience", None, schema) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_colbert_reranker(tmp_path, use_tantivy): +def test_colbert_reranker(tmp_path): pytest.importorskip("rerankers") reranker = ColbertReranker() - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) _run_test_reranker(reranker, table, "single player experience", None, schema) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_answerdotai_reranker(tmp_path, use_tantivy): +def test_answerdotai_reranker(tmp_path): pytest.importorskip("rerankers") reranker = AnswerdotaiRerankers() - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) _run_test_reranker(reranker, table, "single player experience", None, schema) @@ -459,10 +449,9 @@ def test_answerdotai_reranker(tmp_path, use_tantivy): or os.environ.get("OPENAI_BASE_URL") is not None, reason="OPENAI_API_KEY not set", ) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_openai_reranker(tmp_path, use_tantivy): +def test_openai_reranker(tmp_path): pytest.importorskip("openai") - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) reranker = OpenaiReranker() _run_test_reranker(reranker, table, "single player experience", None, schema) @@ -470,10 +459,9 @@ def test_openai_reranker(tmp_path, use_tantivy): @pytest.mark.skipif( os.environ.get("JINA_API_KEY") is None, reason="JINA_API_KEY not set" ) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_jina_reranker(tmp_path, use_tantivy): +def test_jina_reranker(tmp_path): pytest.importorskip("jina") - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) reranker = JinaReranker() _run_test_reranker(reranker, table, "single player experience", None, schema) @@ -481,11 +469,10 @@ def test_jina_reranker(tmp_path, use_tantivy): @pytest.mark.skipif( os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set" ) -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_voyageai_reranker(tmp_path, use_tantivy): +def test_voyageai_reranker(tmp_path): pytest.importorskip("voyageai") reranker = VoyageAIReranker(model_name="rerank-2.5") - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) _run_test_reranker(reranker, table, "single player experience", None, schema) @@ -504,7 +491,7 @@ def test_empty_result_reranker(): # Create empty table with schema empty_table = db.create_table("empty_table", schema=schema, mode="overwrite") - empty_table.create_fts_index("text", use_tantivy=False, replace=True) + empty_table.create_fts_index("text", replace=True) for reranker in [ CrossEncoderReranker(), # ColbertReranker(), @@ -603,11 +590,10 @@ def test_empty_hybrid_result_reranker(): assert "_rowid" in result.column_names -@pytest.mark.parametrize("use_tantivy", [True, False]) -def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy): +def test_cross_encoder_reranker_return_all(tmp_path): pytest.importorskip("sentence_transformers") reranker = CrossEncoderReranker(return_score="all") - table, schema = get_test_table(tmp_path, use_tantivy) + table, schema = get_test_table(tmp_path) query = "single player experience" result = ( table.search(query, query_type="hybrid", vector_column_name="vector") diff --git a/python/python/tests/test_s3.py b/python/python/tests/test_s3.py index 3b62e1160..256ccb1d4 100644 --- a/python/python/tests/test_s3.py +++ b/python/python/tests/test_s3.py @@ -242,8 +242,8 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch): # FTS indices should error since they are not supported yet. with pytest.raises( - NotImplementedError, - match="Full-text search is only supported on the local filesystem", + ValueError, + match="Tantivy-based FTS has been removed", ): table.create_fts_index("x", use_tantivy=True) diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py index 7337c7e9a..4e20d2cfc 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -1948,7 +1948,6 @@ def setup_hybrid_search_table(db: DBConnection, embedding_func): def test_hybrid_search(tmp_db: DBConnection): # This test uses an FTS index - pytest.importorskip("lancedb.fts") pytest.importorskip("lance") table, MyTable, emb = setup_hybrid_search_table(tmp_db, "test") @@ -2019,7 +2018,6 @@ def test_hybrid_search(tmp_db: DBConnection): def test_hybrid_search_metric_type(tmp_db: DBConnection): # This test uses an FTS index - pytest.importorskip("lancedb.fts") pytest.importorskip("lance") # Need to use nonnorm as the embedding function so l2 and dot results diff --git a/python/uv.lock b/python/uv.lock index 3cebe6931..3a101e673 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -1996,7 +1996,6 @@ tests = [ { name = "pytest-mock" }, { name = "pytz" }, { name = "requests" }, - { name = "tantivy" }, ] [package.metadata] @@ -2050,7 +2049,6 @@ requires-dist = [ { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" }, { name = "sentencepiece", marker = "extra == 'embeddings'", specifier = ">=0.1.99" }, { name = "sentencepiece", marker = "extra == 'siglip'" }, - { name = "tantivy", marker = "extra == 'tests'", specifier = ">=0.20.0" }, { name = "torch", marker = "extra == 'clip'" }, { name = "torch", marker = "extra == 'embeddings'", specifier = ">=2.0.0" }, { name = "torch", marker = "extra == 'siglip'" }, @@ -4779,44 +4777,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] -[[package]] -name = "tantivy" -version = "0.25.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1b/f9/0cd3955d155d3e3ef74b864769514dd191e5dacba9f0beb7af2d914942ce/tantivy-0.25.1.tar.gz", hash = "sha256:68a3314699a7d18fcf338b52bae8ce46a97dde1128a3e47e33fa4db7f71f265e", size = 75120, upload-time = "2025-12-02T11:57:12.997Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/f7/2276bed3bed983ce2970dc70e3571f372587fe4f5f2bac1d6d617df08fa3/tantivy-0.25.1-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7aa587a3dc9470584cacf5e3640fee93d12ec5f10109669c1f47c4e90820b958", size = 7638510, upload-time = "2025-12-02T11:56:08.754Z" }, - { url = "https://files.pythonhosted.org/packages/20/8c/078dc50570e243414356b05633f52fe544b85179281ffa9f1fe05d76bbd8/tantivy-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:56d77fe667595693d9fa5f0b4545776d84da9526bab0273b3fc6c7536dc0d8a2", size = 3932659, upload-time = "2025-12-02T11:56:10.621Z" }, - { url = "https://files.pythonhosted.org/packages/bd/dc/281c48436a1e3178b58fe463af314434fe0f3a4ec0c7588a362900e0c69e/tantivy-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ba8c347cd48595fcaeabb28a909ebce92cf9c5e5c84ab5ba1136a280a307b5c", size = 4197430, upload-time = "2025-12-02T11:56:12.65Z" }, - { url = "https://files.pythonhosted.org/packages/7b/6c/61e6e0b0a350007d10a9b66a35703361d3345e14e7a7cc83494776b2a054/tantivy-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa7c4932e8fde1f09f2d46226060e827e197c2749abdc6129d73a752773adc38", size = 4184055, upload-time = "2025-12-02T11:56:14.647Z" }, - { url = "https://files.pythonhosted.org/packages/5f/fd/0eb059b12f0b6f91623a54a46448a83b7f716d08f3bca68c095d697b85da/tantivy-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:afcfc5dbb0bcd5d24531f4471737ae0896f33528426ab0b1dad3e427c19120f6", size = 3424134, upload-time = "2025-12-02T11:56:16.242Z" }, - { url = "https://files.pythonhosted.org/packages/4e/7a/8a277f377e8a151fc0e71d4ffc1114aefb6e5e1c7dd609fed0955cf34ed8/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d363d7b4207d3a5aa7f0d212420df35bed18bdb6bae26a2a8bd57428388b7c29", size = 7637033, upload-time = "2025-12-02T11:56:18.104Z" }, - { url = "https://files.pythonhosted.org/packages/71/31/8b4acdedfc9f9a2d04b1340d07eef5213d6f151d1e18da0cb423e5f090d2/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8f4389cf1d889a1df7c5a3195806b4b56c37cee10d8a26faaa0dea35a867b5ff", size = 3932180, upload-time = "2025-12-02T11:56:19.833Z" }, - { url = "https://files.pythonhosted.org/packages/2f/dc/3e8499c21b4b9795e8f2fc54c68ce5b92905aaeadadaa56ecfa9180b11b1/tantivy-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99864c09fc54652c3c2486cdf13f86cdc8200f4b481569cb291e095ca5d496e5", size = 4197620, upload-time = "2025-12-02T11:56:21.496Z" }, - { url = "https://files.pythonhosted.org/packages/f8/8e/f2ce62fffc811eb62bead92c7b23c2e218f817cbd54c4f3b802e03ba1438/tantivy-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05abf37ddbc5063c575548be0d62931629c086bff7a5a1b67cf5a8f5ebf4cd8c", size = 4183794, upload-time = "2025-12-02T11:56:23.215Z" }, - { url = "https://files.pythonhosted.org/packages/de/64/24e2891b0ba3fd9853e10c296095a33b89bf3efd65e29da1ee5dae736040/tantivy-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:f307ee8ad21597b0be23af83008fd66cfd5f958cdfa24ec0aaa08a38e86bbef4", size = 3424235, upload-time = "2025-12-02T11:56:25.172Z" }, - { url = "https://files.pythonhosted.org/packages/41/e7/6849c713ed0996c7628324c60512c4882006f0a62145e56c624a93407f90/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:90fd919e5f611809f746560ecf36eb9be824dec62e21ae17a27243759edb9aa1", size = 7621494, upload-time = "2025-12-02T11:56:27.069Z" }, - { url = "https://files.pythonhosted.org/packages/c5/22/c3d8294600dc6e7fa350daef9ff337d3c06e132b81df727de9f7a50c692a/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4613c7cf6c23f3a97989819690a0f956d799354957de7a204abcc60083cebe02", size = 3925219, upload-time = "2025-12-02T11:56:29.403Z" }, - { url = "https://files.pythonhosted.org/packages/41/fc/cbb1df71dd44c9110eff4eaaeda9d44f2d06182fe0452193be20ddfba93f/tantivy-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c477bd20b4df804d57dfc5033431bef27cde605695ae141b03abbf6ebc069129", size = 4198699, upload-time = "2025-12-02T11:56:31.359Z" }, - { url = "https://files.pythonhosted.org/packages/47/4d/71abb78b774073c3ce12a4faa4351a9d910a71ffa3659526affba163873d/tantivy-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9b1a1ba1113c523c7ff7b10f282d6c4074006f7ef8d71e1d973d51bf7291ddb", size = 4183585, upload-time = "2025-12-02T11:56:33.317Z" }, - { url = "https://files.pythonhosted.org/packages/be/16/3f00cd7ec458b92a0e977960af9ddfbeb762127d9acc68da9094a1fda556/tantivy-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:9de0bafd3bd7ac9f8f82d53e17562e9db11a5af308fe5185c4bd86feaddbe4a6", size = 3424622, upload-time = "2025-12-02T11:56:34.788Z" }, - { url = "https://files.pythonhosted.org/packages/3d/25/73cfbcf1a8ea49be6c42817431cac46b70a119fe64da903fcc2d92b5b511/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f51ff7196c6f31719202080ed8372d5e3d51e92c749c032fb8234f012e99744c", size = 7622530, upload-time = "2025-12-02T11:56:36.839Z" }, - { url = "https://files.pythonhosted.org/packages/12/c8/c0d7591cdf4f7e7a9fc4da786d1ca8cd1aacffaa2be16ea6d401a8e4a566/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:550e63321bfcacc003859f2fa29c1e8e56450807b3c9a501c1add27cfb9236d9", size = 3925637, upload-time = "2025-12-02T11:56:38.425Z" }, - { url = "https://files.pythonhosted.org/packages/3a/09/bedfc223bffec7641b417dd7ab071134b2ef8f8550e9b1fb6014657ef52e/tantivy-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fde31cc8d6e122faf7902aeea32bc008a429a6e8904e34d3468126a3ec01b016", size = 4197322, upload-time = "2025-12-02T11:56:40.411Z" }, - { url = "https://files.pythonhosted.org/packages/f5/f1/1fa5183500c8042200c9f2b840d34f5bbcfb434a1ee750e7132262d2a5c9/tantivy-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b11bd5a518b0be645320b47af8493f6a40c4f3234313e37adcf4534a564d27dd", size = 4183143, upload-time = "2025-12-02T11:56:42.048Z" }, - { url = "https://files.pythonhosted.org/packages/d5/74/a4c4f4eb95888ccb784da3b017aa0625ab1ac411bf5d022a9a797d9a2334/tantivy-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:cc7fe88853e06b3251ee4fa42b7a2038727f850c8765bcc8167cfc73585dd24e", size = 3423491, upload-time = "2025-12-02T11:56:43.858Z" }, - { url = "https://files.pythonhosted.org/packages/8b/2f/581519492226f97d23bd0adc95dad991ebeaa73ea6abc8bff389a3096d9a/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:dae99e75b7eaa9bf5bd16ab106b416370f08c135aed0e117d62a3201cd1ffe36", size = 7610316, upload-time = "2025-12-02T11:56:45.927Z" }, - { url = "https://files.pythonhosted.org/packages/91/40/5d7bc315ab9e6a22c5572656e8ada1c836cfa96dccf533377504fbc3c9d9/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:506e9533c5ef4d3df43bad64ffecc0aa97c76e361ea610815dc3a20a9d6b30b3", size = 3919882, upload-time = "2025-12-02T11:56:48.469Z" }, - { url = "https://files.pythonhosted.org/packages/02/b9/e0ef2f57a6a72444cb66c2ffbc310ab33ffaace275f1c4b0319d84ea3f18/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbd4f8f264dacbcc9dee542832da2173fd53deaaea03f082d95214f8b5ed6bc", size = 4196031, upload-time = "2025-12-02T11:56:50.151Z" }, - { url = "https://files.pythonhosted.org/packages/1e/02/bf3f8cacfd08642e14a73f7956a3fb95d58119132c98c121b9065a1f8615/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:824c643ccb640dd9e35e00c5d5054ddf3323f56fe4219d57d428a9eeea13d22c", size = 4183437, upload-time = "2025-12-02T11:56:51.818Z" }, - { url = "https://files.pythonhosted.org/packages/9c/83/afa90e570198e2d1139dd567bec3c9cf44d8c54f63a649f16d711ede02f5/tantivy-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:09c987b840afcebac817836ac08407eff17272d8aa60ce6e291f89c81830221d", size = 3419409, upload-time = "2025-12-02T11:56:53.451Z" }, - { url = "https://files.pythonhosted.org/packages/ff/44/9f1d67aa5030f7eebc966c863d1316a510a971dd8bb45651df4acdfae9ed/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7f5d29ae85dd0f23df8d15b3e7b341d4f9eb5a446bbb9640df48ac1f6d9e0c6c", size = 7623723, upload-time = "2025-12-02T11:56:55.066Z" }, - { url = "https://files.pythonhosted.org/packages/db/30/6e085bd3ed9d12da3c91c185854abd70f9dfd35fb36a75ea98428d42c30b/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f2d2938fb69a74fc1bb36edfaf7f0d1596fa1264db0f377bda2195c58bcb6245", size = 3926243, upload-time = "2025-12-02T11:56:57.058Z" }, - { url = "https://files.pythonhosted.org/packages/32/f5/a00d65433430f51718e5cc6938df571765d7c4e03aedec5aef4ab567aa9b/tantivy-0.25.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f5ff124c4802558e627091e780b362ca944169736caba5a372eef39a79d0ae0", size = 4207186, upload-time = "2025-12-02T11:56:58.803Z" }, - { url = "https://files.pythonhosted.org/packages/19/63/61bdb12fc95f2a7f77bd419a5149bfa9f28caa76cb569bf2b6b06e1d033e/tantivy-0.25.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43b80ef62a340416139c93d19264e5f808da48e04f9305f1092b8ed22be0a5be", size = 4187312, upload-time = "2025-12-02T11:57:00.595Z" }, - { url = "https://files.pythonhosted.org/packages/b7/de/e39c0b01d59019bf5c38face8b81defbc4a68cebf5e0c53bcb2cd715a449/tantivy-0.25.1-cp314-cp314-win_amd64.whl", hash = "sha256:286b654f40c70c1e6b64b9bc7031ed0bf5c440f5bffeaeeee21a0ee6cc39f0e2", size = 3436535, upload-time = "2025-12-02T11:57:02.267Z" }, -] - [[package]] name = "threadpoolctl" version = "3.6.0"