fix(python): typing (#2167)

@wjones127 is there a standard way you set up your virtualenv? I can either relist all the dependencies in the pyright pre-commit section, or specify a venv, or the user has to be in the virtual environment when they run `git commit`. If the venv location was standardized, or a Python manager like `uv` was used, it would be easier to avoid duplicating the pyright dependency list.

Per your suggestion, in `pyproject.toml` I added all the passing files to the `includes` section.

For ruff, I upgraded the version and removed "TCH", which doesn't exist as an option.

I added a `pyright_report.csv`, which contains a list of all files sorted by pyright errors (ascending), as a to-do list to work on.

I fixed about 30 issues in `table.py` stemming from plain `str` values being passed into methods that require a string from a set of string `Literal`s, by extracting those literals into `types.py`.

Can you verify in the Rust bridge that the schema should be a property and not a method here? If it's a method, then there's another place in the code where `inner.schema` should be `inner.schema()`.

```python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
```

Also, unless the `_lancedb.pyi` file is wrong, there is no `__anext__` here for `_inner` when it's not an `AsyncGenerator`; only `next` is defined:

```python
async def __anext__(self) -> pa.RecordBatch:
    return await self._inner.__anext__()
    if isinstance(self._inner, AsyncGenerator):
        batch = await self._inner.__anext__()
    else:
        batch = await self._inner.next()
    if batch is None:
        raise StopAsyncIteration
    return batch
```

In the `else` branch, `_inner` is a `RecordBatchStream`:

```python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
    async def next(self) -> Optional[pa.RecordBatch]: ...
```

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2026-05-16 19:40:40 +00:00 · 2025-03-10 09:01:23 -07:00
parent bc49c4db82
commit cc81f3e1a5
16 changed files with 294 additions and 86 deletions
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -131,9 +131,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
        "represents the relevance of the result to the query & should "
        "be descending."
    )
-    assert np.all(
-        np.diff(result.column("_relevance_score").to_numpy()) <= 0
-    ), ascending_relevance_err
+    assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+        ascending_relevance_err
+    )

    # Vector search setting
    result = (
@@ -143,9 +143,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
        .to_arrow()
    )
    assert len(result) == 30
-    assert np.all(
-        np.diff(result.column("_relevance_score").to_numpy()) <= 0
-    ), ascending_relevance_err
+    assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+        ascending_relevance_err
+    )
    result_explicit = (
        table.search(query_vector, vector_column_name="vector")
        .rerank(reranker=reranker, query_string=query)
@@ -168,9 +168,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
        .to_arrow()
    )
    assert len(result) > 0
-    assert np.all(
-        np.diff(result.column("_relevance_score").to_numpy()) <= 0
-    ), ascending_relevance_err
+    assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+        ascending_relevance_err
+    )

    # empty FTS results
    query = "abcxyz" * 100
@@ -185,9 +185,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):

    # should return _relevance_score column
    assert "_relevance_score" in result.column_names
-    assert np.all(
-        np.diff(result.column("_relevance_score").to_numpy()) <= 0
-    ), ascending_relevance_err
+    assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+        ascending_relevance_err
+    )

    # Multi-vector search setting
    rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
@@ -262,9 +262,9 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
        "represents the relevance of the result to the query & should "
        "be descending."
    )
-    assert np.all(
-        np.diff(result.column("_relevance_score").to_numpy()) <= 0
-    ), ascending_relevance_err
+    assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+        ascending_relevance_err
+    )

    # Test with empty FTS results
    query = "abcxyz" * 100
@@ -278,9 +278,9 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
    )
    # should return _relevance_score column
    assert "_relevance_score" in result.column_names
-    assert np.all(
-        np.diff(result.column("_relevance_score").to_numpy()) <= 0
-    ), ascending_relevance_err
+    assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+        ascending_relevance_err
+    )


@pytest.mark.parametrize("use_tantivy", [True, False])