fix: use local random state in FTS test fixtures to prevent flaky failures (#2532)

## Summary

Fixes intermittent CI failures in `test_search_fts[False]`, where boolean FTS queries returned fewer results than expected because the test data was generated non-deterministically.

## Problem

The test fixtures used the global `random` and `np.random` generators without seeding, so the boolean query `MatchQuery("puppy", "text") & MatchQuery("runs", "text")` sometimes returned only 3 results instead of the expected 5, leading to `AssertionError: assert 3 == 5`.

## Solution

- Replace global random calls with local `random.Random(42)` and `np.random.RandomState(42)` objects in test fixtures
- Ensures deterministic test data while maintaining test isolation
- No impact on other tests since random state is scoped to fixtures only

## Test Results

- ✅ `test_search_fts[False]` now passes consistently
- ✅ All other FTS tests continue to pass
- ✅ No regression in other test suites (verified with `test_basic`)
- ✅ Maintains existing test behavior and coverage
2025-12-27 07:09:57 +00:00 · 2025-07-24 11:30:02 -07:00
parent c2aa03615a
commit 81afd8a42f
1 changed files with 26 additions and 20 deletions
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -33,8 +33,11 @@ tantivy = pytest.importorskip("tantivy")
@pytest.fixture
 def table(tmp_path) -> ldb.table.LanceTable:
+    # Use local random state to avoid affecting other tests
+    rng = np.random.RandomState(42)
+    local_random = random.Random(42)
    db = ldb.connect(tmp_path)
-    vectors = [np.random.randn(128) for _ in range(100)]
+    vectors = [rng.randn(128) for _ in range(100)]
    text_nouns = ("puppy", "car")
    text2_nouns = ("rabbit", "girl", "monkey")
@@ -44,10 +47,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
    text = [
        " ".join(
            [
-                text_nouns[random.randrange(0, len(text_nouns))],
+                text_nouns[local_random.randrange(0, len(text_nouns))],
-                verbs[random.randrange(0, 5)],
+                verbs[local_random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
@@ -55,15 +58,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
    text2 = [
        " ".join(
            [
-                text2_nouns[random.randrange(0, len(text2_nouns))],
+                text2_nouns[local_random.randrange(0, len(text2_nouns))],
-                verbs[random.randrange(0, 5)],
+                verbs[local_random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
    ]
-    count = [random.randint(1, 10000) for _ in range(100)]
+    count = [local_random.randint(1, 10000) for _ in range(100)]
    table = db.create_table(
        "test",
        data=pd.DataFrame(
@@ -82,8 +85,11 @@ def table(tmp_path) -> ldb.table.LanceTable:
@pytest.fixture
 async def async_table(tmp_path) -> ldb.table.AsyncTable:
+    # Use local random state to avoid affecting other tests
+    rng = np.random.RandomState(42)
+    local_random = random.Random(42)
    db = await ldb.connect_async(tmp_path)
-    vectors = [np.random.randn(128) for _ in range(100)]
+    vectors = [rng.randn(128) for _ in range(100)]
    text_nouns = ("puppy", "car")
    text2_nouns = ("rabbit", "girl", "monkey")
@@ -93,10 +99,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
    text = [
        " ".join(
            [
-                text_nouns[random.randrange(0, len(text_nouns))],
+                text_nouns[local_random.randrange(0, len(text_nouns))],
-                verbs[random.randrange(0, 5)],
+                verbs[local_random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
@@ -104,15 +110,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
    text2 = [
        " ".join(
            [
-                text2_nouns[random.randrange(0, len(text2_nouns))],
+                text2_nouns[local_random.randrange(0, len(text2_nouns))],
-                verbs[random.randrange(0, 5)],
+                verbs[local_random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
    ]
-    count = [random.randint(1, 10000) for _ in range(100)]
+    count = [local_random.randint(1, 10000) for _ in range(100)]
    table = await db.create_table(
        "test",
        data=pd.DataFrame(