mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 07:09:57 +00:00
fix: use local random state in FTS test fixtures to prevent flaky failures (#2532)
## Summary
Fixes intermittent CI failures in `test_search_fts[False]` where boolean
FTS queries were returning fewer results than expected due to
non-deterministic test data generation.
## Problem
The test fixtures drew from the global `random` and `np.random` generators
without seeding them, so the generated table contents differed on every run.
As a result, the boolean query `MatchQuery("puppy", "text") &
MatchQuery("runs", "text")` sometimes returned only 3 results instead
of the expected 5, leading to `AssertionError: assert 3 == 5`.
## Solution
- Replace global random calls with local `random.Random(42)` and
`np.random.RandomState(42)` objects in the test fixtures
- Make the generated test data deterministic while maintaining test isolation
- Leave other tests unaffected, since the random state is scoped to the fixtures only
## Test Results
- ✅ `test_search_fts[False]` now passes consistently
- ✅ All other FTS tests continue to pass
- ✅ No regression in other test suites (verified with `test_basic`)
- ✅ Maintains existing test behavior and coverage
This commit is contained in:
@@ -33,8 +33,11 @@ tantivy = pytest.importorskip("tantivy")
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def table(tmp_path) -> ldb.table.LanceTable:
|
def table(tmp_path) -> ldb.table.LanceTable:
|
||||||
|
# Use local random state to avoid affecting other tests
|
||||||
|
rng = np.random.RandomState(42)
|
||||||
|
local_random = random.Random(42)
|
||||||
db = ldb.connect(tmp_path)
|
db = ldb.connect(tmp_path)
|
||||||
vectors = [np.random.randn(128) for _ in range(100)]
|
vectors = [rng.randn(128) for _ in range(100)]
|
||||||
|
|
||||||
text_nouns = ("puppy", "car")
|
text_nouns = ("puppy", "car")
|
||||||
text2_nouns = ("rabbit", "girl", "monkey")
|
text2_nouns = ("rabbit", "girl", "monkey")
|
||||||
@@ -44,10 +47,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
|||||||
text = [
|
text = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text_nouns[random.randrange(0, len(text_nouns))],
|
text_nouns[local_random.randrange(0, len(text_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
@@ -55,15 +58,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
|||||||
text2 = [
|
text2 = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text2_nouns[random.randrange(0, len(text2_nouns))],
|
text2_nouns[local_random.randrange(0, len(text2_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
]
|
]
|
||||||
count = [random.randint(1, 10000) for _ in range(100)]
|
count = [local_random.randint(1, 10000) for _ in range(100)]
|
||||||
table = db.create_table(
|
table = db.create_table(
|
||||||
"test",
|
"test",
|
||||||
data=pd.DataFrame(
|
data=pd.DataFrame(
|
||||||
@@ -82,8 +85,11 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
||||||
|
# Use local random state to avoid affecting other tests
|
||||||
|
rng = np.random.RandomState(42)
|
||||||
|
local_random = random.Random(42)
|
||||||
db = await ldb.connect_async(tmp_path)
|
db = await ldb.connect_async(tmp_path)
|
||||||
vectors = [np.random.randn(128) for _ in range(100)]
|
vectors = [rng.randn(128) for _ in range(100)]
|
||||||
|
|
||||||
text_nouns = ("puppy", "car")
|
text_nouns = ("puppy", "car")
|
||||||
text2_nouns = ("rabbit", "girl", "monkey")
|
text2_nouns = ("rabbit", "girl", "monkey")
|
||||||
@@ -93,10 +99,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
|||||||
text = [
|
text = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text_nouns[random.randrange(0, len(text_nouns))],
|
text_nouns[local_random.randrange(0, len(text_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
@@ -104,15 +110,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
|||||||
text2 = [
|
text2 = [
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
text2_nouns[random.randrange(0, len(text2_nouns))],
|
text2_nouns[local_random.randrange(0, len(text2_nouns))],
|
||||||
verbs[random.randrange(0, 5)],
|
verbs[local_random.randrange(0, 5)],
|
||||||
adv[random.randrange(0, 5)],
|
adv[local_random.randrange(0, 5)],
|
||||||
adj[random.randrange(0, 5)],
|
adj[local_random.randrange(0, 5)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for _ in range(100)
|
for _ in range(100)
|
||||||
]
|
]
|
||||||
count = [random.randint(1, 10000) for _ in range(100)]
|
count = [local_random.randint(1, 10000) for _ in range(100)]
|
||||||
table = await db.create_table(
|
table = await db.create_table(
|
||||||
"test",
|
"test",
|
||||||
data=pd.DataFrame(
|
data=pd.DataFrame(
|
||||||
|
|||||||
Reference in New Issue
Block a user