fix: revert change default read_consistency_interval=5s (#2327)

This reverts commit a547c523c2 or #2281

The current implementation can cause panics and performance degradation.
I will bring this back with more testing in
https://github.com/lancedb/lancedb/pull/2311

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **Documentation**
- Enhanced clarity on read consistency settings with updated
descriptions and default behavior.
- Removed outdated warnings about eventual consistency from the
troubleshooting guide.

- **Refactor**
- Streamlined the handling of the read consistency interval across
integrations, now defaulting to "None" for improved performance.
  - Simplified internal logic to offer a more consistent experience.

- **Tests**
- Updated test expectations to reflect the new default representation
for the read consistency interval.
- Removed redundant tests related to "no consistency" settings for
streamlined testing.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
Will Jones
2025-04-14 10:48:15 -05:00
committed by GitHub
parent 4708b60bb1
commit b3a4efd587
20 changed files with 83 additions and 246 deletions

View File

@@ -26,7 +26,7 @@ def connect(
api_key: Optional[str] = None,
region: str = "us-east-1",
host_override: Optional[str] = None,
read_consistency_interval: Optional[timedelta] = timedelta(seconds=5),
read_consistency_interval: Optional[timedelta] = None,
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None,
@@ -49,8 +49,9 @@ def connect(
read_consistency_interval: timedelta, default None
(For LanceDB OSS only)
The interval at which to check for updates to the table from other
processes. If None, then consistency is not checked. For strong consistency,
set this to zero seconds. Then every read will check for updates from other
processes. If None, then consistency is not checked. For performance
reasons, this is the default. For strong consistency, set this to
zero seconds. Then every read will check for updates from other
processes. As a compromise, you can set this to a non-zero timedelta
for eventual consistency. If more than that interval has passed since
the last check, then the table will be checked for updates. Note: this
@@ -121,7 +122,7 @@ async def connect_async(
api_key: Optional[str] = None,
region: str = "us-east-1",
host_override: Optional[str] = None,
read_consistency_interval: Optional[timedelta] = timedelta(seconds=5),
read_consistency_interval: Optional[timedelta] = None,
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None,
) -> AsyncConnection:
@@ -142,8 +143,9 @@ async def connect_async(
read_consistency_interval: timedelta, default None
(For LanceDB OSS only)
The interval at which to check for updates to the table from other
processes. If None, then consistency is not checked. For strong consistency,
set this to zero seconds. Then every read will check for updates from other
processes. If None, then consistency is not checked. For performance
reasons, this is the default. For strong consistency, set this to
zero seconds. Then every read will check for updates from other
processes. As a compromise, you can set this to a non-zero timedelta
for eventual consistency. If more than that interval has passed since
the last check, then the table will be checked for updates. Note: this

View File

@@ -6,7 +6,6 @@ from __future__ import annotations
from abc import abstractmethod
from pathlib import Path
from datetime import timedelta
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
@@ -33,6 +32,7 @@ import deprecation
if TYPE_CHECKING:
import pyarrow as pa
from .pydantic import LanceModel
from datetime import timedelta
from ._lancedb import Connection as LanceDbConnection
from .common import DATA, URI
@@ -318,8 +318,9 @@ class LanceDBConnection(DBConnection):
The root uri of the database.
read_consistency_interval: timedelta, default None
The interval at which to check for updates to the table from other
processes. If None, then consistency is not checked. For strong consistency,
set this to zero seconds. Then every read will check for updates from other
processes. If None, then consistency is not checked. For performance
reasons, this is the default. For strong consistency, set this to
zero seconds. Then every read will check for updates from other
processes. As a compromise, you can set this to a non-zero timedelta
for eventual consistency. If more than that interval has passed since
the last check, then the table will be checked for updates. Note: this
@@ -351,7 +352,7 @@ class LanceDBConnection(DBConnection):
self,
uri: URI,
*,
read_consistency_interval: Optional[timedelta] = timedelta(seconds=5),
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
):
if not isinstance(uri, Path):

View File

@@ -315,11 +315,6 @@ def test_table():
db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=5))
tbl = db.open_table("test_table")
# --8<-- [end:table_eventual_consistency]
# --8<-- [start:table_no_consistency]
uri = "data/sample-lancedb"
db = lancedb.connect(uri, read_consistency_interval=None)
tbl = db.open_table("test_table")
# --8<-- [end:table_no_consistency]
# --8<-- [start:table_checkout_latest]
tbl = db.open_table("test_table")
@@ -574,12 +569,6 @@ async def test_table_async():
)
async_tbl = await async_db.open_table("test_table_async")
# --8<-- [end:table_async_eventual_consistency]
# --8<-- [start:table_async_no_consistency]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri, read_consistency_interval=None)
async_tbl = await async_db.open_table("test_table_async")
# --8<-- [end:table_async_no_consistency]
# --8<-- [start:table_async_checkout_latest]
async_tbl = await async_db.open_table("test_table_async")

View File

@@ -3,6 +3,7 @@
import re
from datetime import timedelta
import os
import lancedb
@@ -298,11 +299,13 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
@pytest.mark.asyncio
async def test_connect(tmp_path):
db = await lancedb.connect_async(tmp_path)
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=5s)"
db = await lancedb.connect_async(tmp_path, read_consistency_interval=None)
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=None)"
db = await lancedb.connect_async(
tmp_path, read_consistency_interval=timedelta(seconds=5)
)
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=5s)"
@pytest.mark.asyncio
async def test_close(mem_db_async: lancedb.AsyncConnection):
@@ -450,7 +453,7 @@ async def test_open_table(tmp_path):
assert tbl.name == "test"
assert (
re.search(
r"NativeTable\(test, uri=.*test\.lance, read_consistency_interval=5s\)",
r"NativeTable\(test, uri=.*test\.lance, read_consistency_interval=None\)",
str(tbl),
)
is not None

View File

@@ -32,11 +32,7 @@ def test_basic(mem_db: DBConnection):
table = mem_db.create_table("test", data=data)
assert table.name == "test"
assert (
"LanceTable(name='test', version=1, "
"read_consistency_interval=datetime.timedelta(seconds=5), "
"_conn=LanceDBConnection("
) in repr(table)
assert "LanceTable(name='test', version=1, _conn=LanceDBConnection(" in repr(table)
expected_schema = pa.schema(
{
"vector": pa.list_(pa.float32(), 2),