mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 06:39:57 +00:00
feat!: change default read_consistency_interval=5s (#2281)
Previously, when we loaded the next version of the table, we would block all reads with a write lock. Now, we only do that if `read_consistency_interval=0`. Otherwise, we load the next version asynchronously in the background. This should mean that `read_consistency_interval > 0` won't have a meaningful impact on latency. Along with this change, I felt it was safe to change the default consistency interval to 5 seconds. The current default is `None`, which means we will **never** check for a new version by default. I think that default is contrary to most users expectations.
This commit is contained in:
@@ -26,7 +26,7 @@ def connect(
|
||||
api_key: Optional[str] = None,
|
||||
region: str = "us-east-1",
|
||||
host_override: Optional[str] = None,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
read_consistency_interval: Optional[timedelta] = timedelta(seconds=5),
|
||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
@@ -49,9 +49,8 @@ def connect(
|
||||
read_consistency_interval: timedelta, default None
|
||||
(For LanceDB OSS only)
|
||||
The interval at which to check for updates to the table from other
|
||||
processes. If None, then consistency is not checked. For performance
|
||||
reasons, this is the default. For strong consistency, set this to
|
||||
zero seconds. Then every read will check for updates from other
|
||||
processes. If None, then consistency is not checked. For strong consistency,
|
||||
set this to zero seconds. Then every read will check for updates from other
|
||||
processes. As a compromise, you can set this to a non-zero timedelta
|
||||
for eventual consistency. If more than that interval has passed since
|
||||
the last check, then the table will be checked for updates. Note: this
|
||||
@@ -122,7 +121,7 @@ async def connect_async(
|
||||
api_key: Optional[str] = None,
|
||||
region: str = "us-east-1",
|
||||
host_override: Optional[str] = None,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
read_consistency_interval: Optional[timedelta] = timedelta(seconds=5),
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
) -> AsyncConnection:
|
||||
@@ -143,9 +142,8 @@ async def connect_async(
|
||||
read_consistency_interval: timedelta, default None
|
||||
(For LanceDB OSS only)
|
||||
The interval at which to check for updates to the table from other
|
||||
processes. If None, then consistency is not checked. For performance
|
||||
reasons, this is the default. For strong consistency, set this to
|
||||
zero seconds. Then every read will check for updates from other
|
||||
processes. If None, then consistency is not checked. For strong consistency,
|
||||
set this to zero seconds. Then every read will check for updates from other
|
||||
processes. As a compromise, you can set this to a non-zero timedelta
|
||||
for eventual consistency. If more than that interval has passed since
|
||||
the last check, then the table will be checked for updates. Note: this
|
||||
|
||||
@@ -6,6 +6,7 @@ from __future__ import annotations
|
||||
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
from datetime import timedelta
|
||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
||||
|
||||
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
|
||||
@@ -32,7 +33,6 @@ import deprecation
|
||||
if TYPE_CHECKING:
|
||||
import pyarrow as pa
|
||||
from .pydantic import LanceModel
|
||||
from datetime import timedelta
|
||||
|
||||
from ._lancedb import Connection as LanceDbConnection
|
||||
from .common import DATA, URI
|
||||
@@ -318,9 +318,8 @@ class LanceDBConnection(DBConnection):
|
||||
The root uri of the database.
|
||||
read_consistency_interval: timedelta, default None
|
||||
The interval at which to check for updates to the table from other
|
||||
processes. If None, then consistency is not checked. For performance
|
||||
reasons, this is the default. For strong consistency, set this to
|
||||
zero seconds. Then every read will check for updates from other
|
||||
processes. If None, then consistency is not checked. For strong consistency,
|
||||
set this to zero seconds. Then every read will check for updates from other
|
||||
processes. As a compromise, you can set this to a non-zero timedelta
|
||||
for eventual consistency. If more than that interval has passed since
|
||||
the last check, then the table will be checked for updates. Note: this
|
||||
@@ -352,7 +351,7 @@ class LanceDBConnection(DBConnection):
|
||||
self,
|
||||
uri: URI,
|
||||
*,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
read_consistency_interval: Optional[timedelta] = timedelta(seconds=5),
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
if not isinstance(uri, Path):
|
||||
|
||||
@@ -315,6 +315,11 @@ def test_table():
|
||||
db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=5))
|
||||
tbl = db.open_table("test_table")
|
||||
# --8<-- [end:table_eventual_consistency]
|
||||
# --8<-- [start:table_no_consistency]
|
||||
uri = "data/sample-lancedb"
|
||||
db = lancedb.connect(uri, read_consistency_interval=None)
|
||||
tbl = db.open_table("test_table")
|
||||
# --8<-- [end:table_no_consistency]
|
||||
# --8<-- [start:table_checkout_latest]
|
||||
tbl = db.open_table("test_table")
|
||||
|
||||
@@ -562,13 +567,19 @@ async def test_table_async():
|
||||
async_db = await lancedb.connect_async(uri, read_consistency_interval=timedelta(0))
|
||||
async_tbl = await async_db.open_table("test_table_async")
|
||||
# --8<-- [end:table_async_strong_consistency]
|
||||
# --8<-- [start:table_async_ventual_consistency]
|
||||
# --8<-- [start:table_async_eventual_consistency]
|
||||
uri = "data/sample-lancedb"
|
||||
async_db = await lancedb.connect_async(
|
||||
uri, read_consistency_interval=timedelta(seconds=5)
|
||||
)
|
||||
async_tbl = await async_db.open_table("test_table_async")
|
||||
# --8<-- [end:table_async_eventual_consistency]
|
||||
# --8<-- [start:table_async_no_consistency]
|
||||
uri = "data/sample-lancedb"
|
||||
async_db = await lancedb.connect_async(uri, read_consistency_interval=None)
|
||||
async_tbl = await async_db.open_table("test_table_async")
|
||||
# --8<-- [end:table_async_no_consistency]
|
||||
|
||||
# --8<-- [start:table_async_checkout_latest]
|
||||
async_tbl = await async_db.open_table("test_table_async")
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
|
||||
import re
|
||||
from datetime import timedelta
|
||||
import os
|
||||
|
||||
import lancedb
|
||||
@@ -299,13 +298,11 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=None)"
|
||||
|
||||
db = await lancedb.connect_async(
|
||||
tmp_path, read_consistency_interval=timedelta(seconds=5)
|
||||
)
|
||||
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=5s)"
|
||||
|
||||
db = await lancedb.connect_async(tmp_path, read_consistency_interval=None)
|
||||
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=None)"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_close(mem_db_async: lancedb.AsyncConnection):
|
||||
@@ -453,7 +450,7 @@ async def test_open_table(tmp_path):
|
||||
assert tbl.name == "test"
|
||||
assert (
|
||||
re.search(
|
||||
r"NativeTable\(test, uri=.*test\.lance, read_consistency_interval=None\)",
|
||||
r"NativeTable\(test, uri=.*test\.lance, read_consistency_interval=5s\)",
|
||||
str(tbl),
|
||||
)
|
||||
is not None
|
||||
|
||||
@@ -32,7 +32,11 @@ def test_basic(mem_db: DBConnection):
|
||||
table = mem_db.create_table("test", data=data)
|
||||
|
||||
assert table.name == "test"
|
||||
assert "LanceTable(name='test', version=1, _conn=LanceDBConnection(" in repr(table)
|
||||
assert (
|
||||
"LanceTable(name='test', version=1, "
|
||||
"read_consistency_interval=datetime.timedelta(seconds=5), "
|
||||
"_conn=LanceDBConnection("
|
||||
) in repr(table)
|
||||
expected_schema = pa.schema(
|
||||
{
|
||||
"vector": pa.list_(pa.float32(), 2),
|
||||
|
||||
@@ -204,7 +204,9 @@ pub fn connect(
|
||||
}
|
||||
if let Some(read_consistency_interval) = read_consistency_interval {
|
||||
let read_consistency_interval = Duration::from_secs_f64(read_consistency_interval);
|
||||
builder = builder.read_consistency_interval(read_consistency_interval);
|
||||
builder = builder.read_consistency_interval(Some(read_consistency_interval));
|
||||
} else {
|
||||
builder = builder.read_consistency_interval(None);
|
||||
}
|
||||
if let Some(storage_options) = storage_options {
|
||||
builder = builder.storage_options(storage_options);
|
||||
|
||||
Reference in New Issue
Block a user