mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-24 07:20:40 +00:00
feat!: refactor ConnectionInternal into a Database trait (#2067)
This opens up the door for more custom database implementations than the
two we have today. The biggest change should be invisible:
`ConnectionInternal` has been renamed to `Database`, made public, and
refactored.
However, there are a few breaking changes. `data_storage_version` and
`enable_v2_manifest_paths` have been moved from options on
`create_table` to options for the database which are now set via
`storage_options`.
Before:
```
db = connect(uri)
tbl = db.create_table("my_table", data, data_storage_version="legacy", enable_v2_manifest_paths=True)
```
After:
```
db = connect(uri, storage_options={
"new_table_enable_v2_manifest_paths": "true",
"new_table_data_storage_version": "legacy"
})
tbl = db.create_table("my_table", data)
```
BREAKING CHANGE: the data_storage_version and enable_v2_manifest_paths
options have moved from create_table options to storage_options.
BREAKING CHANGE: the use_legacy_format option has been removed;
data_storage_version has replaced it for some time now.
This commit is contained in:
@@ -299,12 +299,12 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
assert str(db) == f"NativeDatabase(uri={tmp_path}, read_consistency_interval=None)"
|
||||
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=None)"
|
||||
|
||||
db = await lancedb.connect_async(
|
||||
tmp_path, read_consistency_interval=timedelta(seconds=5)
|
||||
)
|
||||
assert str(db) == f"NativeDatabase(uri={tmp_path}, read_consistency_interval=5s)"
|
||||
assert str(db) == f"ListingDatabase(uri={tmp_path}, read_consistency_interval=5s)"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -396,13 +396,16 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_table_v2_manifest_paths_async(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
db_with_v2_paths = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_v2_manifest_paths": "true"}
|
||||
)
|
||||
db_no_v2_paths = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_v2_manifest_paths": "false"}
|
||||
)
|
||||
# Create table in v2 mode with v2 manifest paths enabled
|
||||
tbl = await db.create_table(
|
||||
tbl = await db_with_v2_paths.create_table(
|
||||
"test_v2_manifest_paths",
|
||||
data=[{"id": 0}],
|
||||
use_legacy_format=False,
|
||||
enable_v2_manifest_paths=True,
|
||||
)
|
||||
assert await tbl.uses_v2_manifest_paths()
|
||||
manifests_dir = tmp_path / "test_v2_manifest_paths.lance" / "_versions"
|
||||
@@ -410,11 +413,9 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
|
||||
assert re.match(r"\d{20}\.manifest", manifest)
|
||||
|
||||
# Start a table in V1 mode then migrate
|
||||
tbl = await db.create_table(
|
||||
tbl = await db_no_v2_paths.create_table(
|
||||
"test_v2_migration",
|
||||
data=[{"id": 0}],
|
||||
use_legacy_format=False,
|
||||
enable_v2_manifest_paths=False,
|
||||
)
|
||||
assert not await tbl.uses_v2_manifest_paths()
|
||||
manifests_dir = tmp_path / "test_v2_migration.lance" / "_versions"
|
||||
@@ -583,7 +584,7 @@ def test_empty_or_nonexistent_table(mem_db: lancedb.DBConnection):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_in_v2_mode(mem_db_async: lancedb.AsyncConnection):
|
||||
async def test_create_in_v2_mode():
|
||||
def make_data():
|
||||
for i in range(10):
|
||||
yield pa.record_batch([pa.array([x for x in range(1024)])], names=["x"])
|
||||
@@ -594,10 +595,13 @@ async def test_create_in_v2_mode(mem_db_async: lancedb.AsyncConnection):
|
||||
schema = pa.schema([pa.field("x", pa.int64())])
|
||||
|
||||
# Create table in v1 mode
|
||||
tbl = await mem_db_async.create_table(
|
||||
"test", data=make_data(), schema=schema, data_storage_version="legacy"
|
||||
|
||||
v1_db = await lancedb.connect_async(
|
||||
"memory://", storage_options={"new_table_data_storage_version": "legacy"}
|
||||
)
|
||||
|
||||
tbl = await v1_db.create_table("test", data=make_data(), schema=schema)
|
||||
|
||||
async def is_in_v2_mode(tbl):
|
||||
batches = (
|
||||
await tbl.query().limit(10 * 1024).to_batches(max_batch_length=1024 * 10)
|
||||
@@ -610,10 +614,12 @@ async def test_create_in_v2_mode(mem_db_async: lancedb.AsyncConnection):
|
||||
assert not await is_in_v2_mode(tbl)
|
||||
|
||||
# Create table in v2 mode
|
||||
tbl = await mem_db_async.create_table(
|
||||
"test_v2", data=make_data(), schema=schema, use_legacy_format=False
|
||||
v2_db = await lancedb.connect_async(
|
||||
"memory://", storage_options={"new_table_data_storage_version": "stable"}
|
||||
)
|
||||
|
||||
tbl = await v2_db.create_table("test_v2", data=make_data(), schema=schema)
|
||||
|
||||
assert await is_in_v2_mode(tbl)
|
||||
|
||||
# Add data (should remain in v2 mode)
|
||||
@@ -622,20 +628,18 @@ async def test_create_in_v2_mode(mem_db_async: lancedb.AsyncConnection):
|
||||
assert await is_in_v2_mode(tbl)
|
||||
|
||||
# Create empty table in v2 mode and add data
|
||||
tbl = await mem_db_async.create_table(
|
||||
"test_empty_v2", data=None, schema=schema, use_legacy_format=False
|
||||
)
|
||||
tbl = await v2_db.create_table("test_empty_v2", data=None, schema=schema)
|
||||
await tbl.add(make_table())
|
||||
|
||||
assert await is_in_v2_mode(tbl)
|
||||
|
||||
# Create empty table uses v1 mode by default
|
||||
tbl = await mem_db_async.create_table(
|
||||
"test_empty_v2_default", data=None, schema=schema, data_storage_version="legacy"
|
||||
)
|
||||
# Db uses v2 mode by default
|
||||
db = await lancedb.connect_async("memory://")
|
||||
|
||||
tbl = await db.create_table("test_empty_v2_default", data=None, schema=schema)
|
||||
await tbl.add(make_table())
|
||||
|
||||
assert not await is_in_v2_mode(tbl)
|
||||
assert await is_in_v2_mode(tbl)
|
||||
|
||||
|
||||
def test_replace_index(mem_db: lancedb.DBConnection):
|
||||
|
||||
Reference in New Issue
Block a user