mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-13 23:32:57 +00:00
@@ -2451,7 +2451,10 @@ class AsyncTable:
|
||||
await self._inner.restore()
|
||||
|
||||
async def optimize(
|
||||
self, *, cleanup_older_than: Optional[timedelta] = None
|
||||
self,
|
||||
*,
|
||||
cleanup_older_than: Optional[timedelta] = None,
|
||||
delete_unverified: bool = False,
|
||||
) -> OptimizeStats:
|
||||
"""
|
||||
Optimize the on-disk data and indices for better performance.
|
||||
@@ -2470,6 +2473,11 @@ class AsyncTable:
|
||||
All files belonging to versions older than this will be removed. Set
|
||||
to 0 days to remove all versions except the latest. The latest version
|
||||
is never removed.
|
||||
delete_unverified: bool, default False
|
||||
Files leftover from a failed transaction may appear to be part of an
|
||||
in-progress operation (e.g. appending new data) and these files will not
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
@@ -2491,7 +2499,7 @@ class AsyncTable:
|
||||
"""
|
||||
if cleanup_older_than is not None:
|
||||
cleanup_older_than = round(cleanup_older_than.total_seconds() * 1000)
|
||||
return await self._inner.optimize(cleanup_older_than)
|
||||
return await self._inner.optimize(cleanup_older_than, delete_unverified)
|
||||
|
||||
async def list_indices(self) -> IndexConfig:
|
||||
"""
|
||||
|
||||
@@ -8,6 +8,7 @@ from pathlib import Path
|
||||
from time import sleep
|
||||
from typing import List
|
||||
from unittest.mock import PropertyMock, patch
|
||||
import os
|
||||
|
||||
import lance
|
||||
import lancedb
|
||||
@@ -1052,3 +1053,25 @@ async def test_optimize(db_async: AsyncConnection):
|
||||
assert stats.prune.old_versions_removed == 3
|
||||
|
||||
assert await table.query().to_arrow() == pa.table({"x": [[1], [2]]})
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_optimize_delete_unverified(db_async: AsyncConnection, tmp_path):
|
||||
table = await db_async.create_table(
|
||||
"test",
|
||||
data=[{"x": [1]}],
|
||||
)
|
||||
await table.add(
|
||||
data=[
|
||||
{"x": [2]},
|
||||
],
|
||||
)
|
||||
version = await table.version()
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
|
||||
os.remove(path)
|
||||
stats = await table.optimize(delete_unverified=False)
|
||||
assert stats.prune.old_versions_removed == 0
|
||||
stats = await table.optimize(
|
||||
cleanup_older_than=timedelta(seconds=0), delete_unverified=True
|
||||
)
|
||||
assert stats.prune.old_versions_removed == 2
|
||||
|
||||
@@ -248,6 +248,7 @@ impl Table {
|
||||
pub fn optimize(
|
||||
self_: PyRef<'_, Self>,
|
||||
cleanup_since_ms: Option<u64>,
|
||||
delete_unverified: Option<bool>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
let older_than = if let Some(ms) = cleanup_since_ms {
|
||||
@@ -275,7 +276,7 @@ impl Table {
|
||||
let prune_stats = inner
|
||||
.optimize(OptimizeAction::Prune {
|
||||
older_than,
|
||||
delete_unverified: None,
|
||||
delete_unverified,
|
||||
error_if_tagged_old_versions: None,
|
||||
})
|
||||
.await
|
||||
|
||||
Reference in New Issue
Block a user