feat: cleanup and compaction (#518)

#488
This commit is contained in:
Will Jones
2023-10-11 12:49:12 -07:00
committed by GitHub
parent e41894b071
commit db7bdefe77
12 changed files with 394 additions and 7 deletions

View File

@@ -12,6 +12,7 @@
# limitations under the License.
import functools
from datetime import timedelta
from pathlib import Path
from typing import List
from unittest.mock import PropertyMock, patch
@@ -442,3 +443,31 @@ def test_empty_query(db):
df = table.search().select(["id"]).where("text='bar'").limit(1).to_pandas()
val = df.id.iloc[0]
assert val == 1
def test_compact_cleanup(db):
    """Exercise file compaction followed by cleanup of old table versions.

    The test creates a table with two rows, appends a third, compacts the
    table, and then runs cleanup twice: once with default arguments and once
    with a zero age threshold. It checks the row count, the version counter,
    the reported compaction/cleanup statistics, and finally that checking out
    version 3 raises.
    """
    seed_rows = [{"text": "foo", "id": 0}, {"text": "bar", "id": 1}]
    table = LanceTable.create(db, "my_table", data=seed_rows)
    table.add([{"text": "baz", "id": 2}])
    assert len(table) == 3
    assert table.version == 3

    # Compaction must keep every row while producing a new table version.
    compaction = table.compact_files()
    assert len(table) == 3
    assert table.version == 4
    assert stats_removed(compaction) > 0 if False else compaction.fragments_removed > 0
    assert compaction.fragments_added == 1

    # With default arguments nothing is expected to be reclaimed.
    default_cleanup = table.cleanup_old_versions()
    assert default_cleanup.bytes_removed == 0

    # A zero age threshold is expected to reclaim bytes without bumping the
    # version counter.
    forced_cleanup = table.cleanup_old_versions(
        older_than=timedelta(0),
        delete_unverified=True,
    )
    assert forced_cleanup.bytes_removed > 0
    assert table.version == 4

    # After the forced cleanup, checking out version 3 must fail.
    with pytest.raises(Exception, match="Version 3 no longer exists"):
        table.checkout(3)