From cc9473a94aeefb04ec48a0ab01aac73b10b421cc Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Thu, 1 Feb 2024 15:06:00 -0800 Subject: [PATCH] docs: add cleanup_old_versions and compact_files to `Table` for documentation purposes (#900) Closes #819 --- python/lancedb/remote/table.py | 12 ++++++++ python/lancedb/table.py | 51 ++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/python/lancedb/remote/table.py b/python/lancedb/remote/table.py index a313d2c9..e751bcbb 100644 --- a/python/lancedb/remote/table.py +++ b/python/lancedb/remote/table.py @@ -359,6 +359,18 @@ class RemoteTable(Table): payload = {"predicate": where, "updates": updates} self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload) + def cleanup_old_versions(self, *_): + """cleanup_old_versions() is not supported on the LanceDB cloud""" + raise NotImplementedError( + "cleanup_old_versions() is not supported on the LanceDB cloud" + ) + + def compact_files(self, *_): + """compact_files() is not supported on the LanceDB cloud""" + raise NotImplementedError( + "compact_files() is not supported on the LanceDB cloud" + ) + def add_index(tbl: pa.Table, i: int) -> pa.Table: return tbl.add_column( diff --git a/python/lancedb/table.py b/python/lancedb/table.py index 6e433425..56638d81 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -590,6 +590,52 @@ class Table(ABC): """ raise NotImplementedError + @abstractmethod + def cleanup_old_versions( + self, + older_than: Optional[timedelta] = None, + *, + delete_unverified: bool = False, + ) -> CleanupStats: + """ + Clean up old versions of the table, freeing disk space. + + Note: This function is not available in LanceDb Cloud (since LanceDb + Cloud manages cleanup for you automatically) + + Parameters + ---------- + older_than: timedelta, default None + The minimum age of the version to delete. If None, then this defaults + to two weeks. + delete_unverified: bool, default False + Because they may be part of an in-progress transaction, files newer + than 7 days old are not deleted by default. If you are sure that + there are no in-progress transactions, then you can set this to True + to delete all files older than `older_than`. + + Returns + ------- + CleanupStats + The stats of the cleanup operation, including how many bytes were + freed. + """ + + @abstractmethod + def compact_files(self, *args, **kwargs): + """ + Run the compaction process on the table. + + Note: This function is not available in LanceDb Cloud (since LanceDb + Cloud manages compaction for you automatically) + + This can be run after making several small appends to optimize the table + for faster reads. + + Arguments are passed onto :meth:`lance.dataset.DatasetOptimizer.compact_files`. + For most cases, the default should be fine. + """ + class LanceTable(Table): """ @@ -1314,8 +1360,9 @@ class LanceTable(Table): This can be run after making several small appends to optimize the table for faster reads. - Arguments are passed onto :meth:`lance.dataset.DatasetOptimizer.compact_files`. - For most cases, the default should be fine. + Arguments are passed onto `lance.dataset.DatasetOptimizer.compact_files`. + (see Lance documentation for more details) For most cases, the default + should be fine. """ return self.to_lance().optimize.compact_files(*args, **kwargs)