docs: improve optimization docs (#1957)

* Add `See Also` sections to `cleanup_old_versions` and `compact_files` so
readers know they are related to `optimize`.
* Fix link to `compact_files` arguments.
* Improve formatting of note.
This commit is contained in:
Will Jones
2024-12-19 10:55:11 -08:00
committed by GitHub
parent 5ddd84cec0
commit 61a714a459
4 changed files with 55 additions and 19 deletions

View File

@@ -15,6 +15,7 @@ from datetime import timedelta
import logging
from functools import cached_property
from typing import Dict, Iterable, List, Optional, Union, Literal
import warnings
from lancedb._lancedb import IndexConfig
from lancedb.embeddings.base import EmbeddingFunctionConfig
@@ -481,16 +482,28 @@ class RemoteTable(Table):
)
def cleanup_old_versions(self, *_):
"""cleanup_old_versions() is not supported on the LanceDB cloud"""
raise NotImplementedError(
"cleanup_old_versions() is not supported on the LanceDB cloud"
"""
cleanup_old_versions() is a no-op on LanceDB Cloud.
Tables are automatically cleaned up and optimized.
"""
warnings.warn(
"cleanup_old_versions() is a no-op on LanceDB Cloud. "
"Tables are automatically cleaned up and optimized."
)
pass
def compact_files(self, *_):
"""compact_files() is not supported on the LanceDB cloud"""
raise NotImplementedError(
"compact_files() is not supported on the LanceDB cloud"
"""
compact_files() is a no-op on LanceDB Cloud.
Tables are automatically compacted and optimized.
"""
warnings.warn(
"compact_files() is a no-op on LanceDB Cloud. "
"Tables are automatically compacted and optimized."
)
pass
def optimize(
self,
@@ -498,12 +511,16 @@ class RemoteTable(Table):
cleanup_older_than: Optional[timedelta] = None,
delete_unverified: bool = False,
):
"""optimize() is not supported on the LanceDB cloud.
Indices are optimized automatically."""
raise NotImplementedError(
"optimize() is not supported on the LanceDB cloud. "
"""
optimize() is a no-op on LanceDB Cloud.
Indices are optimized automatically.
"""
warnings.warn(
"optimize() is a no-op on LanceDB Cloud. "
"Indices are optimized automatically."
)
pass
def count_rows(self, filter: Optional[str] = None) -> int:
return LOOP.run(self._table.count_rows(filter))

View File

@@ -917,9 +917,6 @@ class Table(ABC):
"""
Clean up old versions of the table, freeing disk space.
Note: This function is not available in LanceDb Cloud (since LanceDb
Cloud manages cleanup for you automatically)
Parameters
----------
older_than: timedelta, default None
@@ -936,21 +933,38 @@ class Table(ABC):
CleanupStats
The stats of the cleanup operation, including how many bytes were
freed.
See Also
--------
[Table.optimize][lancedb.table.Table.optimize]: A more comprehensive
optimization operation that includes cleanup as well as other operations.
Notes
-----
This function is not available in LanceDB Cloud (since LanceDB
Cloud manages cleanup for you automatically)
"""
@abstractmethod
def compact_files(self, *args, **kwargs):
"""
Run the compaction process on the table.
Note: This function is not available in LanceDb Cloud (since LanceDb
Cloud manages compaction for you automatically)
This can be run after making several small appends to optimize the table
for faster reads.
Arguments are passed onto :meth:`lance.dataset.DatasetOptimizer.compact_files`.
Arguments are passed onto Lance's
[compact_files][lance.dataset.DatasetOptimizer.compact_files].
For most cases, the default should be fine.
See Also
--------
[Table.optimize][lancedb.table.Table.optimize]: A more comprehensive
optimization operation that includes cleanup as well as other operations.
Notes
-----
This function is not available in LanceDB Cloud (since LanceDB
Cloud manages compaction for you automatically)
"""
@abstractmethod