mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 23:12:58 +00:00
feat: add table stats API (#2363)
* Add a new "table stats" API to expose basic table and fragment statistics with local and remote table implementations ### Questions * This is using `calculate_data_stats` to determine total bytes in the table. This seems like a potentially expensive operation - are there any concerns about performance for large datasets? ### Notes * bytes_on_disk seems to be stored at the column level but there does not seem to be a way to easily calculate total bytes per fragment. This may need to be added in lance before we can support fragment size (bytes) statistics. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Added a method to retrieve comprehensive table statistics, including total rows, index counts, storage size, and detailed fragment size metrics such as minimum, maximum, mean, and percentiles. - Enabled fetching of table statistics from remote sources through asynchronous requests. - Extended table interfaces across Python, Rust, and Node.js to support synchronous and asynchronous retrieval of table statistics. - **Tests** - Introduced tests to verify the accuracy of the new table statistics feature for both populated and empty tables. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -389,6 +389,50 @@ def test_table_wait_for_index_timeout():
|
||||
table.wait_for_index(["id_idx"], timedelta(seconds=1))
|
||||
|
||||
|
||||
def test_stats():
|
||||
stats = {
|
||||
"total_bytes": 38,
|
||||
"num_rows": 2,
|
||||
"num_indices": 0,
|
||||
"fragment_stats": {
|
||||
"num_fragments": 1,
|
||||
"num_small_fragments": 1,
|
||||
"lengths": {
|
||||
"min": 2,
|
||||
"max": 2,
|
||||
"mean": 2,
|
||||
"p25": 2,
|
||||
"p50": 2,
|
||||
"p75": 2,
|
||||
"p99": 2,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/create/?mode=create":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(b"{}")
|
||||
elif request.path == "/v1/table/test/stats/":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
payload = json.dumps(stats)
|
||||
request.wfile.write(payload.encode())
|
||||
else:
|
||||
print(request.path)
|
||||
request.send_response(404)
|
||||
request.end_headers()
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
table = db.create_table("test", [{"id": 1}])
|
||||
res = table.stats()
|
||||
print(f"{res=}")
|
||||
assert res == stats
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def query_test_table(query_handler, *, server_version=Version("0.1.0")):
|
||||
def handler(request):
|
||||
|
||||
Reference in New Issue
Block a user