feat: add list/create/delete/update/checkout tag API (#2353)

add the tag related API to list existing tags, attach tag to a version,
update the tag version, delete tag, get the version of the tag, and
checkout the version that the tag bounded to.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Introduced table version tagging, allowing users to create, update,
delete, and list human-readable tags for specific table versions.
  - Enabled checking out a table by either version number or tag name.
- Added new interfaces for tag management in both Python and Node.js
APIs, supporting synchronous and asynchronous workflows.

- **Bug Fixes**
  - None.

- **Documentation**
- Updated documentation to describe the new tagging features, including
usage examples.

- **Tests**
- Added comprehensive tests for tag creation, updating, deletion,
listing, and version checkout by tag in both Python and Node.js
environments.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
LuQQiu
2025-04-28 10:04:46 -07:00
committed by GitHub
parent 178bcf9c90
commit a9311c4dc0
17 changed files with 1192 additions and 99 deletions

View File

@@ -2,6 +2,11 @@
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{collections::HashMap, sync::Arc};
use crate::{
error::PythonErrorExt,
index::{extract_index_params, IndexConfig},
query::Query,
};
use arrow::{
datatypes::{DataType, Schema},
ffi_stream::ArrowArrayStreamReader,
@@ -12,19 +17,13 @@ use lancedb::table::{
Table as LanceDbTable,
};
use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
exceptions::{PyIOError, PyKeyError, PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods, PyInt, PyString},
Bound, FromPyObject, PyAny, PyObject, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
use crate::{
error::PythonErrorExt,
index::{extract_index_params, IndexConfig},
query::Query,
};
/// Statistics about a compaction operation.
#[pyclass(get_all)]
#[derive(Clone, Debug)]
@@ -322,10 +321,26 @@ impl Table {
})
}
pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
pub fn checkout(self_: PyRef<'_, Self>, version: PyObject) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.checkout(version).await.infer_error()
let py = self_.py();
let (is_int, int_value, string_value) = if let Ok(i) = version.downcast_bound::<PyInt>(py) {
let num: u64 = i.extract()?;
(true, num, String::new())
} else if let Ok(s) = version.downcast_bound::<PyString>(py) {
let str_value = s.to_string();
(false, 0, str_value)
} else {
return Err(PyIOError::new_err(
"version must be an integer or a string.",
));
};
future_into_py(py, async move {
if is_int {
inner.checkout(int_value).await.infer_error()
} else {
inner.checkout_tag(&string_value).await.infer_error()
}
})
}
@@ -352,6 +367,11 @@ impl Table {
Query::new(self.inner_ref().unwrap().query())
}
#[getter]
pub fn tags(&self) -> PyResult<Tags> {
Ok(Tags::new(self.inner_ref()?.clone()))
}
/// Optimize the on-disk data by compacting and pruning old data, for better performance.
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
pub fn optimize(
@@ -586,3 +606,72 @@ pub struct MergeInsertParams {
when_not_matched_by_source_delete: bool,
when_not_matched_by_source_condition: Option<String>,
}
#[pyclass]
pub struct Tags {
inner: LanceDbTable,
}
impl Tags {
pub fn new(table: LanceDbTable) -> Self {
Self { inner: table }
}
}
#[pymethods]
impl Tags {
pub fn list(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?;
Python::with_gil(|py| {
let py_dict = PyDict::new(py);
for (key, contents) in res {
let value_dict = PyDict::new(py);
value_dict.set_item("version", contents.version)?;
value_dict.set_item("manifest_size", contents.manifest_size)?;
py_dict.set_item(key, value_dict)?;
}
Ok(py_dict.unbind())
})
})
}
pub fn get_version(self_: PyRef<'_, Self>, tag: String) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let tags = inner.tags().await.infer_error()?;
let res = tags.get_version(tag.as_str()).await.infer_error()?;
Ok(res)
})
}
pub fn create(self_: PyRef<Self>, tag: String, version: u64) -> PyResult<Bound<PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let mut tags = inner.tags().await.infer_error()?;
tags.create(tag.as_str(), version).await.infer_error()?;
Ok(())
})
}
pub fn delete(self_: PyRef<Self>, tag: String) -> PyResult<Bound<PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let mut tags = inner.tags().await.infer_error()?;
tags.delete(tag.as_str()).await.infer_error()?;
Ok(())
})
}
pub fn update(self_: PyRef<Self>, tag: String, version: u64) -> PyResult<Bound<PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let mut tags = inner.tags().await.infer_error()?;
tags.update(tag.as_str(), version).await.infer_error()?;
Ok(())
})
}
}