feat: allow Python and TypeScript users to create Sessions (#2530)

## Summary
- Exposes `Session` in Python and TypeScript so users can set
  `index_cache_size_bytes` and `metadata_cache_size_bytes`.
  - The `Session` is attached to the `Connection`, and is thus shared across
    all tables in that connection.
- Adds deprecation warnings for table-level cache configuration


🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Will Jones
2025-07-24 12:06:29 -07:00
committed by GitHub
parent 81afd8a42f
commit 3d1f102087
21 changed files with 514 additions and 13 deletions

View File

@@ -179,7 +179,7 @@ impl Connection {
}
#[pyfunction]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
#[allow(clippy::too_many_arguments)]
pub fn connect(
py: Python,
@@ -190,6 +190,7 @@ pub fn connect(
read_consistency_interval: Option<f64>,
client_config: Option<PyClientConfig>,
storage_options: Option<HashMap<String, String>>,
session: Option<crate::session::Session>,
) -> PyResult<Bound<'_, PyAny>> {
future_into_py(py, async move {
let mut builder = lancedb::connect(&uri);
@@ -213,6 +214,9 @@ pub fn connect(
if let Some(client_config) = client_config {
builder = builder.client_config(client_config.into());
}
if let Some(session) = session {
builder = builder.session(session.inner.clone());
}
Ok(Connection::new(builder.execute().await.infer_error()?))
})
}

View File

@@ -11,6 +11,7 @@ use pyo3::{
wrap_pyfunction, Bound, PyResult, Python,
};
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
use session::Session;
use table::{
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
Table, UpdateResult,
@@ -21,6 +22,7 @@ pub mod connection;
pub mod error;
pub mod index;
pub mod query;
pub mod session;
pub mod table;
pub mod util;
@@ -31,6 +33,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
.write_style("LANCEDB_LOG_STYLE");
env_logger::init_from_env(env);
m.add_class::<Connection>()?;
m.add_class::<Session>()?;
m.add_class::<Table>()?;
m.add_class::<IndexConfig>()?;
m.add_class::<Query>()?;

107
python/src/session.rs Normal file
View File

@@ -0,0 +1,107 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use pyo3::{pyclass, pymethods, PyResult};
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
#[pyclass]
// Cloning copies only the `Arc` handle, so every clone refers to the same
// underlying `LanceSession`. NOTE(review): presumably `Clone` is also what
// lets pyo3 hand a `Session` to `connect` by value -- confirm.
#[derive(Clone)]
pub struct Session {
// Shared handle to the wrapped Lance session; crate-visible so that other
// modules in this crate can pass it on to the connection builder.
pub(crate) inner: Arc<LanceSession>,
}
impl Default for Session {
    /// Builds a session around `LanceSession::default()`, wrapped in a fresh `Arc`.
    fn default() -> Self {
        let inner = Arc::new(LanceSession::default());
        Self { inner }
    }
}
#[pymethods]
impl Session {
    /// Create a new session with custom cache sizes.
    ///
    /// Parameters
    /// ----------
    /// index_cache_size_bytes : int, optional
    /// The size of the index cache in bytes.
    /// Index data is stored in memory in this cache to speed up queries.
    /// Default: 6GB (6 * 1024 * 1024 * 1024 bytes)
    /// metadata_cache_size_bytes : int, optional
    /// The size of the metadata cache in bytes.
    /// The metadata cache stores file metadata and schema information in memory.
    /// This cache improves scan and write performance.
    /// Default: 1GB (1024 * 1024 * 1024 bytes)
    #[new]
    #[pyo3(signature = (index_cache_size_bytes=None, metadata_cache_size_bytes=None))]
    pub fn new(
        index_cache_size_bytes: Option<usize>,
        metadata_cache_size_bytes: Option<usize>,
    ) -> PyResult<Self> {
        // One gibibyte, used to spell out the fallback sizes below.
        const GIB: usize = 1024 * 1024 * 1024;

        // An unset size falls back to 6 GiB (index) or 1 GiB (metadata).
        let index_bytes = match index_cache_size_bytes {
            Some(bytes) => bytes,
            None => 6 * GIB,
        };
        let metadata_bytes = match metadata_cache_size_bytes {
            Some(bytes) => bytes,
            None => GIB,
        };

        // Each session constructed here gets its own default object store registry.
        let registry = Arc::new(ObjectStoreRegistry::default());
        let inner = Arc::new(LanceSession::new(index_bytes, metadata_bytes, registry));
        Ok(Self { inner })
    }

    /// Create a session with default cache sizes.
    ///
    /// This is equivalent to creating a session with 6GB index cache
    /// and 1GB metadata cache.
    ///
    /// Returns
    /// -------
    /// Session
    /// A new Session with default cache sizes
    // The `Default` trait is implemented as well; this inherent method with the
    // same name exists so it can be exported as a static method, hence the
    // lint exemption.
    #[staticmethod]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> Self {
        // Name the trait explicitly: `Self::default()` would resolve to this
        // inherent method and recurse.
        <Self as Default>::default()
    }

    /// Get the current size of the session caches in bytes.
    ///
    /// Returns
    /// -------
    /// int
    /// The total size of all caches in the session
    #[getter]
    pub fn size_bytes(&self) -> u64 {
        self.inner.size_bytes()
    }

    /// Get the approximate number of items cached in the session.
    ///
    /// Returns
    /// -------
    /// int
    /// The number of cached items across all caches
    #[getter]
    pub fn approx_num_items(&self) -> usize {
        self.inner.approx_num_items()
    }

    // Summarises the session using the same two values the getters expose.
    fn __repr__(&self) -> String {
        let total_bytes = self.size_bytes();
        let item_count = self.approx_num_items();
        format!(
            "Session(size_bytes={}, approx_num_items={})",
            total_bytes, item_count
        )
    }
}