mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
feat: allow Python and Typescript users to create Sessions (#2530)
## Summary

- Exposes `Session` in Python and TypeScript so users can set the `index_cache_size_bytes` and `metadata_cache_size_bytes`
  * The `Session` is attached to the `Connection`, and thus shared across all tables in that connection.
- Adds deprecation warnings for table-level cache configuration

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -3819,9 +3819,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "io-uring"
|
name = "io-uring"
|
||||||
version = "0.7.8"
|
version = "0.7.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
|
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.9.1",
|
"bitflags 2.9.1",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
|||||||
84
docs/src/js/classes/Session.md
Normal file
84
docs/src/js/classes/Session.md
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
[@lancedb/lancedb](../globals.md) / Session
|
||||||
|
|
||||||
|
# Class: Session
|
||||||
|
|
||||||
|
A session for managing caches and object stores across LanceDB operations.
|
||||||
|
|
||||||
|
Sessions allow you to configure cache sizes for index and metadata caches,
|
||||||
|
which can significantly impact performance for large datasets.
|
||||||
|
|
||||||
|
## Constructors
|
||||||
|
|
||||||
|
### new Session()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a new session with custom cache sizes.
|
||||||
|
|
||||||
|
**Parameter details:**
|
||||||
|
|
||||||
|
- `indexCacheSizeBytes`: The size of the index cache in bytes.
|
||||||
|
Defaults to 6GB if not specified.
|
||||||
|
- `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
|
||||||
|
Defaults to 1GB if not specified.
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
* **indexCacheSizeBytes?**: `null` \| `bigint`
|
||||||
|
|
||||||
|
* **metadataCacheSizeBytes?**: `null` \| `bigint`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Session`](Session.md)
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
|
||||||
|
### approxNumItems()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
approxNumItems(): number
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the approximate number of items cached in the session.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`number`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### sizeBytes()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
sizeBytes(): bigint
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the current size of the session caches in bytes.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`bigint`
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
### default()
|
||||||
|
|
||||||
|
```ts
|
||||||
|
static default(): Session
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a session with default cache sizes.
|
||||||
|
|
||||||
|
This is equivalent to creating a session with 6GB index cache
|
||||||
|
and 1GB metadata cache.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
[`Session`](Session.md)
|
||||||
@@ -6,10 +6,13 @@
|
|||||||
|
|
||||||
# Function: connect()
|
# Function: connect()
|
||||||
|
|
||||||
## connect(uri, options)
|
## connect(uri, options, session)
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
function connect(uri, options?): Promise<Connection>
|
function connect(
|
||||||
|
uri,
|
||||||
|
options?,
|
||||||
|
session?): Promise<Connection>
|
||||||
```
|
```
|
||||||
|
|
||||||
Connect to a LanceDB instance at the given URI.
|
Connect to a LanceDB instance at the given URI.
|
||||||
@@ -29,6 +32,8 @@ Accepted formats:
|
|||||||
* **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
* **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
||||||
The options to use when connecting to the database
|
The options to use when connecting to the database
|
||||||
|
|
||||||
|
* **session?**: [`Session`](../classes/Session.md)
|
||||||
|
|
||||||
### Returns
|
### Returns
|
||||||
|
|
||||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||||
@@ -77,7 +82,7 @@ Accepted formats:
|
|||||||
|
|
||||||
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
|
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
|
||||||
|
|
||||||
### Example
|
### Examples
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
const conn = await connect({
|
const conn = await connect({
|
||||||
@@ -85,3 +90,11 @@ const conn = await connect({
|
|||||||
storageOptions: {timeout: "60s"}
|
storageOptions: {timeout: "60s"}
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const session = Session.default();
|
||||||
|
const conn = await connect({
|
||||||
|
uri: "/path/to/database",
|
||||||
|
session: session
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|||||||
@@ -29,6 +29,7 @@
|
|||||||
- [Query](classes/Query.md)
|
- [Query](classes/Query.md)
|
||||||
- [QueryBase](classes/QueryBase.md)
|
- [QueryBase](classes/QueryBase.md)
|
||||||
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
||||||
|
- [Session](classes/Session.md)
|
||||||
- [Table](classes/Table.md)
|
- [Table](classes/Table.md)
|
||||||
- [TagContents](classes/TagContents.md)
|
- [TagContents](classes/TagContents.md)
|
||||||
- [Tags](classes/Tags.md)
|
- [Tags](classes/Tags.md)
|
||||||
|
|||||||
@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.
|
|||||||
|
|
||||||
***
|
***
|
||||||
|
|
||||||
|
### session?
|
||||||
|
|
||||||
|
```ts
|
||||||
|
optional session: Session;
|
||||||
|
```
|
||||||
|
|
||||||
|
(For LanceDB OSS only): the session to use for this connection. Holds
|
||||||
|
shared caches and other session-specific state.
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### storageOptions?
|
### storageOptions?
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
### indexCacheSize?
|
### ~~indexCacheSize?~~
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
optional indexCacheSize: number;
|
optional indexCacheSize: number;
|
||||||
@@ -16,6 +16,11 @@ optional indexCacheSize: number;
|
|||||||
|
|
||||||
Set the size of the index cache, specified as a number of entries
|
Set the size of the index cache, specified as a number of entries
|
||||||
|
|
||||||
|
#### Deprecated
|
||||||
|
|
||||||
|
Use session-level cache configuration instead.
|
||||||
|
Create a Session with custom cache sizes and pass it to the connect() function.
|
||||||
|
|
||||||
The exact meaning of an "entry" will depend on the type of index:
|
The exact meaning of an "entry" will depend on the type of index:
|
||||||
- IVF: there is one entry for each IVF partition
|
- IVF: there is one entry for each IVF partition
|
||||||
- BTREE: there is one entry for the entire index
|
- BTREE: there is one entry for the entire index
|
||||||
|
|||||||
46
nodejs/__test__/session.test.ts
Normal file
46
nodejs/__test__/session.test.ts
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import * as tmp from "tmp";
|
||||||
|
import { Session, connect } from "../lancedb";
|
||||||
|
|
||||||
|
describe("Session", () => {
  // Each test gets a fresh temporary directory that is removed afterwards.
  let tmpDir: tmp.DirResult;
  beforeEach(() => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
  });
  afterEach(() => tmpDir.removeCallback());

  it("should configure cache sizes and work with database operations", async () => {
    // Create session with small cache limits for testing
    const indexCacheSize = BigInt(1024 * 1024); // 1MB
    const metadataCacheSize = BigInt(512 * 1024); // 512KB
    const cacheLimit = indexCacheSize + metadataCacheSize;

    const session = new Session(indexCacheSize, metadataCacheSize);

    // Record initial cache state
    const initialCacheSize = session.sizeBytes();
    const initialCacheItems = session.approxNumItems();

    // Test session works with database connection
    const db = await connect({ uri: tmpDir.name, session: session });

    // Create and use a table to exercise the session
    const data = Array.from({ length: 100 }, (_, i) => ({
      id: i,
      text: `item ${i}`,
    }));
    const table = await db.createTable("test", data);
    const results = await table.query().limit(5).toArray();

    expect(results).toHaveLength(5);

    // Verify cache usage increased after operations
    const finalCacheSize = session.sizeBytes();
    const finalCacheItems = session.approxNumItems();

    expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
    expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease

    // The configured limits must hold both before and after the cache has
    // been exercised; checking only the initial (empty) size proves nothing.
    expect(initialCacheSize).toBeLessThan(cacheLimit);
    expect(finalCacheSize).toBeLessThan(cacheLimit);
  });
});
|
||||||
@@ -85,6 +85,9 @@ export interface OpenTableOptions {
|
|||||||
/**
|
/**
|
||||||
* Set the size of the index cache, specified as a number of entries
|
* Set the size of the index cache, specified as a number of entries
|
||||||
*
|
*
|
||||||
|
* @deprecated Use session-level cache configuration instead.
|
||||||
|
* Create a Session with custom cache sizes and pass it to the connect() function.
|
||||||
|
*
|
||||||
* The exact meaning of an "entry" will depend on the type of index:
|
* The exact meaning of an "entry" will depend on the type of index:
|
||||||
* - IVF: there is one entry for each IVF partition
|
* - IVF: there is one entry for each IVF partition
|
||||||
* - BTREE: there is one entry for the entire index
|
* - BTREE: there is one entry for the entire index
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import {
|
|||||||
import {
|
import {
|
||||||
ConnectionOptions,
|
ConnectionOptions,
|
||||||
Connection as LanceDbConnection,
|
Connection as LanceDbConnection,
|
||||||
|
Session,
|
||||||
} from "./native.js";
|
} from "./native.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
@@ -51,6 +52,8 @@ export {
|
|||||||
OpenTableOptions,
|
OpenTableOptions,
|
||||||
} from "./connection";
|
} from "./connection";
|
||||||
|
|
||||||
|
export { Session } from "./native.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
ExecutableQuery,
|
ExecutableQuery,
|
||||||
Query,
|
Query,
|
||||||
@@ -131,6 +134,7 @@ export { IntoSql, packBits } from "./util";
|
|||||||
export async function connect(
|
export async function connect(
|
||||||
uri: string,
|
uri: string,
|
||||||
options?: Partial<ConnectionOptions>,
|
options?: Partial<ConnectionOptions>,
|
||||||
|
session?: Session,
|
||||||
): Promise<Connection>;
|
): Promise<Connection>;
|
||||||
/**
|
/**
|
||||||
* Connect to a LanceDB instance at the given URI.
|
* Connect to a LanceDB instance at the given URI.
|
||||||
@@ -149,31 +153,43 @@ export async function connect(
|
|||||||
* storageOptions: {timeout: "60s"}
|
* storageOptions: {timeout: "60s"}
|
||||||
* });
|
* });
|
||||||
* ```
|
* ```
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* ```ts
|
||||||
|
* const session = Session.default();
|
||||||
|
* const conn = await connect({
|
||||||
|
* uri: "/path/to/database",
|
||||||
|
* session: session
|
||||||
|
* });
|
||||||
|
* ```
|
||||||
*/
|
*/
|
||||||
export async function connect(
|
export async function connect(
|
||||||
options: Partial<ConnectionOptions> & { uri: string },
|
options: Partial<ConnectionOptions> & { uri: string },
|
||||||
): Promise<Connection>;
|
): Promise<Connection>;
|
||||||
export async function connect(
|
export async function connect(
|
||||||
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
|
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
|
||||||
options: Partial<ConnectionOptions> = {},
|
options?: Partial<ConnectionOptions>,
|
||||||
): Promise<Connection> {
|
): Promise<Connection> {
|
||||||
let uri: string | undefined;
|
let uri: string | undefined;
|
||||||
|
let finalOptions: Partial<ConnectionOptions> = {};
|
||||||
|
|
||||||
if (typeof uriOrOptions !== "string") {
|
if (typeof uriOrOptions !== "string") {
|
||||||
const { uri: uri_, ...opts } = uriOrOptions;
|
const { uri: uri_, ...opts } = uriOrOptions;
|
||||||
uri = uri_;
|
uri = uri_;
|
||||||
options = opts;
|
finalOptions = opts;
|
||||||
} else {
|
} else {
|
||||||
uri = uriOrOptions;
|
uri = uriOrOptions;
|
||||||
|
finalOptions = options || {};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!uri) {
|
if (!uri) {
|
||||||
throw new Error("uri is required");
|
throw new Error("uri is required");
|
||||||
}
|
}
|
||||||
|
|
||||||
options = (options as ConnectionOptions) ?? {};
|
finalOptions = (finalOptions as ConnectionOptions) ?? {};
|
||||||
(<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
|
(<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
|
||||||
(<ConnectionOptions>options).storageOptions,
|
(<ConnectionOptions>finalOptions).storageOptions,
|
||||||
);
|
);
|
||||||
const nativeConn = await LanceDbConnection.new(uri, options);
|
const nativeConn = await LanceDbConnection.new(uri, finalOptions);
|
||||||
return new LocalConnection(nativeConn);
|
return new LocalConnection(nativeConn);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,6 +74,10 @@ impl Connection {
|
|||||||
builder = builder.host_override(&host_override);
|
builder = builder.host_override(&host_override);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(session) = options.session {
|
||||||
|
builder = builder.session(session.inner.clone());
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Self::inner_new(builder.execute().await.default_error()?))
|
Ok(Self::inner_new(builder.execute().await.default_error()?))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ pub mod merge;
|
|||||||
mod query;
|
mod query;
|
||||||
pub mod remote;
|
pub mod remote;
|
||||||
mod rerankers;
|
mod rerankers;
|
||||||
|
mod session;
|
||||||
mod table;
|
mod table;
|
||||||
mod util;
|
mod util;
|
||||||
|
|
||||||
@@ -34,6 +35,9 @@ pub struct ConnectionOptions {
|
|||||||
///
|
///
|
||||||
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||||
pub storage_options: Option<HashMap<String, String>>,
|
pub storage_options: Option<HashMap<String, String>>,
|
||||||
|
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||||
|
/// shared caches and other session-specific state.
|
||||||
|
pub session: Option<session::Session>,
|
||||||
|
|
||||||
/// (For LanceDB cloud only): configuration for the remote HTTP client.
|
/// (For LanceDB cloud only): configuration for the remote HTTP client.
|
||||||
pub client_config: Option<remote::ClientConfig>,
|
pub client_config: Option<remote::ClientConfig>,
|
||||||
|
|||||||
102
nodejs/src/session.rs
Normal file
102
nodejs/src/session.rs
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
||||||
|
use napi::bindgen_prelude::*;
|
||||||
|
use napi_derive::*;
|
||||||
|
|
||||||
|
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
///
/// Cloning a `Session` only clones the inner `Arc`, so every clone refers to
/// the same underlying session.
#[napi]
#[derive(Clone)]
pub struct Session {
    // Shared handle to the wrapped `lancedb::Session`; crate-visible so other
    // modules in this crate can read it.
    pub(crate) inner: Arc<LanceSession>,
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for Session {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("Session")
|
||||||
|
.field("size_bytes", &self.inner.size_bytes())
|
||||||
|
.field("approx_num_items", &self.inner.approx_num_items())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
impl Session {
|
||||||
|
/// Create a new session with custom cache sizes.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// - `index_cache_size_bytes`: The size of the index cache in bytes.
|
||||||
|
/// Index data is stored in memory in this cache to speed up queries.
|
||||||
|
/// Defaults to 6GB if not specified.
|
||||||
|
/// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
|
||||||
|
/// The metadata cache stores file metadata and schema information in memory.
|
||||||
|
/// This cache improves scan and write performance.
|
||||||
|
/// Defaults to 1GB if not specified.
|
||||||
|
#[napi(constructor)]
|
||||||
|
pub fn new(
|
||||||
|
index_cache_size_bytes: Option<BigInt>,
|
||||||
|
metadata_cache_size_bytes: Option<BigInt>,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
let index_cache_size = index_cache_size_bytes
|
||||||
|
.map(|size| size.get_u64().1 as usize)
|
||||||
|
.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
|
||||||
|
|
||||||
|
let metadata_cache_size = metadata_cache_size_bytes
|
||||||
|
.map(|size| size.get_u64().1 as usize)
|
||||||
|
.unwrap_or(1024 * 1024 * 1024); // 1GB default
|
||||||
|
|
||||||
|
let session = LanceSession::new(
|
||||||
|
index_cache_size,
|
||||||
|
metadata_cache_size,
|
||||||
|
Arc::new(ObjectStoreRegistry::default()),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
inner: Arc::new(session),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a session with default cache sizes.
|
||||||
|
///
|
||||||
|
/// This is equivalent to creating a session with 6GB index cache
|
||||||
|
/// and 1GB metadata cache.
|
||||||
|
#[napi(factory)]
|
||||||
|
pub fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(LanceSession::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the current size of the session caches in bytes.
|
||||||
|
#[napi]
|
||||||
|
pub fn size_bytes(&self) -> BigInt {
|
||||||
|
BigInt::from(self.inner.size_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the approximate number of items cached in the session.
|
||||||
|
#[napi]
|
||||||
|
pub fn approx_num_items(&self) -> u32 {
|
||||||
|
self.inner.approx_num_items() as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implement FromNapiValue for Session to work with napi(object)
|
||||||
|
impl napi::bindgen_prelude::FromNapiValue for Session {
|
||||||
|
unsafe fn from_napi_value(
|
||||||
|
env: napi::sys::napi_env,
|
||||||
|
napi_val: napi::sys::napi_value,
|
||||||
|
) -> napi::Result<Self> {
|
||||||
|
let object: napi::bindgen_prelude::ClassInstance<Session> =
|
||||||
|
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
|
||||||
|
let copy = object.clone();
|
||||||
|
Ok(copy)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -14,3 +14,6 @@ Common commands:
|
|||||||
Before committing changes, run lints and then formatting.
|
Before committing changes, run lints and then formatting.
|
||||||
|
|
||||||
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
|
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
|
||||||
|
|
||||||
|
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
|
||||||
|
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ from .remote import ClientConfig
|
|||||||
from .remote.db import RemoteDBConnection
|
from .remote.db import RemoteDBConnection
|
||||||
from .schema import vector
|
from .schema import vector
|
||||||
from .table import AsyncTable
|
from .table import AsyncTable
|
||||||
|
from ._lancedb import Session
|
||||||
|
|
||||||
|
|
||||||
def connect(
|
def connect(
|
||||||
@@ -30,6 +31,7 @@ def connect(
|
|||||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
|
session: Optional[Session] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> DBConnection:
|
) -> DBConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
@@ -64,6 +66,12 @@ def connect(
|
|||||||
storage_options: dict, optional
|
storage_options: dict, optional
|
||||||
Additional options for the storage backend. See available options at
|
Additional options for the storage backend. See available options at
|
||||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||||
|
session: Session, optional
|
||||||
|
(For LanceDB OSS only)
|
||||||
|
A session to use for this connection. Sessions allow you to configure
|
||||||
|
cache sizes for index and metadata caches, which can significantly
|
||||||
|
impact memory use and performance. They can also be re-used across
|
||||||
|
multiple connections to share the same cache state.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -113,6 +121,7 @@ def connect(
|
|||||||
uri,
|
uri,
|
||||||
read_consistency_interval=read_consistency_interval,
|
read_consistency_interval=read_consistency_interval,
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
|
session=session,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -125,6 +134,7 @@ async def connect_async(
|
|||||||
read_consistency_interval: Optional[timedelta] = None,
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
|
session: Optional[Session] = None,
|
||||||
) -> AsyncConnection:
|
) -> AsyncConnection:
|
||||||
"""Connect to a LanceDB database.
|
"""Connect to a LanceDB database.
|
||||||
|
|
||||||
@@ -158,6 +168,12 @@ async def connect_async(
|
|||||||
storage_options: dict, optional
|
storage_options: dict, optional
|
||||||
Additional options for the storage backend. See available options at
|
Additional options for the storage backend. See available options at
|
||||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||||
|
session: Session, optional
|
||||||
|
(For LanceDB OSS only)
|
||||||
|
A session to use for this connection. Sessions allow you to configure
|
||||||
|
cache sizes for index and metadata caches, which can significantly
|
||||||
|
impact memory use and performance. They can also be re-used across
|
||||||
|
multiple connections to share the same cache state.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -197,6 +213,7 @@ async def connect_async(
|
|||||||
read_consistency_interval_secs,
|
read_consistency_interval_secs,
|
||||||
client_config,
|
client_config,
|
||||||
storage_options,
|
storage_options,
|
||||||
|
session,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -212,6 +229,7 @@ __all__ = [
|
|||||||
"DBConnection",
|
"DBConnection",
|
||||||
"LanceDBConnection",
|
"LanceDBConnection",
|
||||||
"RemoteDBConnection",
|
"RemoteDBConnection",
|
||||||
|
"Session",
|
||||||
"__version__",
|
"__version__",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,19 @@ import pyarrow as pa
|
|||||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
||||||
from .remote import ClientConfig
|
from .remote import ClientConfig
|
||||||
|
|
||||||
|
class Session:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
index_cache_size_bytes: Optional[int] = None,
|
||||||
|
metadata_cache_size_bytes: Optional[int] = None,
|
||||||
|
): ...
|
||||||
|
@staticmethod
|
||||||
|
def default() -> "Session": ...
|
||||||
|
@property
|
||||||
|
def size_bytes(self) -> int: ...
|
||||||
|
@property
|
||||||
|
def approx_num_items(self) -> int: ...
|
||||||
|
|
||||||
class Connection(object):
|
class Connection(object):
|
||||||
uri: str
|
uri: str
|
||||||
async def table_names(
|
async def table_names(
|
||||||
@@ -89,6 +102,7 @@ async def connect(
|
|||||||
read_consistency_interval: Optional[float],
|
read_consistency_interval: Optional[float],
|
||||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||||
storage_options: Optional[Dict[str, str]],
|
storage_options: Optional[Dict[str, str]],
|
||||||
|
session: Optional[Session],
|
||||||
) -> Connection: ...
|
) -> Connection: ...
|
||||||
|
|
||||||
class RecordBatchStream:
|
class RecordBatchStream:
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ if TYPE_CHECKING:
|
|||||||
from ._lancedb import Connection as LanceDbConnection
|
from ._lancedb import Connection as LanceDbConnection
|
||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .embeddings import EmbeddingFunctionConfig
|
from .embeddings import EmbeddingFunctionConfig
|
||||||
|
from ._lancedb import Session
|
||||||
|
|
||||||
|
|
||||||
class DBConnection(EnforceOverrides):
|
class DBConnection(EnforceOverrides):
|
||||||
@@ -247,6 +248,9 @@ class DBConnection(EnforceOverrides):
|
|||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
index_cache_size: int, default 256
|
index_cache_size: int, default 256
|
||||||
|
**Deprecated**: Use session-level cache configuration instead.
|
||||||
|
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||||
|
|
||||||
Set the size of the index cache, specified as a number of entries
|
Set the size of the index cache, specified as a number of entries
|
||||||
|
|
||||||
The exact meaning of an "entry" will depend on the type of index:
|
The exact meaning of an "entry" will depend on the type of index:
|
||||||
@@ -354,6 +358,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
*,
|
*,
|
||||||
read_consistency_interval: Optional[timedelta] = None,
|
read_consistency_interval: Optional[timedelta] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
|
session: Optional[Session] = None,
|
||||||
):
|
):
|
||||||
if not isinstance(uri, Path):
|
if not isinstance(uri, Path):
|
||||||
scheme = get_uri_scheme(uri)
|
scheme = get_uri_scheme(uri)
|
||||||
@@ -367,6 +372,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
self._entered = False
|
self._entered = False
|
||||||
self.read_consistency_interval = read_consistency_interval
|
self.read_consistency_interval = read_consistency_interval
|
||||||
self.storage_options = storage_options
|
self.storage_options = storage_options
|
||||||
|
self.session = session
|
||||||
|
|
||||||
if read_consistency_interval is not None:
|
if read_consistency_interval is not None:
|
||||||
read_consistency_interval_secs = read_consistency_interval.total_seconds()
|
read_consistency_interval_secs = read_consistency_interval.total_seconds()
|
||||||
@@ -382,6 +388,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
read_consistency_interval_secs,
|
read_consistency_interval_secs,
|
||||||
None,
|
None,
|
||||||
storage_options,
|
storage_options,
|
||||||
|
session,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||||
@@ -475,6 +482,17 @@ class LanceDBConnection(DBConnection):
|
|||||||
-------
|
-------
|
||||||
A LanceTable object representing the table.
|
A LanceTable object representing the table.
|
||||||
"""
|
"""
|
||||||
|
if index_cache_size is not None:
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"index_cache_size is deprecated. Use session-level cache "
|
||||||
|
"configuration instead. Create a Session with custom cache sizes "
|
||||||
|
"and pass it to lancedb.connect().",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
return LanceTable.open(
|
return LanceTable.open(
|
||||||
self,
|
self,
|
||||||
name,
|
name,
|
||||||
@@ -820,6 +838,9 @@ class AsyncConnection(object):
|
|||||||
See available options at
|
See available options at
|
||||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||||
index_cache_size: int, default 256
|
index_cache_size: int, default 256
|
||||||
|
**Deprecated**: Use session-level cache configuration instead.
|
||||||
|
Create a Session with custom cache sizes and pass it to lancedb.connect_async().
|
||||||
|
|
||||||
Set the size of the index cache, specified as a number of entries
|
Set the size of the index cache, specified as a number of entries
|
||||||
|
|
||||||
The exact meaning of an "entry" will depend on the type of index:
|
The exact meaning of an "entry" will depend on the type of index:
|
||||||
|
|||||||
38
python/python/tests/test_session.py
Normal file
38
python/python/tests/test_session.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import lancedb
|
||||||
|
|
||||||
|
|
||||||
|
def test_session_cache_configuration(tmp_path):
    """Test Session cache configuration and basic functionality.

    Creates a Session with small cache limits, attaches it to a connection,
    runs a write and a read, and checks that the session's cache counters
    grew while staying below the configured limits.
    """
    # Create session with small cache limits for testing
    index_cache_size = 1024 * 1024  # 1MB
    metadata_cache_size = 512 * 1024  # 512KB
    cache_limit = index_cache_size + metadata_cache_size

    session = lancedb.Session(
        index_cache_size_bytes=index_cache_size,
        metadata_cache_size_bytes=metadata_cache_size,
    )

    # Record initial cache state
    initial_cache_size = session.size_bytes
    initial_cache_items = session.approx_num_items

    # Test session works with database connection
    db = lancedb.connect(tmp_path, session=session)

    # Create and use a table to exercise the session
    data = [{"id": i, "text": f"item {i}"} for i in range(100)]
    table = db.create_table("test", data)
    results = table.to_arrow().to_pylist()

    assert len(results) == 100

    # Verify cache usage increased after operations
    final_cache_size = session.size_bytes
    final_cache_items = session.approx_num_items

    assert final_cache_size > initial_cache_size  # Cache should have grown
    assert final_cache_items >= initial_cache_items  # Items should not decrease

    # The configured limits must hold both before and after the cache has been
    # exercised; checking only the initial (empty) size proves nothing.
    assert initial_cache_size < cache_limit
    assert final_cache_size < cache_limit
|
||||||
@@ -179,7 +179,7 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
|
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn connect(
|
pub fn connect(
|
||||||
py: Python,
|
py: Python,
|
||||||
@@ -190,6 +190,7 @@ pub fn connect(
|
|||||||
read_consistency_interval: Option<f64>,
|
read_consistency_interval: Option<f64>,
|
||||||
client_config: Option<PyClientConfig>,
|
client_config: Option<PyClientConfig>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
|
session: Option<crate::session::Session>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
future_into_py(py, async move {
|
future_into_py(py, async move {
|
||||||
let mut builder = lancedb::connect(&uri);
|
let mut builder = lancedb::connect(&uri);
|
||||||
@@ -213,6 +214,9 @@ pub fn connect(
|
|||||||
if let Some(client_config) = client_config {
|
if let Some(client_config) = client_config {
|
||||||
builder = builder.client_config(client_config.into());
|
builder = builder.client_config(client_config.into());
|
||||||
}
|
}
|
||||||
|
if let Some(session) = session {
|
||||||
|
builder = builder.session(session.inner.clone());
|
||||||
|
}
|
||||||
Ok(Connection::new(builder.execute().await.infer_error()?))
|
Ok(Connection::new(builder.execute().await.infer_error()?))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ use pyo3::{
|
|||||||
wrap_pyfunction, Bound, PyResult, Python,
|
wrap_pyfunction, Bound, PyResult, Python,
|
||||||
};
|
};
|
||||||
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
||||||
|
use session::Session;
|
||||||
use table::{
|
use table::{
|
||||||
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
|
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
|
||||||
Table, UpdateResult,
|
Table, UpdateResult,
|
||||||
@@ -21,6 +22,7 @@ pub mod connection;
|
|||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod query;
|
pub mod query;
|
||||||
|
pub mod session;
|
||||||
pub mod table;
|
pub mod table;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
@@ -31,6 +33,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|||||||
.write_style("LANCEDB_LOG_STYLE");
|
.write_style("LANCEDB_LOG_STYLE");
|
||||||
env_logger::init_from_env(env);
|
env_logger::init_from_env(env);
|
||||||
m.add_class::<Connection>()?;
|
m.add_class::<Connection>()?;
|
||||||
|
m.add_class::<Session>()?;
|
||||||
m.add_class::<Table>()?;
|
m.add_class::<Table>()?;
|
||||||
m.add_class::<IndexConfig>()?;
|
m.add_class::<IndexConfig>()?;
|
||||||
m.add_class::<Query>()?;
|
m.add_class::<Query>()?;
|
||||||
|
|||||||
107
python/src/session.rs
Normal file
107
python/src/session.rs
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
||||||
|
use pyo3::{pyclass, pymethods, PyResult};
|
||||||
|
|
||||||
|
/// A session for managing caches and object stores across LanceDB operations.
|
||||||
|
///
|
||||||
|
/// Sessions allow you to configure cache sizes for index and metadata caches,
|
||||||
|
/// which can significantly impact memory use and performance. They can
|
||||||
|
/// also be re-used across multiple connections to share the same cache state.
|
||||||
|
#[pyclass]
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Session {
|
||||||
|
pub(crate) inner: Arc<LanceSession>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Session {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(LanceSession::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Session {
|
||||||
|
/// Create a new session with custom cache sizes.
|
||||||
|
///
|
||||||
|
/// Parameters
|
||||||
|
/// ----------
|
||||||
|
/// index_cache_size_bytes : int, optional
|
||||||
|
/// The size of the index cache in bytes.
|
||||||
|
/// Index data is stored in memory in this cache to speed up queries.
|
||||||
|
/// Default: 6GB (6 * 1024 * 1024 * 1024 bytes)
|
||||||
|
/// metadata_cache_size_bytes : int, optional
|
||||||
|
/// The size of the metadata cache in bytes.
|
||||||
|
/// The metadata cache stores file metadata and schema information in memory.
|
||||||
|
/// This cache improves scan and write performance.
|
||||||
|
/// Default: 1GB (1024 * 1024 * 1024 bytes)
|
||||||
|
#[new]
|
||||||
|
#[pyo3(signature = (index_cache_size_bytes=None, metadata_cache_size_bytes=None))]
|
||||||
|
pub fn new(
|
||||||
|
index_cache_size_bytes: Option<usize>,
|
||||||
|
metadata_cache_size_bytes: Option<usize>,
|
||||||
|
) -> PyResult<Self> {
|
||||||
|
let index_cache_size = index_cache_size_bytes.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
|
||||||
|
let metadata_cache_size = metadata_cache_size_bytes.unwrap_or(1024 * 1024 * 1024); // 1GB default
|
||||||
|
|
||||||
|
let session = LanceSession::new(
|
||||||
|
index_cache_size,
|
||||||
|
metadata_cache_size,
|
||||||
|
Arc::new(ObjectStoreRegistry::default()),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
inner: Arc::new(session),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a session with default cache sizes.
|
||||||
|
///
|
||||||
|
/// This is equivalent to creating a session with 6GB index cache
|
||||||
|
/// and 1GB metadata cache.
|
||||||
|
///
|
||||||
|
/// Returns
|
||||||
|
/// -------
|
||||||
|
/// Session
|
||||||
|
/// A new Session with default cache sizes
|
||||||
|
#[staticmethod]
|
||||||
|
#[allow(clippy::should_implement_trait)]
|
||||||
|
pub fn default() -> Self {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the current size of the session caches in bytes.
|
||||||
|
///
|
||||||
|
/// Returns
|
||||||
|
/// -------
|
||||||
|
/// int
|
||||||
|
/// The total size of all caches in the session
|
||||||
|
#[getter]
|
||||||
|
pub fn size_bytes(&self) -> u64 {
|
||||||
|
self.inner.size_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the approximate number of items cached in the session.
|
||||||
|
///
|
||||||
|
/// Returns
|
||||||
|
/// -------
|
||||||
|
/// int
|
||||||
|
/// The number of cached items across all caches
|
||||||
|
#[getter]
|
||||||
|
pub fn approx_num_items(&self) -> usize {
|
||||||
|
self.inner.approx_num_items()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(&self) -> String {
|
||||||
|
format!(
|
||||||
|
"Session(size_bytes={}, approx_num_items={})",
|
||||||
|
self.size_bytes(),
|
||||||
|
self.approx_num_items()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -290,3 +290,7 @@ impl Display for DistanceType {
|
|||||||
|
|
||||||
/// Connect to a database
|
/// Connect to a database
|
||||||
pub use connection::connect;
|
pub use connection::connect;
|
||||||
|
|
||||||
|
/// Re-export Lance Session and ObjectStoreRegistry for custom session creation
|
||||||
|
pub use lance::session::Session;
|
||||||
|
pub use lance_io::object_store::ObjectStoreRegistry;
|
||||||
|
|||||||
Reference in New Issue
Block a user