mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 23:12:58 +00:00
Based on https://github.com/lancedb/lance/pull/4984. 1. Bump to 1.0.0-beta.2. 2. Use DirectoryNamespace in Lance to perform all testing in Python and Rust for much better coverage. 3. Refactor `ListingDatabase` so that it can accept a location and a namespace. This is needed because we have to leverage the listing database (the local LanceDB connection) when using a namespace: the namespace only resolves the location and storage options, but we don't want to bind all the way to Rust, since users will plug in the namespace from the Python side. Thus `ListingDatabase` needs to be able to accept a location and namespace that are created from a namespace connection. 4. For credentials vending, we also pass the storage options provider all the way to the Rust layer, and the Rust layer calls back into the Python function to fetch the next storage options. This is exactly the same thing we did in pylance.
72 lines
2.6 KiB
Python
72 lines
2.6 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
|
|
"""I/O utilities and interfaces for LanceDB."""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Dict
|
|
|
|
|
|
class StorageOptionsProvider(ABC):
    """Interface for objects that supply storage options to LanceDB tables.

    A provider lets cloud storage credentials (e.g., AWS S3, Azure Blob
    Storage, GCS) be refreshed automatically. When the credentials carry an
    expiration time, ``fetch_storage_options()`` is called periodically so
    that fresh credentials are obtained before the current ones expire.

    Example
    -------
    >>> class MyProvider(StorageOptionsProvider):
    ...     def fetch_storage_options(self) -> Dict[str, str]:
    ...         # Fetch fresh credentials from your credential manager
    ...         return {
    ...             "aws_access_key_id": "...",
    ...             "aws_secret_access_key": "...",
    ...             "expires_at_millis": "1234567890000"  # Optional
    ...         }
    """

    @abstractmethod
    def fetch_storage_options(self) -> Dict[str, str]:
        """Return a fresh set of storage credentials.

        LanceDB invokes this method whenever credentials have to be
        refreshed. An ``"expires_at_millis"`` entry (a Unix timestamp in
        milliseconds) in the result makes LanceDB refresh the credentials
        automatically before that moment; without that entry the
        credentials are treated as never expiring.

        Returns
        -------
        Dict[str, str]
            The cloud storage credentials, optionally with an expiration
            time:
            - "expires_at_millis" (optional): Unix timestamp in milliseconds
              at which the credentials expire
            - Provider-specific credential keys (e.g., aws_access_key_id,
              aws_secret_access_key, etc.)

        Raises
        ------
        RuntimeError
            If the credentials cannot be fetched or are invalid
        """

    def provider_id(self) -> str:
        """Return a human-readable identifier unique to this provider instance.

        The identifier serves for caching and equality comparison: providers
        that report the same ID share one cached object store connection.

        By default it is built from the class name and the instance's string
        representation. Override this method when custom identification
        logic is needed.

        Returns
        -------
        str
            A unique identifier for this provider instance
        """
        class_name = self.__class__.__name__
        text = str(self)
        # Doubled braces are literal; ``!r`` quotes the string form of self.
        return "{} {{ repr: {!r} }}".format(class_name, text)
|