Compare commits

..

2 Commits

Author SHA1 Message Date
Jack Ye
a898dc81c2 feat: add user_id field to ClientConfig for user identification (#3240)
## Summary

- Add a `user_id` field to `ClientConfig` that allows users to identify
themselves to LanceDB Cloud/Enterprise
- The user_id is sent as the `x-lancedb-user-id` HTTP header in all
requests
- Supports three configuration methods:
  - Direct assignment via `ClientConfig.user_id`
  - Environment variable `LANCEDB_USER_ID`
  - Indirect env var lookup via `LANCEDB_USER_ID_ENV_KEY`

Closes #3230

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-06 11:20:10 -07:00
Lance Release
de3f8097e7 Bump version: 0.28.0-beta.0 → 0.28.0-beta.1 2026-04-05 02:51:18 +00:00
7 changed files with 193 additions and 5 deletions

6
Cargo.lock generated
View File

@@ -4630,7 +4630,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.28.0-beta.0"
version = "0.28.0-beta.1"
dependencies = [
"ahash",
"anyhow",
@@ -4712,7 +4712,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.28.0-beta.0"
version = "0.28.0-beta.1"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4734,7 +4734,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.31.0-beta.0"
version = "0.31.0-beta.1"
dependencies = [
"arrow",
"async-trait",

View File

@@ -53,3 +53,18 @@ optional tlsConfig: TlsConfig;
```ts
optional userAgent: string;
```
***
### userId?
```ts
optional userId: string;
```
User identifier for tracking purposes.
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
variable that contains the user ID value.

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.28.0-beta.0",
"version": "0.28.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.28.0-beta.0",
"version": "0.28.0-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -92,6 +92,13 @@ pub struct ClientConfig {
pub extra_headers: Option<HashMap<String, String>>,
pub id_delimiter: Option<String>,
pub tls_config: Option<TlsConfig>,
/// User identifier for tracking purposes.
///
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
/// variable that contains the user ID value.
pub user_id: Option<String>,
}
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
@@ -145,6 +152,7 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
id_delimiter: config.id_delimiter,
tls_config: config.tls_config.map(Into::into),
header_provider: None, // the header provider is set separately later
user_id: config.user_id,
}
}
}

View File

@@ -145,6 +145,33 @@ class TlsConfig:
@dataclass
class ClientConfig:
"""Configuration for the LanceDB Cloud HTTP client.
Attributes
----------
user_agent: str
User agent string sent with requests.
retry_config: RetryConfig
Configuration for retrying failed requests.
timeout_config: Optional[TimeoutConfig]
Configuration for request timeouts.
extra_headers: Optional[dict]
Additional headers to include in requests.
id_delimiter: Optional[str]
The delimiter to use when constructing object identifiers.
tls_config: Optional[TlsConfig]
TLS/mTLS configuration for secure connections.
header_provider: Optional[HeaderProvider]
Provider for dynamic headers to be added to each request.
user_id: Optional[str]
User identifier for tracking purposes. This is sent as the
`x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
This can also be set via the `LANCEDB_USER_ID` environment variable.
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
environment variable that contains the user ID value.
"""
user_agent: str = f"LanceDB-Python-Client/{__version__}"
retry_config: RetryConfig = field(default_factory=RetryConfig)
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
@@ -152,6 +179,7 @@ class ClientConfig:
id_delimiter: Optional[str] = None
tls_config: Optional[TlsConfig] = None
header_provider: Optional["HeaderProvider"] = None
user_id: Optional[str] = None
def __post_init__(self):
if isinstance(self.retry_config, dict):

View File

@@ -547,6 +547,7 @@ pub struct PyClientConfig {
id_delimiter: Option<String>,
tls_config: Option<PyClientTlsConfig>,
header_provider: Option<Py<PyAny>>,
user_id: Option<String>,
}
#[derive(FromPyObject)]
@@ -631,6 +632,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
id_delimiter: value.id_delimiter,
tls_config: value.tls_config.map(Into::into),
header_provider,
user_id: value.user_id,
}
}
}

View File

@@ -52,6 +52,13 @@ pub struct ClientConfig {
pub tls_config: Option<TlsConfig>,
/// Provider for custom headers to be added to each request
pub header_provider: Option<Arc<dyn HeaderProvider>>,
/// User identifier for tracking purposes.
///
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
/// variable that contains the user ID value.
pub user_id: Option<String>,
}
impl std::fmt::Debug for ClientConfig {
@@ -67,6 +74,7 @@ impl std::fmt::Debug for ClientConfig {
"header_provider",
&self.header_provider.as_ref().map(|_| "Some(...)"),
)
.field("user_id", &self.user_id)
.finish()
}
}
@@ -81,10 +89,41 @@ impl Default for ClientConfig {
id_delimiter: None,
tls_config: None,
header_provider: None,
user_id: None,
}
}
}
impl ClientConfig {
/// Resolve the user ID from the config or environment variables.
///
/// Resolution order:
/// 1. If `user_id` is set in the config, use that value
/// 2. If `LANCEDB_USER_ID` environment variable is set, use that value
/// 3. If `LANCEDB_USER_ID_ENV_KEY` is set, read the env var it points to
/// 4. Otherwise, return None
pub fn resolve_user_id(&self) -> Option<String> {
if self.user_id.is_some() {
return self.user_id.clone();
}
if let Ok(user_id) = std::env::var("LANCEDB_USER_ID")
&& !user_id.is_empty()
{
return Some(user_id);
}
if let Ok(env_key) = std::env::var("LANCEDB_USER_ID_ENV_KEY")
&& let Ok(user_id) = std::env::var(&env_key)
&& !user_id.is_empty()
{
return Some(user_id);
}
None
}
}
/// How to handle timeouts for HTTP requests.
#[derive(Clone, Default, Debug)]
pub struct TimeoutConfig {
@@ -464,6 +503,15 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
);
}
if let Some(user_id) = config.resolve_user_id() {
headers.insert(
HeaderName::from_static("x-lancedb-user-id"),
HeaderValue::from_str(&user_id).map_err(|_| Error::InvalidInput {
message: format!("non-ascii user_id '{}' provided", user_id),
})?,
);
}
Ok(headers)
}
@@ -1072,4 +1120,91 @@ mod tests {
_ => panic!("Expected Runtime error"),
}
}
/// Environment variables that the `resolve_user_id` tests set or clear.
const USER_ID_ENV_VARS: [&str; 3] = [
    "LANCEDB_USER_ID",
    "LANCEDB_USER_ID_ENV_KEY",
    "MY_CUSTOM_USER_ID",
];

/// Serializes every test that touches the variables in `USER_ID_ENV_VARS`.
///
/// The environment is process-global and the test harness runs tests on
/// several threads, so without this lock one test can observe or clobber the
/// variables another test has just set, which makes the assertions flaky.
static USER_ID_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Runs `body` with exactly `vars` set among `USER_ID_ENV_VARS`.
///
/// All variables in `USER_ID_ENV_VARS` are removed before `vars` is applied
/// (so leftovers from a previously failed test or from the developer's shell
/// cannot leak in) and removed again after `body` returns. The whole sequence
/// runs while holding `USER_ID_ENV_LOCK`; do not nest calls, the lock is not
/// reentrant.
fn with_user_id_env<T>(vars: &[(&str, &str)], body: impl FnOnce() -> T) -> T {
    // A failed assertion inside another test's `body` poisons the lock. The
    // guarded value is `()`, so there is no state to be corrupted: recover.
    let _guard = USER_ID_ENV_LOCK
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    // SAFETY: every write to these variables in this group of tests goes
    // through this helper and therefore happens while `USER_ID_ENV_LOCK` is
    // held, so the writes cannot race with each other or with the reads done
    // by `resolve_user_id` inside `body`.
    // NOTE(review): tests elsewhere in the process that read the environment
    // without taking this lock are not covered - confirm none do.
    unsafe {
        for name in USER_ID_ENV_VARS.iter() {
            std::env::remove_var(name);
        }
        for (name, value) in vars.iter() {
            std::env::set_var(name, value);
        }
    }

    let outcome = body();

    // SAFETY: still holding `USER_ID_ENV_LOCK`, see above.
    unsafe {
        for name in USER_ID_ENV_VARS.iter() {
            std::env::remove_var(name);
        }
    }
    outcome
}

/// A value set directly on the config is returned without consulting the
/// environment, so this test does not need the environment lock.
#[test]
fn test_resolve_user_id_direct_value() {
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
}

/// With no config value and no environment variables, nothing is resolved.
#[test]
fn test_resolve_user_id_none() {
    with_user_id_env(&[], || {
        let config = ClientConfig::default();
        assert_eq!(config.resolve_user_id(), None);
    });
}

/// `LANCEDB_USER_ID` is used when the config itself has no user ID.
#[test]
fn test_resolve_user_id_from_env() {
    with_user_id_env(&[("LANCEDB_USER_ID", "env-user-id")], || {
        let config = ClientConfig::default();
        assert_eq!(config.resolve_user_id(), Some("env-user-id".to_string()));
    });
}

/// `LANCEDB_USER_ID_ENV_KEY` names the variable that holds the user ID.
#[test]
fn test_resolve_user_id_from_env_key() {
    with_user_id_env(
        &[
            ("LANCEDB_USER_ID_ENV_KEY", "MY_CUSTOM_USER_ID"),
            ("MY_CUSTOM_USER_ID", "custom-env-user-id"),
        ],
        || {
            let config = ClientConfig::default();
            assert_eq!(
                config.resolve_user_id(),
                Some("custom-env-user-id".to_string())
            );
        },
    );
}

/// A value set on the config wins over `LANCEDB_USER_ID`.
#[test]
fn test_resolve_user_id_direct_takes_precedence() {
    with_user_id_env(&[("LANCEDB_USER_ID", "env-user-id")], || {
        let config = ClientConfig {
            user_id: Some("direct-user-id".to_string()),
            ..Default::default()
        };
        assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
    });
}

/// An empty `LANCEDB_USER_ID` is treated the same as an unset one.
#[test]
fn test_resolve_user_id_empty_env_ignored() {
    with_user_id_env(&[("LANCEDB_USER_ID", "")], || {
        let config = ClientConfig::default();
        assert_eq!(config.resolve_user_id(), None);
    });
}
}