mirror of
https://github.com/lancedb/lancedb.git
synced 2026-04-01 21:40:41 +00:00
Compare commits
3 Commits
v0.27.2
...
feature/wa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
931f19b737 | ||
|
|
cde0814bbc | ||
|
|
3ba46135a5 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.2-beta.2"
|
||||
current_version = "0.27.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -4631,7 +4631,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.27.2-beta.1"
|
||||
version = "0.27.2"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -4713,7 +4713,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.27.2-beta.1"
|
||||
version = "0.27.2"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4735,7 +4735,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.30.2-beta.1"
|
||||
version = "0.30.2"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.27.2-beta.2</version>
|
||||
<version>0.27.2</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.2-beta.2</version>
|
||||
<version>0.27.2-final.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.2-beta.2</version>
|
||||
<version>0.27.2-final.0</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.27.2-beta.2"
|
||||
version = "0.27.2"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.2-beta.1",
|
||||
"version": "0.27.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.2-beta.1",
|
||||
"version": "0.27.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.27.2-beta.2",
|
||||
"version": "0.27.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.27.2-beta.2"
|
||||
version = "0.27.2"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -30,7 +30,10 @@ use crate::error::{Error, Result};
|
||||
#[cfg(feature = "remote")]
|
||||
use crate::remote::{
|
||||
client::ClientConfig,
|
||||
db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION},
|
||||
db::{
|
||||
OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION,
|
||||
OPT_REMOTE_WAL_HOST_OVERRIDE,
|
||||
},
|
||||
};
|
||||
use lance::io::ObjectStoreParams;
|
||||
pub use lance_encoding::version::LanceFileVersion;
|
||||
@@ -664,6 +667,24 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the WAL host override for routing merge_insert requests
|
||||
/// to a separate WAL/ingest service.
|
||||
///
|
||||
/// This option is only used when connecting to LanceDB Cloud (db:// URIs)
|
||||
/// and will be ignored for other URIs.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `wal_host_override` - The WAL host override to use for the connection
|
||||
#[cfg(feature = "remote")]
|
||||
pub fn wal_host_override(mut self, wal_host_override: &str) -> Self {
|
||||
self.request.options.insert(
|
||||
OPT_REMOTE_WAL_HOST_OVERRIDE.to_string(),
|
||||
wal_host_override.to_string(),
|
||||
);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the database specific options
|
||||
///
|
||||
/// See [crate::database::listing::ListingDatabaseOptions] for the options available for
|
||||
@@ -817,6 +838,7 @@ impl ConnectBuilder {
|
||||
&api_key,
|
||||
®ion,
|
||||
options.host_override,
|
||||
options.wal_host_override,
|
||||
self.request.client_config,
|
||||
storage_options.into(),
|
||||
)?);
|
||||
|
||||
@@ -190,6 +190,7 @@ pub struct RetryConfig {
|
||||
pub struct RestfulLanceDbClient<S: HttpSend = Sender> {
|
||||
client: reqwest::Client,
|
||||
host: String,
|
||||
wal_host: String,
|
||||
pub(crate) retry_config: ResolvedRetryConfig,
|
||||
pub(crate) sender: S,
|
||||
pub(crate) id_delimiter: String,
|
||||
@@ -200,6 +201,7 @@ impl<S: HttpSend> std::fmt::Debug for RestfulLanceDbClient<S> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RestfulLanceDbClient")
|
||||
.field("host", &self.host)
|
||||
.field("wal_host", &self.wal_host)
|
||||
.field("retry_config", &self.retry_config)
|
||||
.field("sender", &self.sender)
|
||||
.field("id_delimiter", &self.id_delimiter)
|
||||
@@ -285,6 +287,7 @@ impl RestfulLanceDbClient<Sender> {
|
||||
parsed_url: &ParsedDbUrl,
|
||||
region: &str,
|
||||
host_override: Option<String>,
|
||||
wal_host_override: Option<String>,
|
||||
default_headers: HeaderMap,
|
||||
client_config: ClientConfig,
|
||||
) -> Result<Self> {
|
||||
@@ -372,11 +375,16 @@ impl RestfulLanceDbClient<Sender> {
|
||||
Some(host_override) => host_override,
|
||||
None => format!("https://{}.{}.api.lancedb.com", parsed_url.db_name, region),
|
||||
};
|
||||
debug!("Created client for host: {}", host);
|
||||
let wal_host = match wal_host_override {
|
||||
Some(wal_host_override) => wal_host_override,
|
||||
None => format!("https://{}.{}.wal.lancedb.com", parsed_url.db_name, region),
|
||||
};
|
||||
debug!("Created client for host: {}, wal_host: {}", host, wal_host);
|
||||
let retry_config = client_config.retry_config.clone().try_into()?;
|
||||
Ok(Self {
|
||||
client,
|
||||
host,
|
||||
wal_host,
|
||||
retry_config,
|
||||
sender: Sender,
|
||||
id_delimiter: client_config
|
||||
@@ -479,6 +487,12 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
self.add_id_delimiter_query_param(builder)
|
||||
}
|
||||
|
||||
pub fn post_wal(&self, uri: &str) -> RequestBuilder {
|
||||
let full_uri = format!("{}{}", self.wal_host, uri);
|
||||
let builder = self.client.post(full_uri);
|
||||
self.add_id_delimiter_query_param(builder)
|
||||
}
|
||||
|
||||
fn add_id_delimiter_query_param(&self, req: RequestBuilder) -> RequestBuilder {
|
||||
if self.id_delimiter != "$" {
|
||||
req.query(&[("delimiter", self.id_delimiter.clone())])
|
||||
@@ -791,6 +805,7 @@ pub mod test_utils {
|
||||
RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "http://localhost".to_string(),
|
||||
wal_host: "http://localhost-wal".to_string(),
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: MockSender {
|
||||
f: Arc::new(wrapper),
|
||||
@@ -815,6 +830,7 @@ pub mod test_utils {
|
||||
RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "http://localhost".to_string(),
|
||||
wal_host: "http://localhost-wal".to_string(),
|
||||
retry_config: config.retry_config.try_into().unwrap(),
|
||||
sender: MockSender {
|
||||
f: Arc::new(wrapper),
|
||||
@@ -982,6 +998,7 @@ mod tests {
|
||||
let client = RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "https://example.com".to_string(),
|
||||
wal_host: "https://example.com".to_string(),
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: Sender,
|
||||
id_delimiter: "+".to_string(),
|
||||
@@ -1017,6 +1034,7 @@ mod tests {
|
||||
let client = RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "https://example.com".to_string(),
|
||||
wal_host: "https://example.com".to_string(),
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: Sender,
|
||||
id_delimiter: "+".to_string(),
|
||||
@@ -1054,6 +1072,7 @@ mod tests {
|
||||
let client = RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "https://example.com".to_string(),
|
||||
wal_host: "https://example.com".to_string(),
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: Sender,
|
||||
id_delimiter: "+".to_string(),
|
||||
|
||||
@@ -82,6 +82,7 @@ pub const OPT_REMOTE_PREFIX: &str = "remote_database_";
|
||||
pub const OPT_REMOTE_API_KEY: &str = "remote_database_api_key";
|
||||
pub const OPT_REMOTE_REGION: &str = "remote_database_region";
|
||||
pub const OPT_REMOTE_HOST_OVERRIDE: &str = "remote_database_host_override";
|
||||
pub const OPT_REMOTE_WAL_HOST_OVERRIDE: &str = "remote_database_wal_host_override";
|
||||
// TODO: add support for configuring client config via key/value options
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
@@ -95,6 +96,11 @@ pub struct RemoteDatabaseOptions {
|
||||
/// This is required when connecting to LanceDB Enterprise and should be
|
||||
/// provided if using an on-premises LanceDB Enterprise instance.
|
||||
pub host_override: Option<String>,
|
||||
/// The WAL host override
|
||||
///
|
||||
/// When set, merge_insert operations using WAL routing will be sent to
|
||||
/// this host instead of the auto-derived WAL host.
|
||||
pub wal_host_override: Option<String>,
|
||||
/// Storage options configure the storage layer (e.g. S3, GCS, Azure, etc.)
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
@@ -113,6 +119,7 @@ impl RemoteDatabaseOptions {
|
||||
let api_key = map.get(OPT_REMOTE_API_KEY).cloned();
|
||||
let region = map.get(OPT_REMOTE_REGION).cloned();
|
||||
let host_override = map.get(OPT_REMOTE_HOST_OVERRIDE).cloned();
|
||||
let wal_host_override = map.get(OPT_REMOTE_WAL_HOST_OVERRIDE).cloned();
|
||||
let storage_options = map
|
||||
.iter()
|
||||
.filter(|(key, _)| !key.starts_with(OPT_REMOTE_PREFIX))
|
||||
@@ -122,6 +129,7 @@ impl RemoteDatabaseOptions {
|
||||
api_key,
|
||||
region,
|
||||
host_override,
|
||||
wal_host_override,
|
||||
storage_options,
|
||||
})
|
||||
}
|
||||
@@ -141,6 +149,12 @@ impl DatabaseOptions for RemoteDatabaseOptions {
|
||||
if let Some(host_override) = &self.host_override {
|
||||
map.insert(OPT_REMOTE_HOST_OVERRIDE.to_string(), host_override.clone());
|
||||
}
|
||||
if let Some(wal_host_override) = &self.wal_host_override {
|
||||
map.insert(
|
||||
OPT_REMOTE_WAL_HOST_OVERRIDE.to_string(),
|
||||
wal_host_override.clone(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -185,6 +199,19 @@ impl RemoteDatabaseOptionsBuilder {
|
||||
self.options.host_override = Some(host_override);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the WAL host override
|
||||
///
|
||||
/// When set, merge_insert operations using WAL routing will be sent to
|
||||
/// this host instead of the auto-derived WAL host.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `wal_host_override` - The WAL host override
|
||||
pub fn wal_host_override(mut self, wal_host_override: String) -> Self {
|
||||
self.options.wal_host_override = Some(wal_host_override);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -204,6 +231,7 @@ impl RemoteDatabase {
|
||||
api_key: &str,
|
||||
region: &str,
|
||||
host_override: Option<String>,
|
||||
wal_host_override: Option<String>,
|
||||
client_config: ClientConfig,
|
||||
options: RemoteOptions,
|
||||
) -> Result<Self> {
|
||||
@@ -231,6 +259,7 @@ impl RemoteDatabase {
|
||||
&parsed,
|
||||
region,
|
||||
host_override,
|
||||
wal_host_override,
|
||||
header_map,
|
||||
client_config.clone(),
|
||||
)?;
|
||||
|
||||
@@ -1610,13 +1610,17 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
self.check_mutable().await?;
|
||||
|
||||
let timeout = params.timeout;
|
||||
let use_wal = params.use_wal;
|
||||
|
||||
let query = MergeInsertRequest::try_from(params)?;
|
||||
let mut request = self
|
||||
.client
|
||||
.post(&format!("/v1/table/{}/merge_insert/", self.identifier))
|
||||
.query(&query)
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
let path = format!("/v1/table/{}/merge_insert/", self.identifier);
|
||||
let mut request = if use_wal {
|
||||
self.client.post_wal(&path)
|
||||
} else {
|
||||
self.client.post(&path)
|
||||
}
|
||||
.query(&query)
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
|
||||
if let Some(timeout) = timeout {
|
||||
// (If it doesn't fit into u64, it's not worth sending anyways.)
|
||||
@@ -2705,6 +2709,43 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert_use_wal() {
|
||||
let batch = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let data: Box<dyn RecordBatchReader + Send> = Box::new(RecordBatchIterator::new(
|
||||
[Ok(batch.clone())],
|
||||
batch.schema(),
|
||||
));
|
||||
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/merge_insert/" {
|
||||
// Verify the request was sent to the WAL host
|
||||
assert_eq!(
|
||||
request.url().host_str().unwrap(),
|
||||
"localhost-wal",
|
||||
"merge_insert with use_wal should route to WAL host"
|
||||
);
|
||||
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 1, "num_deleted_rows": 0, "num_inserted_rows": 3, "num_updated_rows": 0}"#)
|
||||
.unwrap()
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
});
|
||||
|
||||
let mut builder = table.merge_insert(&["some_col"]);
|
||||
builder.use_wal(true);
|
||||
let result = builder.execute(data).await.unwrap();
|
||||
|
||||
assert_eq!(result.num_inserted_rows, 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert_retries_on_409() {
|
||||
let batch = RecordBatch::try_new(
|
||||
|
||||
@@ -55,6 +55,7 @@ pub struct MergeInsertBuilder {
|
||||
pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
|
||||
pub(crate) timeout: Option<Duration>,
|
||||
pub(crate) use_index: bool,
|
||||
pub(crate) use_wal: bool,
|
||||
}
|
||||
|
||||
impl MergeInsertBuilder {
|
||||
@@ -69,6 +70,7 @@ impl MergeInsertBuilder {
|
||||
when_not_matched_by_source_delete_filt: None,
|
||||
timeout: None,
|
||||
use_index: true,
|
||||
use_wal: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,6 +150,18 @@ impl MergeInsertBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Controls whether to route the merge insert operation through the WAL host.
|
||||
///
|
||||
/// When set to `true`, the operation will be sent to the WAL host instead of
|
||||
/// the main API host. The WAL host is auto-derived from the database connection
|
||||
/// or can be explicitly set via [`crate::connection::ConnectBuilder::wal_host_override`].
|
||||
///
|
||||
/// Defaults to `false`.
|
||||
pub fn use_wal(&mut self, use_wal: bool) -> &mut Self {
|
||||
self.use_wal = use_wal;
|
||||
self
|
||||
}
|
||||
|
||||
/// Executes the merge insert operation
|
||||
///
|
||||
/// Returns version and statistics about the merge operation including the number of rows
|
||||
|
||||
Reference in New Issue
Block a user