mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-14 10:30:40 +00:00
feat: add mem_wal flag to merge insert for MemWAL write path
Add support for enabling MemWAL (Memory Write-Ahead Log) mode on merge insert operations. This allows streaming writes to route through memory nodes for high-performance buffered writes.

Changes:
- Add `mem_wal` field to MergeInsertBuilder with validation
- Add `x-lancedb-mem-wal-enabled` header for remote requests
- Add Python `mem_wal()` method to LanceMergeInsertBuilder
- Add validation to ensure only the upsert pattern is supported:
  - when_matched_update_all() without filter
  - when_not_matched_insert_all()
- Throw NotSupported error for native tables
- Add mem_wal_enabled to ClientConfig for Python/Node bindings

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -34,6 +34,7 @@ class LanceMergeInsertBuilder(object):
|
||||
self._when_not_matched_by_source_condition = None
|
||||
self._timeout = None
|
||||
self._use_index = True
|
||||
self._mem_wal = False
|
||||
|
||||
def when_matched_update_all(
|
||||
self, *, where: Optional[str] = None
|
||||
@@ -96,6 +97,47 @@ class LanceMergeInsertBuilder(object):
|
||||
self._use_index = use_index
|
||||
return self
|
||||
|
||||
def mem_wal(self, enabled: bool = True) -> LanceMergeInsertBuilder:
    """
    Enable MemWAL (Memory Write-Ahead Log) mode for this merge insert operation.

    When enabled, the merge insert will route data through a memory node service
    that buffers writes before flushing to storage. This is only supported for
    remote (LanceDB Cloud) tables.

    This method only records the flag on the builder and returns the builder
    so calls can be chained; it performs no validation itself. The errors
    listed under "Raises" are expected to surface later, when the merge
    insert is executed, not from this call.

    **Important:** MemWAL only supports the upsert pattern. You must use:

    - `when_matched_update_all()` (without a filter condition)
    - `when_not_matched_insert_all()`

    MemWAL does NOT support:

    - `when_matched_update_all(where=...)` with a filter condition
    - `when_not_matched_by_source_delete()`

    Parameters
    ----------
    enabled: bool
        Whether to enable MemWAL mode. Defaults to `True`.

    Returns
    -------
    LanceMergeInsertBuilder
        This builder, to allow method chaining.

    Raises
    ------
    NotImplementedError
        On execution, if used on a native (local) table, as MemWAL is only
        supported for remote tables.
    ValueError
        On execution, if the merge insert pattern is not supported by MemWAL.

    Examples
    --------
    >>> # Correct usage with MemWAL
    >>> table.merge_insert(["id"]) \\
    ...     .when_matched_update_all() \\
    ...     .when_not_matched_insert_all() \\
    ...     .mem_wal() \\
    ...     .execute(new_data)
    """
    # Store the flag only; it is read back when the operation is executed.
    self._mem_wal = enabled
    return self
|
||||
|
||||
def execute(
|
||||
self,
|
||||
new_data: DATA,
|
||||
|
||||
@@ -4181,6 +4181,7 @@ class AsyncTable:
|
||||
when_not_matched_by_source_condition=merge._when_not_matched_by_source_condition,
|
||||
timeout=merge._timeout,
|
||||
use_index=merge._use_index,
|
||||
mem_wal=merge._mem_wal,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -506,6 +506,7 @@ pub struct PyClientConfig {
|
||||
id_delimiter: Option<String>,
|
||||
tls_config: Option<PyClientTlsConfig>,
|
||||
header_provider: Option<Py<PyAny>>,
|
||||
mem_wal_enabled: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
@@ -590,6 +591,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
|
||||
id_delimiter: value.id_delimiter,
|
||||
tls_config: value.tls_config.map(Into::into),
|
||||
header_provider,
|
||||
mem_wal_enabled: value.mem_wal_enabled,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -710,6 +710,9 @@ impl Table {
|
||||
if let Some(use_index) = parameters.use_index {
|
||||
builder.use_index(use_index);
|
||||
}
|
||||
if let Some(mem_wal) = parameters.mem_wal {
|
||||
builder.mem_wal(mem_wal);
|
||||
}
|
||||
|
||||
future_into_py(self_.py(), async move {
|
||||
let res = builder.execute(Box::new(batches)).await.infer_error()?;
|
||||
@@ -870,6 +873,7 @@ pub struct MergeInsertParams {
|
||||
when_not_matched_by_source_condition: Option<String>,
|
||||
timeout: Option<std::time::Duration>,
|
||||
use_index: Option<bool>,
|
||||
mem_wal: Option<bool>,
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
|
||||
Reference in New Issue
Block a user