Compare commits

..

2 Commits

Author SHA1 Message Date
lancedb automation
dd8fd66e27 chore: update lance dependency to v5.0.0-rc.1 2026-04-09 19:18:39 +00:00
lennylxx
4c2939d66e fix(python): guard against None before .decode() on split_names metadata key (#3229)
`.get(b"split_names", None).decode()` was called unconditionally in both
Permutations.__init__ and Permutation.from_tables(), crashing with
AttributeError when schema metadata existed but lacked the split_names
key. Guard the decode behind a None check and add regression tests.
2026-04-08 16:04:13 -07:00
10 changed files with 101 additions and 128 deletions

65
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4134,8 +4134,8 @@ dependencies = [
[[package]]
name = "lance"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-arith",
@@ -4201,13 +4201,14 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-ipc",
"arrow-ord",
"arrow-schema",
"arrow-select",
@@ -4222,8 +4223,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrayref",
"paste",
@@ -4232,8 +4233,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4270,8 +4271,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-array",
@@ -4301,8 +4302,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-array",
@@ -4320,8 +4321,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4358,8 +4359,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4391,8 +4392,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-arith",
@@ -4456,8 +4457,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-arith",
@@ -4501,8 +4502,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4518,8 +4519,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"async-trait",
@@ -4532,8 +4533,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4578,8 +4579,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-array",
@@ -4618,8 +4619,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-schema",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=5.0.0-rc.1", default-features = false, "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-rc.1", default-features = false, "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-rc.1", default-features = false, "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>5.0.0-beta.5</lance-core.version>
<lance-core.version>5.0.0-rc.1</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -284,9 +284,8 @@ class Permutations:
self.permutation_table = permutation_table
if permutation_table.schema.metadata is not None:
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
if split_names is not None:
self.split_names = json.loads(split_names)
self.split_dict = {
@@ -460,9 +459,8 @@ class Permutation:
f"Cannot create a permutation on split `{split}`"
" because no split names are defined in the permutation table"
)
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
if split_names is None:
raise ValueError(
f"Cannot create a permutation on split `{split}`"

View File

@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
assert permutations[1].num_rows == 500
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
    """Regression test: metadata without a ``split_names`` entry must not crash.

    The permutation table built here carries schema metadata holding only an
    unrelated key. Constructing ``Permutations`` from it used to raise
    ``AttributeError: 'NoneType' has no attribute 'decode'`` because the result
    of ``.get(b"split_names", None)`` was decoded unconditionally.
    """
    row_count = 10
    base = mem_db.create_table("base_nosplit", pa.table({"x": range(row_count)}))

    # Permutation-shaped columns, tagged with metadata that lacks split_names.
    columns = {
        "row_id": pa.array(range(row_count), type=pa.uint64()),
        "split_id": pa.array([0] * row_count, type=pa.uint32()),
    }
    unrelated_metadata = {b"other_key": b"other_value"}
    perm_data = pa.table(columns).replace_schema_metadata(unrelated_metadata)
    perm_tbl = mem_db.create_table("perm_nosplit", perm_data)

    permutations = Permutations(base, perm_tbl)

    # With no split names recorded, both views are expected to be empty.
    assert permutations.split_names == []
    assert permutations.split_dict == {}
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
    """Regression test: a string ``split`` without split names raises ValueError.

    The permutation table's schema metadata exists but has no ``split_names``
    key. ``Permutation.from_tables`` used to fail with ``AttributeError`` while
    decoding the missing value; it must report the problem as a ``ValueError``.
    """
    row_count = 10
    base = mem_db.create_table("base_strsplit", pa.table({"x": range(row_count)}))

    # Same permutation-shaped layout, again with only an unrelated metadata key.
    perm_data = pa.table(
        {
            "row_id": pa.array(range(row_count), type=pa.uint64()),
            "split_id": pa.array([0] * row_count, type=pa.uint32()),
        }
    ).replace_schema_metadata({b"other_key": b"other_value"})
    perm_tbl = mem_db.create_table("perm_strsplit", perm_data)

    with pytest.raises(ValueError, match="no split names are defined"):
        Permutation.from_tables(base, perm_tbl, split="train")
@pytest.fixture
def some_perm_table(some_table: Table) -> Table:
return (

View File

@@ -676,11 +676,6 @@ impl ConnectBuilder {
self
}
/// Set the WAL host override for routing merge_insert requests
/// to a separate WAL/ingest service.
///
/// This option is only used when connecting to LanceDB Cloud (db:// URIs)
/// and will be ignored for other URIs.
/// Set the database specific options
///
/// See [crate::database::listing::ListingDatabaseOptions] for the options available for

View File

@@ -527,12 +527,6 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
self.add_id_delimiter_query_param(builder)
}
/// Builds a POST request for `uri` (appended to this client's host) that
/// carries the `x-use-wal: true` header, then applies the id-delimiter
/// query parameter handling shared with the other request builders.
pub fn post_wal(&self, uri: &str) -> RequestBuilder {
    let url = format!("{}{}", self.host, uri);
    let request = self.client.post(url);
    self.add_id_delimiter_query_param(request.header("x-use-wal", "true"))
}
fn add_id_delimiter_query_param(&self, req: RequestBuilder) -> RequestBuilder {
if self.id_delimiter != "$" {
req.query(&[("delimiter", self.id_delimiter.clone())])
@@ -1036,7 +1030,6 @@ mod tests {
let client = RestfulLanceDbClient {
client: reqwest::Client::new(),
host: "https://example.com".to_string(),
retry_config: RetryConfig::default().try_into().unwrap(),
sender: Sender,
id_delimiter: "+".to_string(),
@@ -1072,7 +1065,6 @@ mod tests {
let client = RestfulLanceDbClient {
client: reqwest::Client::new(),
host: "https://example.com".to_string(),
retry_config: RetryConfig::default().try_into().unwrap(),
sender: Sender,
id_delimiter: "+".to_string(),
@@ -1110,7 +1102,6 @@ mod tests {
let client = RestfulLanceDbClient {
client: reqwest::Client::new(),
host: "https://example.com".to_string(),
retry_config: RetryConfig::default().try_into().unwrap(),
sender: Sender,
id_delimiter: "+".to_string(),

View File

@@ -185,7 +185,6 @@ impl RemoteDatabaseOptionsBuilder {
self.options.host_override = Some(host_override);
self
}
}
#[derive(Debug)]

View File

@@ -1610,17 +1610,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
self.check_mutable().await?;
let timeout = params.timeout;
let use_wal = params.use_wal;
let query = MergeInsertRequest::try_from(params)?;
let path = format!("/v1/table/{}/merge_insert/", self.identifier);
let mut request = if use_wal {
self.client.post_wal(&path)
} else {
self.client.post(&path)
}
.query(&query)
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
let mut request = self
.client
.post(&format!("/v1/table/{}/merge_insert/", self.identifier))
.query(&query)
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
if let Some(timeout) = timeout {
// (If it doesn't fit into u64, it's not worth sending anyways.)
@@ -2709,43 +2705,6 @@ mod tests {
}
}
#[tokio::test]
async fn test_merge_insert_use_wal() {
    // Three-row, single-column batch used as the merge source.
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema,
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )
    .unwrap();
    let reader = RecordBatchIterator::new([Ok(batch.clone())], batch.schema());
    let data: Box<dyn RecordBatchReader + Send> = Box::new(reader);

    let table = Table::new_with_handler("my_table", move |request| {
        // Only the merge_insert endpoint is expected to be hit.
        let path = request.url().path();
        if path != "/v1/table/my_table/merge_insert/" {
            panic!("Unexpected request path: {}", path);
        }

        // Verify the x-use-wal header is set for router-based WAL routing
        let wal_header = request.headers().get("x-use-wal").unwrap();
        assert_eq!(
            wal_header,
            "true",
            "merge_insert with use_wal should set x-use-wal header"
        );

        http::Response::builder()
            .status(200)
            .body(r#"{"version": 1, "num_deleted_rows": 0, "num_inserted_rows": 3, "num_updated_rows": 0}"#)
            .unwrap()
    });

    let mut merge = table.merge_insert(&["some_col"]);
    merge.use_wal(true);
    let outcome = merge.execute(data).await.unwrap();

    assert_eq!(outcome.num_inserted_rows, 3);
}
#[tokio::test]
async fn test_merge_insert_retries_on_409() {
let batch = RecordBatch::try_new(

View File

@@ -55,7 +55,6 @@ pub struct MergeInsertBuilder {
pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
pub(crate) timeout: Option<Duration>,
pub(crate) use_index: bool,
pub(crate) use_wal: bool,
}
impl MergeInsertBuilder {
@@ -70,7 +69,6 @@ impl MergeInsertBuilder {
when_not_matched_by_source_delete_filt: None,
timeout: None,
use_index: true,
use_wal: false,
}
}
@@ -150,18 +148,6 @@ impl MergeInsertBuilder {
self
}
/// Controls whether the merge insert operation is routed through the WAL.
///
/// When set to `true`, the request includes an `x-use-wal: true` header,
/// which the router uses to forward the operation to wal-writer instances
/// instead of Phalanx.
///
/// Defaults to `false`.
///
/// Returns `&mut Self` so that further builder calls can be chained.
pub fn use_wal(&mut self, use_wal: bool) -> &mut Self {
self.use_wal = use_wal;
self
}
/// Executes the merge insert operation
///
/// Returns version and statistics about the merge operation including the number of rows