diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs
index d1831f52d..7b7657bf3 100644
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -505,8 +505,15 @@ impl ListingDatabase {
// Filter out the commit store query param -- it's a lancedb param
url.query_pairs_mut().clear();
url.query_pairs_mut().extend_pairs(filtered_querys);
- // Take a copy of the query string so we can propagate it to lance
- let query_string = url.query().map(|s| s.to_string());
+ // Take a copy of the query string so we can propagate it to lance.
+ // `query_pairs_mut()` leaves the URL with `Some("")` even when no
+ // pairs survive (or none existed in the first place), so an empty
+ // string here must be treated the same as "no query" — otherwise
+ // every table URI ends up with a trailing `?`, which makes downstream
+ // sub-paths (e.g. MemWAL gen paths) re-parse as path= +
+ // query=, causing Lance to find the base table dataset
+ // when looking up the sub-path.
+ let query_string = url.query().filter(|q| !q.is_empty()).map(|s| s.to_string());
// clear the query string so we can use the url as the base uri
// use .set_query(None) instead of .set_query("") because the latter
// will add a trailing '?' to the url
@@ -2213,6 +2220,133 @@ mod tests {
assert_eq!(uri, expected);
}
+ /// Regression: connecting via a URL-style URI (which goes through
+ /// `url::Url::parse` and the `query_pairs_mut()` path) must not
+ /// append a trailing `?` to per-table URIs when the input URI has
+ /// no query string.
+ ///
+ /// Earlier, `query_pairs_mut().clear()` left the URL with
+ /// `query=Some("")`, which then propagated as a trailing `?` on
+ /// every table URI. Sub-path lookups against that URI (e.g. MemWAL
+ /// `/_mem_wal//_gen_`) re-parsed as
+ /// `path=` + `query=/_mem_wal/...`, causing
+ /// `Dataset::write` to find the base table dataset and falsely
+ /// report `Dataset already exists`.
+ /// Mirrors the URL-mutation step from
+ /// [`ListingDatabase::connect_with_options`] so we can assert the
+ /// fix without going through filesystem setup (which is awkward
+ /// across platforms — see the `file://` test below).
+ fn capture_query_like_connect(input_uri: &str) -> Option {
+ let mut url = url::Url::parse(input_uri).unwrap();
+ let mut filtered_querys = Vec::new();
+ for (key, value) in url.query_pairs() {
+ if key == ENGINE || key == MIRRORED_STORE {
+ continue;
+ }
+ filtered_querys.push((key.to_string(), value.to_string()));
+ }
+ url.query_pairs_mut().clear();
+ url.query_pairs_mut().extend_pairs(filtered_querys);
+ url.query().filter(|q| !q.is_empty()).map(|s| s.to_string())
+ }
+
+ #[test]
+ fn test_capture_query_treats_empty_as_none() {
+ // No query at all. With the bug, `query_pairs_mut()` left the
+ // URL with `query=Some("")` and we used to propagate that.
+ assert_eq!(
+ capture_query_like_connect("s3://bucket/prefix/"),
+ None,
+ "empty query after mutation must be treated as no query"
+ );
+
+ // Real query is propagated.
+ assert_eq!(
+ capture_query_like_connect("s3://bucket/prefix/?foo=bar"),
+ Some("foo=bar".to_string())
+ );
+
+ // lancedb-internal `engine=` is stripped; nothing remains, so
+ // query_string is None — not Some("").
+ assert_eq!(
+ capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem", ENGINE)),
+ None
+ );
+
+ // Mixed: drop `engine=`, keep the rest.
+ let captured =
+ capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem&foo=bar", ENGINE));
+ assert_eq!(captured.as_deref(), Some("foo=bar"));
+ }
+
+ /// Regression: connecting via a URL-style URI (which goes through
+ /// `url::Url::parse` and the `query_pairs_mut()` path) must not
+ /// append a trailing `?` to per-table URIs when the input URI has
+ /// no query string. Sub-path lookups against such a URI (e.g.
+ /// MemWAL `/_mem_wal//_gen_`) re-parse
+ /// as `path=` + `query=/_mem_wal/...`, causing
+ /// `Dataset::write` to find the base table dataset and falsely
+ /// report `Dataset already exists`.
+ ///
+ /// Skipped on Windows: `try_create_dir` does not understand
+ /// `file:///C:/…` paths so `connect_with_options` fails before
+ /// even reaching the URL-mutation logic. The pure URL-mutation
+ /// invariant is covered by
+ /// `test_capture_query_treats_empty_as_none` above, which runs
+ /// on all platforms.
+ #[cfg(not(windows))]
+ #[tokio::test]
+ async fn test_table_uri_url_path_has_no_trailing_question_mark() {
+ let tempdir = tempdir().unwrap();
+ let uri = format!("file://{}", tempdir.path().to_str().unwrap());
+
+ let request = ConnectRequest {
+ uri: uri.clone(),
+ #[cfg(feature = "remote")]
+ client_config: Default::default(),
+ options: Default::default(),
+ namespace_client_properties: Default::default(),
+ manifest_enabled: false,
+ read_consistency_interval: None,
+ session: None,
+ };
+ let db = ListingDatabase::connect_with_options(&request)
+ .await
+ .unwrap();
+
+ assert_eq!(
+ db.query_string, None,
+ "no input query → no captured query_string"
+ );
+
+ let table_uri = db.table_uri("test").unwrap();
+ assert!(
+ !table_uri.ends_with('?'),
+ "table_uri must not have a trailing `?`: {}",
+ table_uri
+ );
+ assert_eq!(table_uri, format!("{}/test.lance", uri));
+
+ // A real query string should still be propagated.
+ let with_query = format!("{}?foo=bar", uri);
+ let request_with_query = ConnectRequest {
+ uri: with_query,
+ #[cfg(feature = "remote")]
+ client_config: Default::default(),
+ options: Default::default(),
+ namespace_client_properties: Default::default(),
+ manifest_enabled: false,
+ read_consistency_interval: None,
+ session: None,
+ };
+ let db_with_query = ListingDatabase::connect_with_options(&request_with_query)
+ .await
+ .unwrap();
+ assert_eq!(db_with_query.query_string.as_deref(), Some("foo=bar"));
+ let table_uri = db_with_query.table_uri("test").unwrap();
+ assert_eq!(table_uri, format!("{}/test.lance?foo=bar", uri));
+ }
+
#[tokio::test]
async fn test_namespace_client() {
let (_tempdir, db) = setup_database().await;