Compare commits

..

2 Commits

Author SHA1 Message Date
Lance Release
446a69b51b Bump version: 0.26.1 → 0.27.0-beta.0 2026-01-21 12:21:09 +00:00
Ryan Green
cd5f91bb7d feat: expose table uri (#2922)
* Expose `table.uri` property for all tables, including remote tables
* Fix bug in path calculation on Windows file systems
2026-01-20 19:56:46 -03:30
13 changed files with 312 additions and 292 deletions

68
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4404,8 +4404,8 @@ dependencies = [
[[package]]
name = "lance"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-arith",
@@ -4470,8 +4470,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4490,8 +4490,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrayref",
"paste",
@@ -4500,8 +4500,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4538,8 +4538,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-array",
@@ -4569,8 +4569,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-array",
@@ -4588,8 +4588,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4626,8 +4626,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4659,8 +4659,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"datafusion",
"geo-traits",
@@ -4674,8 +4674,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-arith",
@@ -4742,8 +4742,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-arith",
@@ -4783,8 +4783,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4800,8 +4800,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"async-trait",
@@ -4813,8 +4813,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4857,8 +4857,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-array",
@@ -4897,8 +4897,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "2.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-schema",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=2.0.0-beta.10", default-features = false, "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.10", default-features = false, "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.10", default-features = false, "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.26.1"
current_version = "0.27.0-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.26.1"
version = "0.27.0-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -179,6 +179,7 @@ class Table:
cleanup_since_ms: Optional[int] = None,
delete_unverified: Optional[bool] = None,
) -> OptimizeStats: ...
# Coroutine stub: awaiting it yields the table's URI as a string.
async def uri(self) -> str: ...
@property
def tags(self) -> Tags: ...
def query(self) -> Query: ...

View File

@@ -655,6 +655,14 @@ class RemoteTable(Table):
def stats(self):
return LOOP.run(self._table.stats())
@property
def uri(self) -> str:
    """Return the URI (storage location) of this table.

    For a remote table the location is obtained from the server through a
    describe call.
    """
    pending = self._table.uri()
    return LOOP.run(pending)
def take_offsets(self, offsets: list[int]) -> LanceTakeQueryBuilder:
return LanceTakeQueryBuilder(self._table.take_offsets(offsets))

View File

@@ -2218,6 +2218,10 @@ class LanceTable(Table):
def stats(self) -> TableStatistics:
return LOOP.run(self._table.stats())
@property
def uri(self) -> str:
    """Return this table's URI by running the inner table's ``uri()`` on ``LOOP``."""
    location_request = self._table.uri()
    return LOOP.run(location_request)
def create_scalar_index(
self,
column: str,
@@ -3606,6 +3610,20 @@ class AsyncTable:
"""
return await self._inner.stats()
async def uri(self) -> str:
    """
    Return the URI (storage location) of this table.

    Remote tables obtain the location from the server through a describe
    call; local tables return the dataset URI.

    Returns
    -------
    str
        The table's full storage location (e.g., an S3/GCS path).
    """
    location = await self._inner.uri()
    return location
async def add(
self,
data: DATA,

View File

@@ -1967,3 +1967,9 @@ def test_add_table_with_empty_embeddings(tmp_path):
on_bad_vectors="drop",
)
assert table.count_rows() == 1
def test_table_uri(tmp_path):
    """A table created in a local database reports ``<db dir>/<name>.lance`` as its URI."""
    expected_uri = str(tmp_path / "my_table.lance")
    database = lancedb.connect(tmp_path)
    created = database.create_table("my_table", data=[{"x": 0}])
    assert created.uri == expected_uri

View File

@@ -504,6 +504,11 @@ impl Table {
})
}
pub fn uri(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
    // Clone the inner table handle so the future owns everything it needs;
    // `inner_ref()` is fallible and its error is propagated to the caller.
    let table = self_.inner_ref()?.clone();
    let py = self_.py();
    future_into_py(py, async move {
        // Resolve the URI, then convert the error (if any) via `infer_error`.
        let location = table.uri().await;
        location.infer_error()
    })
}
pub fn __repr__(&self) -> String {
match &self.inner {
None => format!("ClosedTable({})", self.name),

View File

@@ -9,7 +9,6 @@ use std::sync::Arc;
use arrow_array::RecordBatchReader;
use arrow_schema::{Field, SchemaRef};
use lance::dataset::ReadParams;
use lance::io::ObjectStoreParams;
use lance_namespace::models::{
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
@@ -40,64 +39,7 @@ use crate::Table;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
/// Overlay `pairs` on the storage options currently reported by
/// `store_params` and install the merged map as a static accessor.
///
/// Keys in `pairs` overwrite existing entries with the same key. The
/// previous `storage_options_accessor` is replaced outright.
// NOTE(review): because the accessor is rebuilt with `with_static_options`,
// any provider attached to the previous accessor is presumably dropped here;
// confirm callers always re-apply the provider afterwards.
fn merge_storage_options(
    store_params: &mut ObjectStoreParams,
    pairs: impl IntoIterator<Item = (String, String)>,
) {
    let mut merged = store_params.storage_options().cloned().unwrap_or_default();
    merged.extend(pairs);
    let accessor = StorageOptionsAccessor::with_static_options(merged);
    store_params.storage_options_accessor = Some(Arc::new(accessor));
}
/// Attach `provider` to `store_params` by rebuilding its storage options
/// accessor.
///
/// Does nothing when `provider` is `None`. Otherwise the options currently
/// reported by `store_params` are kept as the accessor's initial options
/// when there are any; with no options the accessor is provider-only.
fn apply_storage_options_provider(
    store_params: &mut ObjectStoreParams,
    provider: Option<Arc<dyn StorageOptionsProvider>>,
) {
    if let Some(provider) = provider {
        let initial = store_params.storage_options().cloned().unwrap_or_default();
        store_params.storage_options_accessor = Some(Arc::new(if initial.is_empty() {
            StorageOptionsAccessor::with_provider(provider)
        } else {
            StorageOptionsAccessor::with_initial_and_provider(initial, provider)
        }));
    }
}
/// Attach `provider` to the object-store params nested inside
/// `write_options`, creating default write params / store params on demand.
///
/// Does nothing when `provider` is `None`.
fn apply_storage_options_provider_to_write_options(
    write_options: &mut WriteOptions,
    provider: Option<Arc<dyn StorageOptionsProvider>>,
) {
    // Return before touching `write_options` so a missing provider never
    // materializes default write or store params.
    if provider.is_none() {
        return;
    }
    let write_params = write_options
        .lance_write_params
        .get_or_insert_with(Default::default);
    let store_params = write_params
        .store_params
        .get_or_insert_with(Default::default);
    apply_storage_options_provider(store_params, provider);
}
/// Attach `provider` to the object-store options nested inside
/// `read_params`, creating default store options on demand.
///
/// Does nothing when `provider` is `None`.
fn apply_storage_options_provider_to_read_params(
    read_params: &mut ReadParams,
    provider: Option<Arc<dyn StorageOptionsProvider>>,
) {
    // Only reach into `read_params` when there is a provider to install, so
    // a `None` provider leaves `store_options` untouched.
    if let Some(provider) = provider {
        let store_options = read_params
            .store_options
            .get_or_insert_with(Default::default);
        apply_storage_options_provider(store_options, Some(provider));
    }
}
use lance_io::object_store::StorageOptionsProvider;
/// A builder for configuring a [`Connection::table_names`] operation
pub struct TableNamesBuilder {
@@ -164,7 +106,6 @@ pub struct CreateTableBuilder<const HAS_DATA: bool> {
embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
embedding_registry: Arc<dyn EmbeddingRegistry>,
request: CreateTableRequest,
storage_options_provider: Option<Arc<dyn StorageOptionsProvider>>,
// This is a bit clumsy but we defer errors until `execute` is called
// to maintain backwards compatibility
data: CreateTableBuilderInitialData,
@@ -187,7 +128,6 @@ impl CreateTableBuilder<true> {
),
embeddings: Vec::new(),
embedding_registry,
storage_options_provider: None,
data: CreateTableBuilderInitialData::Iterator(data.into_arrow()),
}
}
@@ -207,7 +147,6 @@ impl CreateTableBuilder<true> {
),
embeddings: Vec::new(),
embedding_registry,
storage_options_provider: None,
data: CreateTableBuilderInitialData::Stream(data.into_arrow()),
}
}
@@ -229,30 +168,20 @@ impl CreateTableBuilder<true> {
match self.data {
CreateTableBuilderInitialData::Iterator(maybe_iter) => {
let data = maybe_iter?;
let mut request = CreateTableRequest {
Ok(CreateTableRequest {
data: CreateTableData::Data(data),
..self.request
};
apply_storage_options_provider_to_write_options(
&mut request.write_options,
self.storage_options_provider,
);
Ok(request)
})
}
CreateTableBuilderInitialData::None => {
unreachable!("No data provided for CreateTableBuilder<true>")
}
CreateTableBuilderInitialData::Stream(maybe_stream) => {
let data = maybe_stream?;
let mut request = CreateTableRequest {
Ok(CreateTableRequest {
data: CreateTableData::StreamingData(data),
..self.request
};
apply_storage_options_provider_to_write_options(
&mut request.write_options,
self.storage_options_provider,
);
Ok(request)
})
}
}
} else {
@@ -261,15 +190,10 @@ impl CreateTableBuilder<true> {
};
let data = maybe_iter?;
let data = Box::new(WithEmbeddings::new(data, self.embeddings));
let mut request = CreateTableRequest {
Ok(CreateTableRequest {
data: CreateTableData::Data(data),
..self.request
};
apply_storage_options_provider_to_write_options(
&mut request.write_options,
self.storage_options_provider,
);
Ok(request)
})
}
}
}
@@ -289,19 +213,13 @@ impl CreateTableBuilder<false> {
data: CreateTableBuilderInitialData::None,
embeddings: Vec::default(),
embedding_registry,
storage_options_provider: None,
}
}
/// Execute the create table operation
pub async fn execute(self) -> Result<Table> {
let parent = self.parent.clone();
let mut request = self.request;
apply_storage_options_provider_to_write_options(
&mut request.write_options,
self.storage_options_provider,
);
let table = parent.create_table(request).await?;
let table = parent.create_table(self.request).await?;
Ok(Table::new(table, parent))
}
}
@@ -328,14 +246,16 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
///
/// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_params = self
let store_options = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
merge_storage_options(store_params, [(key.into(), value.into())]);
store_options.insert(key.into(), value.into());
self
}
@@ -349,20 +269,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self {
let store_params = self
let store_options = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
merge_storage_options(
store_params,
pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into())),
);
for (key, value) in pairs {
store_options.insert(key.into(), value.into());
}
self
}
@@ -399,21 +318,23 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This has no effect in LanceDB Cloud.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
let store_params = self
let storage_options = self
.request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
let value = if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
};
merge_storage_options(
store_params,
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
storage_options.insert(
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
},
);
self
}
@@ -423,19 +344,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// The default is `LanceFileVersion::Stable`.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
let store_params = self
let storage_options = self
.request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
merge_storage_options(
store_params,
[(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
)],
storage_options.insert(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
);
self
}
@@ -460,7 +381,13 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.storage_options_provider = Some(provider);
self.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
self
}
}
@@ -470,7 +397,6 @@ pub struct OpenTableBuilder {
parent: Arc<dyn Database>,
request: OpenTableRequest,
embedding_registry: Arc<dyn EmbeddingRegistry>,
storage_options_provider: Option<Arc<dyn StorageOptionsProvider>>,
}
impl OpenTableBuilder {
@@ -490,7 +416,6 @@ impl OpenTableBuilder {
namespace_client: None,
},
embedding_registry,
storage_options_provider: None,
}
}
@@ -525,13 +450,15 @@ impl OpenTableBuilder {
///
/// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_params = self
let storage_options = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
merge_storage_options(store_params, [(key.into(), value.into())]);
storage_options.insert(key.into(), value.into());
self
}
@@ -545,19 +472,18 @@ impl OpenTableBuilder {
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self {
let store_params = self
let storage_options = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
merge_storage_options(
store_params,
pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into())),
);
for (key, value) in pairs {
storage_options.insert(key.into(), value.into());
}
self
}
@@ -581,23 +507,18 @@ impl OpenTableBuilder {
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.storage_options_provider = Some(provider);
self.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
self
}
/// Open the table
pub async fn execute(self) -> Result<Table> {
let mut request = self.request;
if let Some(provider) = self.storage_options_provider {
if let Some(read_params) = request.lance_read_params.as_mut() {
apply_storage_options_provider_to_read_params(read_params, Some(provider));
} else {
let mut read_params = ReadParams::default();
apply_storage_options_provider_to_read_params(&mut read_params, Some(provider));
request.lance_read_params = Some(read_params);
}
}
let table = self.parent.open_table(request).await?;
let table = self.parent.open_table(self.request).await?;
Ok(Table::new_with_embedding_registry(
table,
self.parent,

View File

@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
use lance_io::object_store::StorageOptionsProvider;
use lance_table::io::commit::commit_handler_from_url;
use object_store::local::LocalFileSystem;
use snafu::ResultExt;
@@ -42,49 +42,6 @@ pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version"
pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths";
pub const OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS: &str = "new_table_enable_stable_row_ids";
/// Build a shared [`StorageOptionsAccessor`] from optional static options
/// and an optional provider.
///
/// Returns `None` when neither is supplied; otherwise picks the accessor
/// constructor matching the inputs that are present.
fn build_storage_options_accessor(
    options: Option<HashMap<String, String>>,
    provider: Option<Arc<dyn StorageOptionsProvider>>,
) -> Option<Arc<StorageOptionsAccessor>> {
    let accessor = match (options, provider) {
        (None, None) => return None,
        (Some(opts), None) => StorageOptionsAccessor::with_static_options(opts),
        (None, Some(provider)) => StorageOptionsAccessor::with_provider(provider),
        (Some(opts), Some(provider)) => {
            StorageOptionsAccessor::with_initial_and_provider(opts, provider)
        }
    };
    Some(Arc::new(accessor))
}
/// Merge `pairs` into the storage options currently reported by
/// `store_params`, then replace its accessor with a static one holding the
/// merged map.
///
/// Entries from `pairs` take precedence over existing entries that share a
/// key.
// NOTE(review): the accessor is rebuilt with `with_static_options`, so a
// provider attached to the previous accessor is presumably not carried over;
// confirm against callers.
fn merge_storage_options(
    store_params: &mut ObjectStoreParams,
    pairs: impl IntoIterator<Item = (String, String)>,
) {
    let mut combined = store_params.storage_options().cloned().unwrap_or_default();
    pairs.into_iter().for_each(|(name, setting)| {
        combined.insert(name, setting);
    });
    let static_accessor = StorageOptionsAccessor::with_static_options(combined);
    store_params.storage_options_accessor = Some(Arc::new(static_accessor));
}
/// Install `provider` on `store_params` by replacing its storage options
/// accessor.
///
/// A `None` provider leaves `store_params` untouched. When options are
/// already present they become the new accessor's initial options;
/// otherwise the accessor is built from the provider alone.
fn apply_storage_options_provider(
    store_params: &mut ObjectStoreParams,
    provider: Option<Arc<dyn StorageOptionsProvider>>,
) {
    match provider {
        None => {}
        Some(provider) => {
            let existing = store_params.storage_options().cloned().unwrap_or_default();
            let rebuilt = if existing.is_empty() {
                StorageOptionsAccessor::with_provider(provider)
            } else {
                StorageOptionsAccessor::with_initial_and_provider(existing, provider)
            };
            store_params.storage_options_accessor = Some(Arc::new(rebuilt));
        }
    }
}
/// Controls how new tables should be created
#[derive(Clone, Debug, Default)]
pub struct NewTableConfig {
@@ -399,10 +356,7 @@ impl ListingDatabase {
.clone()
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
let os_params = ObjectStoreParams {
storage_options_accessor: build_storage_options_accessor(
Some(options.storage_options.clone()),
None,
),
storage_options: Some(options.storage_options.clone()),
..Default::default()
};
let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -509,9 +463,20 @@ impl ListingDatabase {
validate_table_name(name)?;
let mut uri = self.uri.clone();
// If the URI does not end with a slash, add one
if !uri.ends_with('/') {
uri.push('/');
// If the URI does not end with a path separator, add one
// Use forward slash for URIs (http://, s3://, gs://, file://, etc.)
// Use platform-specific separator for local paths without scheme
let has_scheme = uri.contains("://");
let ends_with_separator = uri.ends_with('/') || uri.ends_with('\\');
if !ends_with_separator {
if has_scheme {
// URIs always use forward slash
uri.push('/');
} else {
// Local path without scheme - use platform separator
uri.push(std::path::MAIN_SEPARATOR);
}
}
// Append the table name with the lance file extension
uri.push_str(&format!("{}.{}", name, LANCE_FILE_EXTENSION));
@@ -527,10 +492,7 @@ impl ListingDatabase {
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
let object_store_params = ObjectStoreParams {
storage_options_accessor: build_storage_options_accessor(
Some(self.storage_options.clone()),
self.storage_options_provider.clone(),
),
storage_options: Some(self.storage_options.clone()),
..Default::default()
};
let mut uri = self.uri.clone();
@@ -579,7 +541,7 @@ impl ListingDatabase {
.lance_write_params
.as_ref()
.and_then(|p| p.store_params.as_ref())
.and_then(|sp| sp.storage_options());
.and_then(|sp| sp.storage_options.as_ref());
let storage_version_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
@@ -631,20 +593,20 @@ impl ListingDatabase {
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() {
let store_params = write_params
let storage_options = write_params
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
let mut inherited = store_params.storage_options().cloned().unwrap_or_default();
self.inherit_storage_options(&mut inherited);
merge_storage_options(store_params, inherited);
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
let store_params = write_params
write_params
.store_params
.get_or_insert_with(Default::default);
apply_storage_options_provider(store_params, self.storage_options_provider.clone());
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
}
write_params.data_storage_version = self
@@ -930,10 +892,7 @@ impl Database for ListingDatabase {
validate_table_name(&request.target_table_name)?;
let storage_params = ObjectStoreParams {
storage_options_accessor: build_storage_options_accessor(
Some(self.storage_options.clone()),
self.storage_options_provider.clone(),
),
storage_options: Some(self.storage_options.clone()),
..Default::default()
};
let read_params = ReadParams {
@@ -998,24 +957,24 @@ impl Database for ListingDatabase {
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() {
let store_params = request
let storage_options = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
let mut inherited = store_params.storage_options().cloned().unwrap_or_default();
self.inherit_storage_options(&mut inherited);
merge_storage_options(store_params, inherited);
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
let store_params = request
request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default);
apply_storage_options_provider(store_params, self.storage_options_provider.clone());
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -1123,6 +1082,7 @@ mod tests {
use crate::table::{Table, TableDefinition};
use arrow_array::{Int32Array, RecordBatch, StringArray};
use arrow_schema::{DataType, Field, Schema};
use std::path::PathBuf;
use tempfile::tempdir;
async fn setup_database() -> (tempfile::TempDir, ListingDatabase) {
@@ -1921,9 +1881,7 @@ mod tests {
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
@@ -1997,9 +1955,7 @@ mod tests {
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
@@ -2102,6 +2058,19 @@ mod tests {
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
}
// Checks that `table_uri("test")` equals the database URI joined with `test.lance`.
#[tokio::test]
async fn test_table_uri() {
    let (_tempdir, db) = setup_database().await;

    // Assemble the expectation through `PathBuf` so it is joined the same way
    // regardless of the host's path separator.
    let expected_path = PathBuf::from(&db.uri).join("test.lance");
    let expected = expected_path.to_str().unwrap();

    let actual = db.table_uri("test").ok().unwrap();
    assert_eq!(actual, expected);
}
#[tokio::test]
async fn test_namespace_client() {
let (_tempdir, db) = setup_database().await;

View File

@@ -204,6 +204,7 @@ pub struct RemoteTable<S: HttpSend = Sender> {
server_version: ServerVersion,
version: RwLock<Option<u64>>,
location: RwLock<Option<String>>,
}
impl<S: HttpSend> RemoteTable<S> {
@@ -221,6 +222,7 @@ impl<S: HttpSend> RemoteTable<S> {
identifier,
server_version,
version: RwLock::new(None),
location: RwLock::new(None),
}
}
@@ -639,6 +641,7 @@ impl<S: HttpSend> RemoteTable<S> {
/// Table metadata record with a version, a schema and an optional location.
// NOTE(review): the field names line up with the JSON bodies used by the describe
// tests in this file (`version`, `schema`, optional `location`) — confirm that this
// is the type the describe response is deserialized into.
struct TableDescription {
/// Table version number.
version: u64,
/// Table schema, carried as a `JsonSchema` value.
schema: JsonSchema,
/// Table location, when one is present; `None` otherwise.
location: Option<String>,
}
impl<S: HttpSend> std::fmt::Display for RemoteTable<S> {
@@ -667,6 +670,7 @@ mod test_utils {
identifier: name,
server_version: version.map(ServerVersion).unwrap_or_default(),
version: RwLock::new(None),
location: RwLock::new(None),
}
}
}
@@ -1461,8 +1465,28 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
message: "table_definition is not supported on LanceDB cloud.".into(),
})
}
fn dataset_uri(&self) -> &str {
"NOT_SUPPORTED"
async fn uri(&self) -> Result<String> {
// Check if we already have the location cached
{
let location = self.location.read().await;
if let Some(ref loc) = *location {
return Ok(loc.clone());
}
}
// Fetch from server via describe
let description = self.describe().await?;
let location = description.location.ok_or_else(|| Error::NotSupported {
message: "Table URI not supported by the server".into(),
})?;
// Cache the location for future use
{
let mut cached_location = self.location.write().await;
*cached_location = Some(location.clone());
}
Ok(location)
}
async fn storage_options(&self) -> Option<HashMap<String, String>> {
@@ -3332,4 +3356,69 @@ mod tests {
let result = table.drop_columns(&["old_col1", "old_col2"]).await.unwrap();
assert_eq!(result.version, 5);
}
// `uri()` must return the `location` value found in the describe response.
#[tokio::test]
async fn test_uri() {
    const DESCRIBE_BODY: &str =
        r#"{"version": 1, "schema": {"fields": []}, "location": "s3://bucket/path/to/table"}"#;

    let table = Table::new_with_handler("my_table", |req| {
        // The lookup is expected to be a POST to the describe endpoint.
        assert_eq!(req.method(), "POST");
        assert_eq!(req.url().path(), "/v1/table/my_table/describe/");
        http::Response::builder()
            .status(200)
            .body(DESCRIBE_BODY)
            .unwrap()
    });

    let resolved = table.uri().await.unwrap();
    assert_eq!(resolved, "s3://bucket/path/to/table");
}
#[tokio::test]
async fn test_uri_missing_location() {
let table = Table::new_with_handler("my_table", |request| {
assert_eq!(request.method(), "POST");
assert_eq!(request.url().path(), "/v1/table/my_table/describe/");
// Server returns response without location field
http::Response::builder()
.status(200)
.body(r#"{"version": 1, "schema": {"fields": []}}"#)
.unwrap()
});
let result = table.uri().await;
assert!(result.is_err());
assert!(matches!(&result, Err(Error::NotSupported { .. })));
}
// Two consecutive `uri()` calls must reach the describe endpoint only once.
#[tokio::test]
async fn test_uri_caching() {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    const DESCRIBE_BODY: &str =
        r#"{"version": 1, "schema": {"fields": []}, "location": "gs://bucket/table"}"#;

    // Counts how many requests reach the mock handler.
    let describe_calls = Arc::new(AtomicUsize::new(0));
    let handler_calls = Arc::clone(&describe_calls);

    let table = Table::new_with_handler("my_table", move |req| {
        assert_eq!(req.url().path(), "/v1/table/my_table/describe/");
        handler_calls.fetch_add(1, Ordering::SeqCst);
        http::Response::builder()
            .status(200)
            .body(DESCRIBE_BODY)
            .unwrap()
    });

    // First iteration fetches from the server; the second must be served from the
    // cache, so the handler count is 1 after each call.
    for _ in 0..2 {
        let uri = table.uri().await.unwrap();
        assert_eq!(uri, "gs://bucket/table");
        assert_eq!(describe_calls.load(Ordering::SeqCst), 1);
    }
}
}

View File

@@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt;
use lance_index::IndexType;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
@@ -608,8 +608,8 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
async fn list_versions(&self) -> Result<Vec<Version>>;
/// Get the table definition.
async fn table_definition(&self) -> Result<TableDefinition>;
/// Get the table URI
fn dataset_uri(&self) -> &str;
/// Get the table URI (storage location)
async fn uri(&self) -> Result<String>;
/// Get the storage options used when opening this table, if any.
async fn storage_options(&self) -> Option<HashMap<String, String>>;
/// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
@@ -1317,11 +1317,12 @@ impl Table {
self.inner.list_indices().await
}
/// Get the underlying dataset URI
/// Get the table URI (storage location)
///
/// Warning: This is an internal API and the return value is subject to change.
pub fn dataset_uri(&self) -> &str {
self.inner.dataset_uri()
/// Returns the full storage location of the table (e.g., S3/GCS path).
/// For remote tables, this fetches the location from the server via describe.
///
/// # Errors
///
/// Returns whatever error the call on `self.inner` produces; nothing is added or
/// translated here.
pub async fn uri(&self) -> Result<String> {
// Pure delegation: the wrapped table decides how the URI is obtained.
self.inner.uri().await
}
/// Get the storage options used when opening this table, if any.
@@ -1667,14 +1668,18 @@ impl NativeTable {
// Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace
let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let builder = DatasetBuilder::from_namespace(
namespace_client.clone(),
table_id,
false, // Don't ignore namespace storage options
)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder
.with_read_params(params)
@@ -1878,9 +1883,7 @@ impl NativeTable {
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_provider(storage_options_provider),
));
store_params.storage_options_provider = Some(storage_options_provider);
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
@@ -3232,8 +3235,8 @@ impl BaseTable for NativeTable {
Ok(results.into_iter().flatten().collect())
}
fn dataset_uri(&self) -> &str {
self.uri.as_str()
/// Returns an owned copy of this table's stored `uri`; this implementation always
/// yields `Ok`.
async fn uri(&self) -> Result<String> {
Ok(self.uri.clone())
}
async fn storage_options(&self) -> Option<HashMap<String, String>> {
@@ -3241,7 +3244,7 @@ impl BaseTable for NativeTable {
.get()
.await
.ok()
.and_then(|dataset| dataset.initial_storage_options().cloned())
.and_then(|dataset| dataset.storage_options().cloned())
}
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {