mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
3 Commits
python-v0.
...
add-data-t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8a72d8194f | ||
|
|
56ef4066c0 | ||
|
|
79693ef1d2 |
24
Cargo.toml
24
Cargo.toml
@@ -14,19 +14,19 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.10.16", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.10.16" }
|
||||
lance-linalg = { "version" = "=0.10.16" }
|
||||
lance-testing = { "version" = "=0.10.16" }
|
||||
lance = { "version" = "=0.10.15", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.10.15" }
|
||||
lance-linalg = { "version" = "=0.10.15" }
|
||||
lance-testing = { "version" = "=0.10.15" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "51.0", optional = false }
|
||||
arrow-array = "51.0"
|
||||
arrow-data = "51.0"
|
||||
arrow-ipc = "51.0"
|
||||
arrow-ord = "51.0"
|
||||
arrow-schema = "51.0"
|
||||
arrow-arith = "51.0"
|
||||
arrow-cast = "51.0"
|
||||
arrow = { version = "50.0", optional = false }
|
||||
arrow-array = "50.0"
|
||||
arrow-data = "50.0"
|
||||
arrow-ipc = "50.0"
|
||||
arrow-ord = "50.0"
|
||||
arrow-schema = "50.0"
|
||||
arrow-arith = "50.0"
|
||||
arrow-cast = "50.0"
|
||||
async-trait = "0"
|
||||
chrono = "0.4.35"
|
||||
half = { "version" = "=2.3.1", default-features = false, features = [
|
||||
|
||||
@@ -140,9 +140,6 @@ export class RemoteConnection implements Connection {
|
||||
schema = nameOrOpts.schema
|
||||
embeddings = nameOrOpts.embeddingFunction
|
||||
tableName = nameOrOpts.name
|
||||
if (data === undefined) {
|
||||
data = nameOrOpts.data
|
||||
}
|
||||
}
|
||||
|
||||
let buffer: Buffer
|
||||
|
||||
@@ -77,18 +77,6 @@ export interface OpenTableOptions {
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
/**
|
||||
* Set the size of the index cache, specified as a number of entries
|
||||
*
|
||||
* The exact meaning of an "entry" will depend on the type of index:
|
||||
* - IVF: there is one entry for each IVF partition
|
||||
* - BTREE: there is one entry for the entire index
|
||||
*
|
||||
* This cache applies to the entire opened table, across all indices.
|
||||
* Setting this value higher will increase performance on larger datasets
|
||||
* at the expense of more RAM
|
||||
*/
|
||||
indexCacheSize?: number;
|
||||
}
|
||||
|
||||
export interface TableNamesOptions {
|
||||
@@ -172,7 +160,6 @@ export class Connection {
|
||||
const innerTable = await this.inner.openTable(
|
||||
name,
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
options?.indexCacheSize,
|
||||
);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
@@ -176,7 +176,6 @@ impl Connection {
|
||||
&self,
|
||||
name: String,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
index_cache_size: Option<u32>,
|
||||
) -> napi::Result<Table> {
|
||||
let mut builder = self.get_inner()?.open_table(&name);
|
||||
if let Some(storage_options) = storage_options {
|
||||
@@ -184,9 +183,6 @@ impl Connection {
|
||||
builder = builder.storage_option(key, value);
|
||||
}
|
||||
}
|
||||
if let Some(index_cache_size) = index_cache_size {
|
||||
builder = builder.index_cache_size(index_cache_size);
|
||||
}
|
||||
let tbl = builder
|
||||
.execute()
|
||||
.await
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.6.11
|
||||
current_version = 0.6.10
|
||||
commit = True
|
||||
message = [python] Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
@@ -14,7 +14,7 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "51.0.0", features = ["pyarrow"] }
|
||||
arrow = { version = "50.0.0", features = ["pyarrow"] }
|
||||
lancedb = { path = "../rust/lancedb" }
|
||||
env_logger = "0.10"
|
||||
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "lancedb"
|
||||
version = "0.6.11"
|
||||
version = "0.6.10"
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.10.12",
|
||||
|
||||
@@ -224,23 +224,13 @@ class DBConnection(EnforceOverrides):
|
||||
def __getitem__(self, name: str) -> LanceTable:
|
||||
return self.open_table(name)
|
||||
|
||||
def open_table(self, name: str, *, index_cache_size: Optional[int] = None) -> Table:
|
||||
def open_table(self, name: str) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
index_cache_size: int, default 256
|
||||
Set the size of the index cache, specified as a number of entries
|
||||
|
||||
The exact meaning of an "entry" will depend on the type of index:
|
||||
* IVF - there is one entry for each IVF partition
|
||||
* BTREE - there is one entry for the entire index
|
||||
|
||||
This cache applies to the entire opened table, across all indices.
|
||||
Setting this value higher will increase performance on larger datasets
|
||||
at the expense of more RAM
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -258,18 +248,6 @@ class DBConnection(EnforceOverrides):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def rename_table(self, cur_name: str, new_name: str):
|
||||
"""Rename a table in the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cur_name: str
|
||||
The current name of the table.
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_database(self):
|
||||
"""
|
||||
Drop database
|
||||
@@ -429,9 +407,7 @@ class LanceDBConnection(DBConnection):
|
||||
return tbl
|
||||
|
||||
@override
|
||||
def open_table(
|
||||
self, name: str, *, index_cache_size: Optional[int] = None
|
||||
) -> LanceTable:
|
||||
def open_table(self, name: str) -> LanceTable:
|
||||
"""Open a table in the database.
|
||||
|
||||
Parameters
|
||||
@@ -443,7 +419,7 @@ class LanceDBConnection(DBConnection):
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
return LanceTable.open(self, name, index_cache_size=index_cache_size)
|
||||
return LanceTable.open(self, name)
|
||||
|
||||
@override
|
||||
def drop_table(self, name: str, ignore_missing: bool = False):
|
||||
@@ -775,10 +751,7 @@ class AsyncConnection(object):
|
||||
return AsyncTable(new_table)
|
||||
|
||||
async def open_table(
|
||||
self,
|
||||
name: str,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
self, name: str, storage_options: Optional[Dict[str, str]] = None
|
||||
) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
@@ -791,22 +764,12 @@ class AsyncConnection(object):
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
https://lancedb.github.io/lancedb/guides/storage/
|
||||
index_cache_size: int, default 256
|
||||
Set the size of the index cache, specified as a number of entries
|
||||
|
||||
The exact meaning of an "entry" will depend on the type of index:
|
||||
* IVF - there is one entry for each IVF partition
|
||||
* BTREE - there is one entry for the entire index
|
||||
|
||||
This cache applies to the entire opened table, across all indices.
|
||||
Setting this value higher will increase performance on larger datasets
|
||||
at the expense of more RAM
|
||||
|
||||
Returns
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
table = await self._inner.open_table(name, storage_options, index_cache_size)
|
||||
table = await self._inner.open_table(name, storage_options)
|
||||
return AsyncTable(table)
|
||||
|
||||
async def drop_table(self, name: str):
|
||||
|
||||
@@ -94,7 +94,7 @@ class RemoteDBConnection(DBConnection):
|
||||
yield item
|
||||
|
||||
@override
|
||||
def open_table(self, name: str, *, index_cache_size: Optional[int] = None) -> Table:
|
||||
def open_table(self, name: str) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
Parameters
|
||||
@@ -110,12 +110,6 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
self._client.mount_retry_adapter_for_table(name)
|
||||
|
||||
if index_cache_size is not None:
|
||||
logging.info(
|
||||
"index_cache_size is ignored in LanceDb Cloud"
|
||||
" (there is no local cache to configure)"
|
||||
)
|
||||
|
||||
# check if table exists
|
||||
if self._table_cache.get(name) is None:
|
||||
self._client.post(f"/v1/table/{name}/describe/")
|
||||
|
||||
@@ -806,7 +806,6 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
|
||||
"""Reference to the latest version of a LanceDataset."""
|
||||
|
||||
uri: str
|
||||
index_cache_size: Optional[int] = None
|
||||
read_consistency_interval: Optional[timedelta] = None
|
||||
last_consistency_check: Optional[float] = None
|
||||
_dataset: Optional[LanceDataset] = None
|
||||
@@ -814,9 +813,7 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
|
||||
@property
|
||||
def dataset(self) -> LanceDataset:
|
||||
if not self._dataset:
|
||||
self._dataset = lance.dataset(
|
||||
self.uri, index_cache_size=self.index_cache_size
|
||||
)
|
||||
self._dataset = lance.dataset(self.uri)
|
||||
self.last_consistency_check = time.monotonic()
|
||||
elif self.read_consistency_interval is not None:
|
||||
now = time.monotonic()
|
||||
@@ -845,15 +842,12 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
|
||||
class _LanceTimeTravelRef(_LanceDatasetRef):
|
||||
uri: str
|
||||
version: int
|
||||
index_cache_size: Optional[int] = None
|
||||
_dataset: Optional[LanceDataset] = None
|
||||
|
||||
@property
|
||||
def dataset(self) -> LanceDataset:
|
||||
if not self._dataset:
|
||||
self._dataset = lance.dataset(
|
||||
self.uri, version=self.version, index_cache_size=self.index_cache_size
|
||||
)
|
||||
self._dataset = lance.dataset(self.uri, version=self.version)
|
||||
return self._dataset
|
||||
|
||||
@dataset.setter
|
||||
@@ -890,8 +884,6 @@ class LanceTable(Table):
|
||||
connection: "LanceDBConnection",
|
||||
name: str,
|
||||
version: Optional[int] = None,
|
||||
*,
|
||||
index_cache_size: Optional[int] = None,
|
||||
):
|
||||
self._conn = connection
|
||||
self.name = name
|
||||
@@ -900,13 +892,11 @@ class LanceTable(Table):
|
||||
self._ref = _LanceTimeTravelRef(
|
||||
uri=self._dataset_uri,
|
||||
version=version,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
else:
|
||||
self._ref = _LanceLatestDatasetRef(
|
||||
uri=self._dataset_uri,
|
||||
read_consistency_interval=connection.read_consistency_interval,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -368,15 +368,6 @@ async def test_create_exist_ok_async(tmp_path):
|
||||
# await db.create_table("test", schema=bad_schema, exist_ok=True)
|
||||
|
||||
|
||||
def test_open_table_sync(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
db.create_table("test", data=[{"id": 0}])
|
||||
assert db.open_table("test").count_rows() == 1
|
||||
assert db.open_table("test", index_cache_size=0).count_rows() == 1
|
||||
with pytest.raises(FileNotFoundError, match="does not exist"):
|
||||
db.open_table("does_not_exist")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_open_table(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
@@ -406,10 +397,6 @@ async def test_open_table(tmp_path):
|
||||
}
|
||||
)
|
||||
|
||||
# No way to verify this yet, but at least make sure we
|
||||
# can pass the parameter
|
||||
await db.open_table("test", index_cache_size=0)
|
||||
|
||||
with pytest.raises(ValueError, match="was not found"):
|
||||
await db.open_table("does_not_exist")
|
||||
|
||||
|
||||
@@ -134,21 +134,17 @@ impl Connection {
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (name, storage_options = None, index_cache_size = None))]
|
||||
#[pyo3(signature = (name, storage_options = None))]
|
||||
pub fn open_table(
|
||||
self_: PyRef<'_, Self>,
|
||||
name: String,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
index_cache_size: Option<u32>,
|
||||
) -> PyResult<&PyAny> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
let mut builder = inner.open_table(name);
|
||||
if let Some(storage_options) = storage_options {
|
||||
builder = builder.storage_options(storage_options);
|
||||
}
|
||||
if let Some(index_cache_size) = index_cache_size {
|
||||
builder = builder.index_cache_size(index_cache_size);
|
||||
}
|
||||
future_into_py(self_.py(), async move {
|
||||
let table = builder.execute().await.infer_error()?;
|
||||
Ok(Table::new(table))
|
||||
|
||||
@@ -52,7 +52,7 @@ aws-sdk-kms = { version = "1.0" }
|
||||
aws-config = { version = "1.0" }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
default = ["remote"]
|
||||
remote = ["dep:reqwest"]
|
||||
fp16kernels = ["lance-linalg/fp16kernels"]
|
||||
s3-test = []
|
||||
s3-test = []
|
||||
@@ -33,9 +33,6 @@ use crate::table::{NativeTable, WriteOptions};
|
||||
use crate::utils::validate_table_name;
|
||||
use crate::Table;
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
use log::warn;
|
||||
|
||||
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||
|
||||
pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableBuilder) -> OpenTableBuilder + Send>;
|
||||
@@ -582,7 +579,6 @@ impl ConnectBuilder {
|
||||
let api_key = self.api_key.ok_or_else(|| Error::InvalidInput {
|
||||
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
|
||||
})?;
|
||||
warn!("The rust implementation of the remote client is not yet ready for use.");
|
||||
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
|
||||
&self.uri,
|
||||
&api_key,
|
||||
@@ -913,23 +909,12 @@ impl ConnectionInternal for Database {
|
||||
}
|
||||
}
|
||||
|
||||
// Some ReadParams are exposed in the OpenTableBuilder, but we also
|
||||
// let the user provide their own ReadParams.
|
||||
//
|
||||
// If we have a user provided ReadParams use that
|
||||
// If we don't then start with the default ReadParams and customize it with
|
||||
// the options from the OpenTableBuilder
|
||||
let read_params = options.lance_read_params.unwrap_or_else(|| ReadParams {
|
||||
index_cache_size: options.index_cache_size as usize,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let native_table = Arc::new(
|
||||
NativeTable::open_with_params(
|
||||
&table_uri,
|
||||
&options.name,
|
||||
self.store_wrapper.clone(),
|
||||
Some(read_params),
|
||||
options.lance_read_params,
|
||||
self.read_consistency_interval,
|
||||
)
|
||||
.await?,
|
||||
@@ -1047,6 +1032,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "this can't pass due to https://github.com/lancedb/lancedb/issues/1019, enable it after the bug fixed"]
|
||||
async fn test_open_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
@@ -350,16 +350,8 @@ mod test {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_e2e() {
|
||||
let dir1 = tempfile::tempdir()
|
||||
.unwrap()
|
||||
.into_path()
|
||||
.canonicalize()
|
||||
.unwrap();
|
||||
let dir2 = tempfile::tempdir()
|
||||
.unwrap()
|
||||
.into_path()
|
||||
.canonicalize()
|
||||
.unwrap();
|
||||
let dir1 = tempfile::tempdir().unwrap().into_path();
|
||||
let dir2 = tempfile::tempdir().unwrap().into_path();
|
||||
|
||||
let secondary_store = LocalFileSystem::new_with_prefix(dir2.to_str().unwrap()).unwrap();
|
||||
let object_store_wrapper = Arc::new(MirroringObjectStoreWrapper {
|
||||
|
||||
@@ -34,16 +34,6 @@
|
||||
//! cargo install lancedb
|
||||
//! ```
|
||||
//!
|
||||
//! ## Crate Features
|
||||
//!
|
||||
//! ### Experimental Features
|
||||
//!
|
||||
//! These features are not enabled by default. They are experimental or in-development features that
|
||||
//! are not yet ready to be released.
|
||||
//!
|
||||
//! - `remote` - Enable remote client to connect to LanceDB cloud. This is not yet fully implemented
|
||||
//! and should not be enabled.
|
||||
//!
|
||||
//! ### Quick Start
|
||||
//!
|
||||
//! #### Connect to a database.
|
||||
|
||||
Reference in New Issue
Block a user