feat!: upgrade lance to 0.16 (#1519)

This commit is contained in:
Lei Xu
2024-08-07 13:15:22 -07:00
committed by GitHub
parent 32123713fd
commit 2bdf0a02f9
16 changed files with 153 additions and 75 deletions

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.15.0",
"pylance==0.16.0",
"ratelimiter~=1.0",
"requests>=2.31.0",
"retry>=0.9.2",

View File

@@ -24,7 +24,7 @@ class Connection(object):
mode: str,
data: pa.RecordBatchReader,
storage_options: Optional[Dict[str, str]] = None,
use_legacy_format: Optional[bool] = None,
data_storage_version: Optional[str] = None,
) -> Table: ...
async def create_empty_table(
self,
@@ -32,7 +32,7 @@ class Connection(object):
mode: str,
schema: pa.Schema,
storage_options: Optional[Dict[str, str]] = None,
use_legacy_format: Optional[bool] = None,
data_storage_version: Optional[str] = None,
) -> Table: ...
class Table:

View File

@@ -560,6 +560,7 @@ class AsyncConnection(object):
fill_value: Optional[float] = None,
storage_options: Optional[Dict[str, str]] = None,
*,
data_storage_version: Optional[str] = None,
use_legacy_format: Optional[bool] = None,
) -> AsyncTable:
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
@@ -603,9 +604,15 @@ class AsyncConnection(object):
connection will be inherited by the table, but can be overridden here.
See available options at
https://lancedb.github.io/lancedb/guides/storage/
use_legacy_format: bool, optional, default True
data_storage_version: str, optional, default "legacy"
The version of the data storage format to use. Newer versions are more
efficient but require newer versions of lance to read. The default is
"legacy", which uses the legacy (v1) format. See the user guide
for more details.
use_legacy_format: bool, optional, default True. (Deprecated)
If True, use the legacy format for the table. If False, use the new format.
The default is True while the new format is in beta.
This parameter is deprecated; use `data_storage_version` instead.
Returns
@@ -765,13 +772,18 @@ class AsyncConnection(object):
if mode == "create" and exist_ok:
mode = "exist_ok"
if not data_storage_version:
data_storage_version = (
"legacy" if use_legacy_format is None or use_legacy_format else "stable"
)
if data is None:
new_table = await self._inner.create_empty_table(
name,
mode,
schema,
storage_options=storage_options,
use_legacy_format=use_legacy_format,
data_storage_version=data_storage_version,
)
else:
data = data_to_reader(data, schema)
@@ -780,7 +792,7 @@ class AsyncConnection(object):
mode,
data,
storage_options=storage_options,
use_legacy_format=use_legacy_format,
data_storage_version=data_storage_version,
)
return AsyncTable(new_table)

View File

@@ -730,7 +730,7 @@ def test_create_scalar_index(db):
indices = table.to_lance().list_indices()
assert len(indices) == 1
scalar_index = indices[0]
assert scalar_index["type"] == "Scalar"
assert scalar_index["type"] == "BTree"
# Confirm that prefiltering still works with the scalar index column
results = table.search().where("x = 'c'").to_arrow()

View File

@@ -1,21 +1,10 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{collections::HashMap, sync::Arc, time::Duration};
use std::{collections::HashMap, str::FromStr, sync::Arc, time::Duration};
use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
use lancedb::connection::{Connection as LanceConnection, CreateTableMode};
use lancedb::connection::{Connection as LanceConnection, CreateTableMode, LanceFileVersion};
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, Bound, PyAny, PyRef, PyResult, Python,
@@ -91,7 +80,7 @@ impl Connection {
mode: &str,
data: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
use_legacy_format: Option<bool>,
data_storage_version: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -104,8 +93,11 @@ impl Connection {
builder = builder.storage_options(storage_options);
}
if let Some(use_legacy_format) = use_legacy_format {
builder = builder.use_legacy_format(use_legacy_format);
if let Some(data_storage_version) = data_storage_version.as_ref() {
builder = builder.data_storage_version(
LanceFileVersion::from_str(data_storage_version)
.map_err(|e| PyValueError::new_err(e.to_string()))?,
);
}
future_into_py(self_.py(), async move {
@@ -120,7 +112,7 @@ impl Connection {
mode: &str,
schema: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
use_legacy_format: Option<bool>,
data_storage_version: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -134,8 +126,11 @@ impl Connection {
builder = builder.storage_options(storage_options);
}
if let Some(use_legacy_format) = use_legacy_format {
builder = builder.use_legacy_format(use_legacy_format);
if let Some(data_storage_version) = data_storage_version.as_ref() {
builder = builder.data_storage_version(
LanceFileVersion::from_str(data_storage_version)
.map_err(|e| PyValueError::new_err(e.to_string()))?,
);
}
future_into_py(self_.py(), async move {

View File

@@ -63,7 +63,10 @@ pub struct Table {
#[pymethods]
impl OptimizeStats {
pub fn __repr__(&self) -> String {
format!("OptimizeStats(compaction={:?}, prune={:?})", self.compaction, self.prune)
format!(
"OptimizeStats(compaction={:?}, prune={:?})",
self.compaction, self.prune
)
}
}
@@ -273,6 +276,7 @@ impl Table {
.optimize(OptimizeAction::Prune {
older_than,
delete_unverified: None,
error_if_tagged_old_versions: None,
})
.await
.infer_error()?