mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-03 10:22:56 +00:00
Compare commits
3 Commits
python-v0.
...
changhiskh
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9eca8e7cd1 | ||
|
|
587fe6ffc1 | ||
|
|
89c8e5839b |
@@ -397,14 +397,6 @@ class LanceTable(Table):
|
|||||||
self.name = name
|
self.name = name
|
||||||
self._version = version
|
self._version = version
|
||||||
|
|
||||||
def _reset_dataset(self, version=None):
|
|
||||||
try:
|
|
||||||
if "_dataset" in self.__dict__:
|
|
||||||
del self.__dict__["_dataset"]
|
|
||||||
self._version = version
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def schema(self) -> pa.Schema:
|
def schema(self) -> pa.Schema:
|
||||||
"""Return the schema of the table.
|
"""Return the schema of the table.
|
||||||
@@ -413,16 +405,16 @@ class LanceTable(Table):
|
|||||||
-------
|
-------
|
||||||
pa.Schema
|
pa.Schema
|
||||||
A PyArrow schema object."""
|
A PyArrow schema object."""
|
||||||
return self._dataset.schema
|
return self.to_lance().schema
|
||||||
|
|
||||||
def list_versions(self):
|
def list_versions(self):
|
||||||
"""List all versions of the table"""
|
"""List all versions of the table"""
|
||||||
return self._dataset.versions()
|
return self.to_lance().versions()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def version(self) -> int:
|
def version(self) -> int:
|
||||||
"""Get the current version of the table"""
|
"""Get the current version of the table"""
|
||||||
return self._dataset.version
|
return self.to_lance().version
|
||||||
|
|
||||||
def checkout(self, version: int):
|
def checkout(self, version: int):
|
||||||
"""Checkout a version of the table. This is an in-place operation.
|
"""Checkout a version of the table. This is an in-place operation.
|
||||||
@@ -455,14 +447,12 @@ class LanceTable(Table):
|
|||||||
vector type
|
vector type
|
||||||
0 [1.1, 0.9] vector
|
0 [1.1, 0.9] vector
|
||||||
"""
|
"""
|
||||||
max_ver = max([v["version"] for v in self._dataset.versions()])
|
max_ver = max([v["version"] for v in self.to_lance().versions()])
|
||||||
if version < 1 or version > max_ver:
|
if version < 1 or version > max_ver:
|
||||||
raise ValueError(f"Invalid version {version}")
|
raise ValueError(f"Invalid version {version}")
|
||||||
self._reset_dataset(version=version)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Accessing the property updates the cached value
|
self.to_lance().checkout(version)
|
||||||
_ = self._dataset
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if "not found" in str(e):
|
if "not found" in str(e):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@@ -505,7 +495,7 @@ class LanceTable(Table):
|
|||||||
>>> len(table.list_versions())
|
>>> len(table.list_versions())
|
||||||
4
|
4
|
||||||
"""
|
"""
|
||||||
max_ver = max([v["version"] for v in self._dataset.versions()])
|
max_ver = max([v["version"] for v in self.to_lance().versions()])
|
||||||
if version is None:
|
if version is None:
|
||||||
version = self.version
|
version = self.version
|
||||||
elif version < 1 or version > max_ver:
|
elif version < 1 or version > max_ver:
|
||||||
@@ -517,11 +507,10 @@ class LanceTable(Table):
|
|||||||
# no-op if restoring the latest version
|
# no-op if restoring the latest version
|
||||||
return
|
return
|
||||||
|
|
||||||
self._dataset.restore()
|
self.to_lance().restore()
|
||||||
self._reset_dataset()
|
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return self._dataset.count_rows()
|
return self.to_lance().count_rows()
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"LanceTable({self.name})"
|
return f"LanceTable({self.name})"
|
||||||
@@ -531,7 +520,7 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
def head(self, n=5) -> pa.Table:
|
def head(self, n=5) -> pa.Table:
|
||||||
"""Return the first n rows of the table."""
|
"""Return the first n rows of the table."""
|
||||||
return self._dataset.head(n)
|
return self.to_lance().head(n)
|
||||||
|
|
||||||
def to_pandas(self) -> "pd.DataFrame":
|
def to_pandas(self) -> "pd.DataFrame":
|
||||||
"""Return the table as a pandas DataFrame.
|
"""Return the table as a pandas DataFrame.
|
||||||
@@ -548,7 +537,7 @@ class LanceTable(Table):
|
|||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
pa.Table"""
|
pa.Table"""
|
||||||
return self._dataset.to_table()
|
return self.to_lance().to_table()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _dataset_uri(self) -> str:
|
def _dataset_uri(self) -> str:
|
||||||
@@ -575,7 +564,6 @@ class LanceTable(Table):
|
|||||||
accelerator=accelerator,
|
accelerator=accelerator,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
)
|
)
|
||||||
self._reset_dataset()
|
|
||||||
register_event("create_index")
|
register_event("create_index")
|
||||||
|
|
||||||
def create_fts_index(
|
def create_fts_index(
|
||||||
@@ -607,7 +595,11 @@ class LanceTable(Table):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Index already exists. Use replace=True to overwrite."
|
f"Index already exists. Use replace=True to overwrite."
|
||||||
)
|
)
|
||||||
fs.delete_dir(path)
|
try:
|
||||||
|
fs.delete_dir(path)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
if "Cannot get information for path" in str(e):
|
||||||
|
pass
|
||||||
|
|
||||||
index = create_index(self._get_fts_index_path(), field_names)
|
index = create_index(self._get_fts_index_path(), field_names)
|
||||||
populate_index(index, self, field_names)
|
populate_index(index, self, field_names)
|
||||||
@@ -662,8 +654,7 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors=on_bad_vectors,
|
on_bad_vectors=on_bad_vectors,
|
||||||
fill_value=fill_value,
|
fill_value=fill_value,
|
||||||
)
|
)
|
||||||
lance.write_dataset(data, self._dataset_uri, schema=self.schema, mode=mode)
|
self.to_lance().write(data, mode=mode)
|
||||||
self._reset_dataset()
|
|
||||||
register_event("add")
|
register_event("add")
|
||||||
|
|
||||||
def merge(
|
def merge(
|
||||||
@@ -724,10 +715,9 @@ class LanceTable(Table):
|
|||||||
other_table = other_table.to_lance()
|
other_table = other_table.to_lance()
|
||||||
if isinstance(other_table, LanceDataset):
|
if isinstance(other_table, LanceDataset):
|
||||||
other_table = other_table.to_table()
|
other_table = other_table.to_table()
|
||||||
self._dataset.merge(
|
self.to_lance().merge(
|
||||||
other_table, left_on=left_on, right_on=right_on, schema=schema
|
other_table, left_on=left_on, right_on=right_on, schema=schema
|
||||||
)
|
)
|
||||||
self._reset_dataset()
|
|
||||||
register_event("merge")
|
register_event("merge")
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
@@ -930,7 +920,7 @@ class LanceTable(Table):
|
|||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
def delete(self, where: str):
|
def delete(self, where: str):
|
||||||
self._dataset.delete(where)
|
self.to_lance().delete(where)
|
||||||
|
|
||||||
def update(
|
def update(
|
||||||
self,
|
self,
|
||||||
@@ -985,7 +975,6 @@ class LanceTable(Table):
|
|||||||
values_sql = {k: value_to_sql(v) for k, v in values.items()}
|
values_sql = {k: value_to_sql(v) for k, v in values.items()}
|
||||||
|
|
||||||
self.to_lance().update(values_sql, where)
|
self.to_lance().update(values_sql, where)
|
||||||
self._reset_dataset()
|
|
||||||
register_event("update")
|
register_event("update")
|
||||||
|
|
||||||
def _execute_query(self, query: Query) -> pa.Table:
|
def _execute_query(self, query: Query) -> pa.Table:
|
||||||
|
|||||||
@@ -95,12 +95,12 @@ def test_create_index_from_table(tmp_path, table):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="already exists"):
|
|
||||||
table.create_fts_index("text")
|
|
||||||
|
|
||||||
table.create_fts_index("text", replace=True)
|
table.create_fts_index("text", replace=True)
|
||||||
assert len(table.search("gorilla").limit(1).to_pandas()) == 1
|
assert len(table.search("gorilla").limit(1).to_pandas()) == 1
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="already exists"):
|
||||||
|
table.create_fts_index("text")
|
||||||
|
|
||||||
|
|
||||||
def test_create_index_multiple_columns(tmp_path, table):
|
def test_create_index_multiple_columns(tmp_path, table):
|
||||||
table.create_fts_index(["text", "text2"])
|
table.create_fts_index(["text", "text2"])
|
||||||
|
|||||||
@@ -226,39 +226,38 @@ def test_versioning(db):
|
|||||||
|
|
||||||
|
|
||||||
def test_create_index_method():
|
def test_create_index_method():
|
||||||
with patch.object(LanceTable, "_reset_dataset", return_value=None):
|
with patch.object(
|
||||||
with patch.object(
|
LanceTable, "_dataset", new_callable=PropertyMock
|
||||||
LanceTable, "_dataset", new_callable=PropertyMock
|
) as mock_dataset:
|
||||||
) as mock_dataset:
|
# Setup mock responses
|
||||||
# Setup mock responses
|
mock_dataset.return_value.create_index.return_value = None
|
||||||
mock_dataset.return_value.create_index.return_value = None
|
|
||||||
|
|
||||||
# Create a LanceTable object
|
# Create a LanceTable object
|
||||||
connection = LanceDBConnection(uri="mock.uri")
|
connection = LanceDBConnection(uri="mock.uri")
|
||||||
table = LanceTable(connection, "test_table")
|
table = LanceTable(connection, "test_table")
|
||||||
|
|
||||||
# Call the create_index method
|
# Call the create_index method
|
||||||
table.create_index(
|
table.create_index(
|
||||||
metric="L2",
|
metric="L2",
|
||||||
num_partitions=256,
|
num_partitions=256,
|
||||||
num_sub_vectors=96,
|
num_sub_vectors=96,
|
||||||
vector_column_name="vector",
|
vector_column_name="vector",
|
||||||
replace=True,
|
replace=True,
|
||||||
index_cache_size=256,
|
index_cache_size=256,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check that the _dataset.create_index method was called
|
# Check that the _dataset.create_index method was called
|
||||||
# with the right parameters
|
# with the right parameters
|
||||||
mock_dataset.return_value.create_index.assert_called_once_with(
|
mock_dataset.return_value.create_index.assert_called_once_with(
|
||||||
column="vector",
|
column="vector",
|
||||||
index_type="IVF_PQ",
|
index_type="IVF_PQ",
|
||||||
metric="L2",
|
metric="L2",
|
||||||
num_partitions=256,
|
num_partitions=256,
|
||||||
num_sub_vectors=96,
|
num_sub_vectors=96,
|
||||||
replace=True,
|
replace=True,
|
||||||
accelerator=None,
|
accelerator=None,
|
||||||
index_cache_size=256,
|
index_cache_size=256,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_add_with_nans(db):
|
def test_add_with_nans(db):
|
||||||
|
|||||||
Reference in New Issue
Block a user