Merge pull request #4 from lancedb/changhiskhan/tests

add unit tests
This commit is contained in:
Chang She
2023-03-22 12:03:08 -07:00
committed by GitHub
5 changed files with 140 additions and 8 deletions

View File

@@ -78,7 +78,7 @@ class LanceQueryBuilder:
def to_df(self) -> pd.DataFrame:
"""Execute the query and return the results as a pandas DataFrame.
"""
ds = self._table._dataset
ds = self._table.to_lance()
# TODO indexed search
tbl = ds.to_table(
columns=self._columns,
@@ -89,4 +89,4 @@ class LanceQueryBuilder:
"k": self._limit
}
)
return tbl.to_pandas()
return tbl.to_pandas()

View File

@@ -108,7 +108,7 @@ class LanceTable:
return LanceQueryBuilder(self, query)
@classmethod
def create(cls, db, name, data, schema):
def create(cls, db, name, data, schema=None):
tbl = LanceTable(db, name)
data = _sanitize_data(data, schema)
lance.write_dataset(data, tbl._dataset_uri, mode="create")
@@ -131,10 +131,8 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
return data
# cast the columns to the expected types
data = data.combine_chunks()
return pa.Table.from_arrays([
data[name].cast(schema.field(name).type)
for name in schema.names
], schema=schema)
return pa.Table.from_arrays([data[name] for name in schema.names],
schema=schema)
# just check the vector column
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)

View File

@@ -16,6 +16,10 @@ import lancedb
def test_basic(tmp_path):
db = lancedb.connect(tmp_path)
assert db.uri == str(tmp_path)
assert db.table_names() == []
table = db.create_table("test",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
@@ -23,6 +27,12 @@ def test_basic(tmp_path):
assert len(rs) == 1
assert rs["item"].iloc[0] == "bar"
rs = table.search([100, 100]).where("price < 15").limit(1).to_df()
rs = table.search([100, 100]).where("price < 15").limit(2).to_df()
assert len(rs) == 1
assert rs["item"].iloc[0] == "foo"
assert db.table_names() == ["test"]
assert "test" in db
assert len(db) == 1
assert db.open_table("test").name == db["test"].name

View File

@@ -0,0 +1,59 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import lance
from lancedb.query import LanceQueryBuilder
import pandas as pd
import pyarrow as pa
import pytest
class MockTable:
    """Minimal table stand-in that only remembers where its dataset lives.

    It stores the location it is given and exposes ``to_lance`` to open the
    Lance dataset at that location.
    """

    def __init__(self, tmp_path):
        # Location of the Lance dataset; passed to ``lance.dataset`` unchanged.
        self.uri = tmp_path

    def to_lance(self):
        """Open and return the Lance dataset stored at ``self.uri``."""
        return lance.dataset(self.uri)
@pytest.fixture
def table(tmp_path) -> MockTable:
    """Write a two-row Lance dataset to ``tmp_path`` and wrap it in a MockTable.

    The dataset has four columns: ``vector`` (fixed-size list of two float32),
    ``id`` (int32), ``str_field`` (string) and ``float_field`` (float64).
    """
    df = pd.DataFrame({
        "vector": [[1, 2], [3, 4]],
        "id": [1, 2],
        "str_field": ["a", "b"],
        "float_field": [1.0, 2.0],
    })
    # Explicit Arrow schema so the on-disk types do not depend on pandas'
    # default type inference.
    schema = pa.schema([
        pa.field("vector", pa.list_(pa.float32(), list_size=2)),
        pa.field("id", pa.int32()),
        pa.field("str_field", pa.string()),
        pa.field("float_field", pa.float64()),
    ])
    lance.write_dataset(df, tmp_path, schema)
    return MockTable(tmp_path)
def test_query_builder(table):
    """Querying with ``[0, 0]`` and a limit of one returns the row with id 1."""
    df = LanceQueryBuilder(table, [0, 0]).limit(1).select(["id"]).to_df()
    assert df["id"].values[0] == 1
    # NOTE(review): "vector" is asserted although only "id" was selected;
    # presumably the vector search returns the vector column alongside the
    # projection -- confirm against the Lance version in use.
    assert all(df["vector"].values[0] == [1, 2])
def test_query_builder_with_filter(table):
    """A ``where("id = 2")`` filter makes the row with id 2 the first result."""
    df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
    assert df["id"].values[0] == 2
    assert all(df["vector"].values[0] == [3, 4])

View File

@@ -0,0 +1,65 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import pandas as pd
import pyarrow as pa
import pytest
from lancedb.table import LanceTable
class MockDB:
    """Bare-bones database stand-in that carries nothing but a ``uri``."""

    def __init__(self, uri: Path):
        # Stored as given; no normalisation or validation is performed.
        self.uri = uri
@pytest.fixture
def db(tmp_path) -> MockDB:
    """Provide a MockDB whose ``uri`` is the per-test ``tmp_path``."""
    return MockDB(tmp_path)
def test_basic(db):
    """A table made with ``LanceTable.create`` can be reopened by name.

    The reopened table must report the same name, schema and contents as the
    dataset returned from the create call.
    """
    ds = LanceTable.create(
        db, "test",
        data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
              {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
    ).to_lance()

    # Build a second handle from the same db and name, then compare it with
    # the dataset obtained from the create call above.
    table = LanceTable(db, "test")
    assert table.name == "test"
    assert table.schema == ds.schema
    assert table.to_lance().to_table() == ds.to_table()
def test_add(db):
    """``LanceTable.create`` yields the same Arrow table for every input kind.

    The same two rows are supplied as a list of dicts, a pandas DataFrame and
    a pyarrow Table; each is written with an explicit schema and read back.

    NOTE(review): despite the name, only ``LanceTable.create`` is exercised
    here; no add/append call appears in this test.
    """
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32())),
                        pa.field("item", pa.string()),
                        pa.field("price", pa.float32())])
    expected = pa.Table.from_arrays([
        pa.array([[3.1, 4.1], [5.9, 26.5]]),
        pa.array(["foo", "bar"]),
        pa.array([10.0, 20.0])
    ], schema=schema)

    # Three representations of the same rows: list of dicts, DataFrame,
    # and Arrow table.
    data = [[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
             {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]]
    df = pd.DataFrame(data[0])
    data.append(df)
    data.append(pa.Table.from_pandas(df, schema=schema))

    # Each input gets its own table name so the create calls do not collide.
    for i, d in enumerate(data):
        tbl = (LanceTable.create(db, f"test_{i}", data=d, schema=schema)
               .to_lance().to_table())
        assert expected == tbl