This commit is contained in:
Chang She
2023-03-22 18:29:07 -07:00
parent 1f42104c77
commit 5ef5141812
6 changed files with 63 additions and 43 deletions

View File

@@ -20,9 +20,13 @@ def test_basic(tmp_path):
assert db.uri == str(tmp_path)
assert db.table_names() == []
table = db.create_table("test",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
table = db.create_table(
"test",
data=[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
],
)
rs = table.search([100, 100]).limit(1).to_df()
assert len(rs) == 1
assert rs["item"].iloc[0] == "bar"

View File

@@ -21,7 +21,6 @@ import pytest
class MockTable:
def __init__(self, tmp_path):
self.uri = tmp_path
@@ -31,16 +30,22 @@ class MockTable:
@pytest.fixture
def table(tmp_path) -> MockTable:
df = pd.DataFrame({
"vector": [[1, 2], [3, 4]],
"id": [1, 2],
"str_field": ["a", "b"],
"float_field": [1.0, 2.0]
})
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("id", pa.int32()),
pa.field("str_field", pa.string()),
pa.field("float_field", pa.float64())])
df = pd.DataFrame(
{
"vector": [[1, 2], [3, 4]],
"id": [1, 2],
"str_field": ["a", "b"],
"float_field": [1.0, 2.0],
}
)
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("id", pa.int32()),
pa.field("str_field", pa.string()),
pa.field("float_field", pa.float64()),
]
)
lance.write_dataset(df, tmp_path, schema)
return MockTable(tmp_path)
@@ -55,5 +60,3 @@ def test_query_builder_with_filter(table):
df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
assert df["id"].values[0] == 2
assert all(df["vector"].values[0] == [3, 4])

View File

@@ -21,7 +21,6 @@ from lancedb.table import LanceTable
class MockDB:
def __init__(self, uri: Path):
self.uri = uri
@@ -33,9 +32,12 @@ def db(tmp_path) -> MockDB:
def test_basic(db):
ds = LanceTable.create(
db, "test",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
db,
"test",
data=[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
],
).to_lance()
table = LanceTable(db, "test")
@@ -45,21 +47,35 @@ def test_basic(db):
def test_add(db):
schema = pa.schema([pa.field("vector", pa.list_(pa.float32())),
pa.field("item", pa.string()),
pa.field("price", pa.float32())])
expected = pa.Table.from_arrays([
pa.array([[3.1, 4.1], [5.9, 26.5]]),
pa.array(["foo", "bar"]),
pa.array([10.0, 20.0])
], schema=schema)
data = [[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32())),
pa.field("item", pa.string()),
pa.field("price", pa.float32()),
]
)
expected = pa.Table.from_arrays(
[
pa.array([[3.1, 4.1], [5.9, 26.5]]),
pa.array(["foo", "bar"]),
pa.array([10.0, 20.0]),
],
schema=schema,
)
data = [
[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
]
df = pd.DataFrame(data[0])
data.append(df)
data.append(pa.Table.from_pandas(df, schema=schema))
for i, d in enumerate(data):
tbl = (LanceTable.create(db, f"test_{i}", data=d, schema=schema)
.to_lance().to_table())
tbl = (
LanceTable.create(db, f"test_{i}", data=d, schema=schema)
.to_lance()
.to_table()
)
assert expected == tbl