mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-22 21:09:58 +00:00
@@ -78,7 +78,7 @@ class LanceQueryBuilder:
|
||||
def to_df(self) -> pd.DataFrame:
|
||||
"""Execute the query and return the results as a pandas DataFrame.
|
||||
"""
|
||||
ds = self._table._dataset
|
||||
ds = self._table.to_lance()
|
||||
# TODO indexed search
|
||||
tbl = ds.to_table(
|
||||
columns=self._columns,
|
||||
@@ -89,4 +89,4 @@ class LanceQueryBuilder:
|
||||
"k": self._limit
|
||||
}
|
||||
)
|
||||
return tbl.to_pandas()
|
||||
return tbl.to_pandas()
|
||||
|
||||
@@ -108,7 +108,7 @@ class LanceTable:
|
||||
return LanceQueryBuilder(self, query)
|
||||
|
||||
@classmethod
|
||||
def create(cls, db, name, data, schema):
|
||||
def create(cls, db, name, data, schema=None):
|
||||
tbl = LanceTable(db, name)
|
||||
data = _sanitize_data(data, schema)
|
||||
lance.write_dataset(data, tbl._dataset_uri, mode="create")
|
||||
@@ -131,10 +131,8 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
|
||||
return data
|
||||
# cast the columns to the expected types
|
||||
data = data.combine_chunks()
|
||||
return pa.Table.from_arrays([
|
||||
data[name].cast(schema.field(name).type)
|
||||
for name in schema.names
|
||||
], schema=schema)
|
||||
return pa.Table.from_arrays([data[name] for name in schema.names],
|
||||
schema=schema)
|
||||
# just check the vector column
|
||||
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)
|
||||
|
||||
|
||||
@@ -16,6 +16,10 @@ import lancedb
|
||||
|
||||
def test_basic(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
assert db.uri == str(tmp_path)
|
||||
assert db.table_names() == []
|
||||
|
||||
table = db.create_table("test",
|
||||
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
|
||||
@@ -23,6 +27,12 @@ def test_basic(tmp_path):
|
||||
assert len(rs) == 1
|
||||
assert rs["item"].iloc[0] == "bar"
|
||||
|
||||
rs = table.search([100, 100]).where("price < 15").limit(1).to_df()
|
||||
rs = table.search([100, 100]).where("price < 15").limit(2).to_df()
|
||||
assert len(rs) == 1
|
||||
assert rs["item"].iloc[0] == "foo"
|
||||
|
||||
assert db.table_names() == ["test"]
|
||||
assert "test" in db
|
||||
assert len(db) == 1
|
||||
|
||||
assert db.open_table("test").name == db["test"].name
|
||||
|
||||
59
python/tests/test_query.py
Normal file
59
python/tests/test_query.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import lance
|
||||
from lancedb.query import LanceQueryBuilder
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class MockTable:
    """Minimal stand-in for a table object that only knows its dataset URI.

    It exposes ``to_lance()``, which opens the Lance dataset stored at
    ``uri``.
    """

    def __init__(self, tmp_path):
        # Location of the Lance dataset backing this mock table.
        self.uri = tmp_path

    def to_lance(self):
        """Open and return the Lance dataset stored at ``self.uri``."""
        dataset = lance.dataset(self.uri)
        return dataset
|
||||
|
||||
|
||||
@pytest.fixture
def table(tmp_path) -> MockTable:
    """Write a two-row Lance dataset to ``tmp_path`` and wrap it in a MockTable.

    The dataset has a fixed-size (2) float32 ``vector`` column plus ``id``,
    ``str_field`` and ``float_field`` columns.
    """
    columns = {
        "vector": [[1, 2], [3, 4]],
        "id": [1, 2],
        "str_field": ["a", "b"],
        "float_field": [1.0, 2.0]
    }
    df = pd.DataFrame(columns)

    # Explicit schema so the vector column is written as a fixed-size list.
    fields = [
        pa.field("vector", pa.list_(pa.float32(), list_size=2)),
        pa.field("id", pa.int32()),
        pa.field("str_field", pa.string()),
        pa.field("float_field", pa.float64()),
    ]
    schema = pa.schema(fields)

    lance.write_dataset(df, tmp_path, schema)
    return MockTable(tmp_path)
|
||||
|
||||
|
||||
def test_query_builder(table):
    """Query with vector [0, 0], limit 1, selecting ``id``; expect the first row."""
    builder = LanceQueryBuilder(table, [0, 0])
    df = builder.limit(1).select(["id"]).to_df()

    first_id = df["id"].values[0]
    assert first_id == 1

    # NOTE(review): only "id" is selected, yet the test reads "vector" from
    # the result; confirm the query is expected to return the vector column.
    first_vector = df["vector"].values[0]
    assert all(first_vector == [1, 2])
|
||||
|
||||
|
||||
def test_query_builder_with_filter(table):
    """Query with vector [0, 0] and the filter ``id = 2``; expect the second row."""
    builder = LanceQueryBuilder(table, [0, 0])
    df = builder.where("id = 2").to_df()

    first_id = df["id"].values[0]
    assert first_id == 2

    first_vector = df["vector"].values[0]
    assert all(first_vector == [3, 4])
|
||||
|
||||
|
||||
65
python/tests/test_table.py
Normal file
65
python/tests/test_table.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
|
||||
from lancedb.table import LanceTable
|
||||
|
||||
|
||||
class MockDB:
    """Bare-bones database stand-in that only carries a ``uri`` attribute."""

    def __init__(self, uri: Path):
        # Root location handed to the tables created against this mock.
        self.uri = uri
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path) -> MockDB:
    """Provide a MockDB whose ``uri`` is pytest's per-test temporary directory."""
    mock_db = MockDB(tmp_path)
    return mock_db
|
||||
|
||||
|
||||
def test_basic(db):
    """Create a table, reopen it by name, and compare name, schema and contents."""
    records = [{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
               {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
    created = LanceTable.create(db, "test", data=records)
    ds = created.to_lance()

    # A fresh handle on the same name should see what was just written.
    table = LanceTable(db, "test")
    assert table.name == "test"
    assert table.schema == ds.schema
    assert table.to_lance().to_table() == ds.to_table()
|
||||
|
||||
|
||||
def test_add(db):
    """Create tables from three input forms and compare each with ``expected``.

    The inputs are a list of dicts, a pandas DataFrame, and a pyarrow Table,
    all created with the same explicit schema.
    """
    schema = pa.schema([
        pa.field("vector", pa.list_(pa.float32())),
        pa.field("item", pa.string()),
        pa.field("price", pa.float32()),
    ])

    expected_columns = [
        pa.array([[3.1, 4.1], [5.9, 26.5]]),
        pa.array(["foo", "bar"]),
        pa.array([10.0, 20.0])
    ]
    expected = pa.Table.from_arrays(expected_columns, schema=schema)

    records = [{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
               {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
    df = pd.DataFrame(records)
    # Same rows in each supported input representation, in a fixed order so
    # the generated table names (test_0, test_1, test_2) stay stable.
    data = [records, df, pa.Table.from_pandas(df, schema=schema)]

    for i, d in enumerate(data):
        created = LanceTable.create(db, f"test_{i}", data=d, schema=schema)
        tbl = created.to_lance().to_table()
        assert expected == tbl
|
||||
Reference in New Issue
Block a user