Files
lancedb/python/tests/test_query.py
2023-05-05 11:27:09 -07:00

82 lines
2.4 KiB
Python

# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import lance
import numpy as np
import pandas as pd
import pandas.testing as tm
import pyarrow as pa
import pytest
from lancedb.query import LanceQueryBuilder
class MockTable:
def __init__(self, tmp_path):
self.uri = tmp_path
def to_lance(self):
return lance.dataset(self.uri)
@pytest.fixture
def table(tmp_path) -> MockTable:
df = pd.DataFrame(
{
"vector": [[1, 2], [3, 4]],
"id": [1, 2],
"str_field": ["a", "b"],
"float_field": [1.0, 2.0],
}
)
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("id", pa.int32()),
pa.field("str_field", pa.string()),
pa.field("float_field", pa.float64()),
]
)
lance.write_dataset(df, tmp_path, schema)
return MockTable(tmp_path)
def test_query_builder(table):
df = LanceQueryBuilder(table, [0, 0]).limit(1).select(["id"]).to_df()
assert df["id"].values[0] == 1
assert all(df["vector"].values[0] == [1, 2])
def test_query_builder_with_filter(table):
df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
assert df["id"].values[0] == 2
assert all(df["vector"].values[0] == [3, 4])
def test_query_builder_with_metric(table):
query = [4, 8]
df_default = LanceQueryBuilder(table, query).to_df()
df_l2 = LanceQueryBuilder(table, query).metric("l2").to_df()
tm.assert_frame_equal(df_default, df_l2)
df_cosine = LanceQueryBuilder(table, query).metric("cosine").limit(1).to_df()
assert df_cosine.score[0] == pytest.approx(
cosine_distance(query, df_cosine.vector[0]),
abs=1e-6,
)
assert 0 <= df_cosine.score[0] <= 1
def cosine_distance(vec1, vec2):
return 1 - np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))