feat: add distance_type() parameter to python sync query builders and metric() as an alias (#2073)

This PR aims to fix #2047 by doing the following:
- Add a `distance_type()` method to the sync query builders of the Python
SDK.
- Make `metric()` an alias for `distance_type()`.
This commit is contained in:
Vaibhav
2025-01-28 13:59:53 -08:00
committed by GitHub
parent 0a9e1eab75
commit dac0857745
6 changed files with 74 additions and 16 deletions

View File

@@ -38,7 +38,7 @@ def test_binary_vector():
query = np.random.randint(0, 2, size=256)
packed_query = np.packbits(query)
tbl.search(packed_query).metric("hamming").to_arrow()
tbl.search(packed_query).distance_type("hamming").to_arrow()
# --8<-- [end:sync_binary_vector]
db.drop_table("my_binary_vectors")

View File

@@ -65,7 +65,7 @@ def test_vector_search():
tbl.search(np.random.random((1536))).limit(10).to_list()
# --8<-- [end:exhaustive_search]
# --8<-- [start:exhaustive_search_cosine]
tbl.search(np.random.random((1536))).metric("cosine").limit(10).to_list()
tbl.search(np.random.random((1536))).distance_type("cosine").limit(10).to_list()
# --8<-- [end:exhaustive_search_cosine]
# --8<-- [start:create_table_with_nested_schema]
# Let's add 100 sample rows to our dataset

View File

@@ -377,14 +377,14 @@ def test_query_builder_with_metric(table):
df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas()
df_l2 = (
LanceVectorQueryBuilder(table, query, vector_column_name)
.metric("L2")
.distance_type("L2")
.to_pandas()
)
tm.assert_frame_equal(df_default, df_l2)
df_cosine = (
LanceVectorQueryBuilder(table, query, vector_column_name)
.metric("cosine")
.distance_type("cosine")
.limit(1)
.to_pandas()
)
@@ -401,7 +401,7 @@ def test_query_builder_with_different_vector_column():
vector_column_name = "foo_vector"
builder = (
LanceVectorQueryBuilder(table, query, vector_column_name)
.metric("cosine")
.distance_type("cosine")
.where("b < 10")
.select(["b"])
.limit(2)

View File

@@ -366,7 +366,7 @@ def test_query_sync_maximal():
with query_test_table(handler) as table:
(
table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
.metric("cosine")
.distance_type("cosine")
.limit(42)
.offset(10)
.refine_factor(10)

View File

@@ -1242,7 +1242,9 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
# with custom metric
result_dot = (
table.search("feeling lucky", query_type="hybrid").metric("dot").to_arrow()
table.search("feeling lucky", query_type="hybrid")
.distance_type("dot")
.to_arrow()
)
result_l2 = table.search("feeling lucky", query_type="hybrid").to_arrow()
assert len(result_dot) > 0