feat: adds isin support to the 'Expr' builder (#3523)

The `Expr` builder already includes a lot of useful filtering options,
`eq, ne, gt/gte, lt/lte, and_, or_, contains, cast`, but it was missing
a membership test like `isin`. This PR adds that support, as minimally as
possible, allowing easy filtering for membership in a list, without
needing a series of `where` expressions.

I didn't see anything in CONTRIBUTING.md about needing a feature request
or issue first, so I just made the change. My apologies if I missed that
somewhere.

Thanks for the vector store, we're using it now in paperless-ngx.
This commit is contained in:
Trenton H
2026-06-10 15:28:19 -07:00
committed by GitHub
parent d786e39fdc
commit 85d9c1ce63
6 changed files with 51 additions and 2 deletions

View File

@@ -48,6 +48,7 @@ class PyExpr:
# Presumably lower-cases string values (inferred from the name only) — TODO confirm.
def lower(self) -> "PyExpr": ...
# Presumably upper-cases string values (inferred from the name only) — TODO confirm.
def upper(self) -> "PyExpr": ...
# True where the string contains `substr`.
def contains(self, substr: "PyExpr") -> "PyExpr": ...
# True where the value is one of `values` (SQL ``IN``).
def isin(self, values: List["PyExpr"]) -> "PyExpr": ...
# Cast the expression to `data_type`.
def cast(self, data_type: pa.DataType) -> "PyExpr": ...
# Render the expression as a SQL string, e.g. "id IN (1, 2, 3)".
def to_sql(self) -> str: ...

View File

@@ -19,7 +19,7 @@ operators::
from __future__ import annotations
from typing import Union
from typing import Iterable, Union
import pyarrow as pa
@@ -174,6 +174,11 @@ class Expr:
"""Return True where the string contains *substr*."""
return Expr(self._inner.contains(_coerce(substr)._inner))
def isin(self, values: "Iterable[ExprLike]") -> "Expr":
    """Return True where the value is one of *values* (SQL ``IN``).

    Parameters
    ----------
    values:
        Iterable of candidate members. Every item is passed through
        ``_coerce``, so plain Python values and ``Expr`` objects may be
        mixed in the same collection.

    Returns
    -------
    Expr
        A new expression wrapping the membership test.

    Raises
    ------
    TypeError
        If *values* is a single ``str`` or ``bytes`` object. Both are
        iterable, so they would otherwise be split into characters or
        integer byte values and silently build the wrong ``IN`` list.
    """
    # A lone string/bytes is almost certainly a missing pair of brackets.
    if isinstance(values, (str, bytes)):
        raise TypeError(
            "isin() expects an iterable of values, got a single "
            f"{type(values).__name__}; wrap it in a list"
        )
    inner = [_coerce(v)._inner for v in values]
    return Expr(self._inner.isin(inner))
# ── type cast ────────────────────────────────────────────────────────────
def cast(self, data_type: Union[str, "pa.DataType"]) -> "Expr":

View File

@@ -9,7 +9,9 @@
use arrow::{datatypes::DataType, pyarrow::PyArrowType};
use datafusion_common::ScalarValue;
use lancedb::expr::{DfExpr, col as ldb_col, contains, expr_cast, lit as df_lit, lower, upper};
use lancedb::expr::{
DfExpr, col as ldb_col, contains, expr_cast, is_in, lit as df_lit, lower, upper,
};
use pyo3::types::PyBytes;
use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunction};
@@ -105,6 +107,14 @@ impl PyExpr {
Self(contains(self.0.clone(), substr.0.clone()))
}
// ── membership ───────────────────────────────────────────────────────────
/// Build a membership test: true where this expression matches any entry of `list` (SQL ``IN``).
///
/// NOTE(review): the Python type stub declares this parameter as `values`; confirm
/// whether the keyword name exposed to Python should match.
fn isin(&self, list: Vec<Self>) -> Self {
    // Unwrap each wrapper into the inner `DfExpr` it holds.
    let candidates = list
        .into_iter()
        .map(|wrapped| wrapped.0)
        .collect::<Vec<DfExpr>>();
    Self(is_in(self.0.clone(), candidates))
}
// ── type cast ────────────────────────────────────────────────────────────
/// Cast the expression to `data_type`.

View File

@@ -450,6 +450,27 @@ def binary_table(tmp_path):
return db.create_table("binary_test", data)
class TestExprIsin:
    """Checks `isin` SQL rendering and its use as a search filter."""

    def test_isin_ints(self):
        # Integer members render as bare literals.
        rendered = col("id").isin([1, 2, 3]).to_sql()
        assert rendered == "id IN (1, 2, 3)"

    def test_isin_strs(self):
        # String members render as single-quoted literals.
        expr = col("status").isin(["active", "pending"])
        assert expr.to_sql() == "status IN ('active', 'pending')"

    def test_isin_coerces_and_mixes(self):
        # An explicit literal and a plain int may share one list.
        members = [lit(1), 2]
        assert col("id").isin(members).to_sql() == "id IN (1, 2)"

    def test_isin_empty(self):
        # An empty collection renders as an empty IN list.
        no_members = []
        assert col("id").isin(no_members).to_sql() == "id IN ()"

    def test_isin_filter(self, simple_table):
        # End-to-end: the expression is accepted by `where` on a search.
        predicate = col("id").isin([1, 3, 5])
        matched = simple_table.search().where(predicate).to_arrow()
        assert matched.num_rows == 3
class TestExprBytesIntegration:
def test_binary_equality_filter(self, binary_table):
result = (