feat(python): enable polars predicate pushdown

fix hybrid search example (#922 )
[python] Bump version: 0.5.2 → 0.5.3
2026-01-08 04:42:57 +00:00 · 2024-02-03 19:33:45 -08:00 · 2024-02-03 09:26:32 +05:30 · 2024-02-03 03:04:04 +00:00 · 2024-02-02 18:57:13 -08:00 · 2024-02-02 22:37:23 +00:00
14 changed files with 80 additions and 53 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.7
+current_version = 0.4.8
 commit = True
 message = Bump version: {current_version} → {new_version}
 tag = True
--- a/docs/src/hybrid_search.md
+++ b/docs/src/hybrid_search.md
@@ -6,17 +6,24 @@ LanceDB supports both semantic and keyword-based search. In real world applicati
 You can perform hybrid search in LanceDB by combining the results of semantic and full-text search via a reranking algorithm of your choice. LanceDB provides multiple rerankers out of the box. However, you can always write a custom reranker if your use case needs more sophisticated logic.
 ```python
 import os
 import lancedb
 import openai
 from lancedb.embeddings import get_registry
-from lancedb.pydanatic import LanceModel, Vector
+from lancedb.pydantic import LanceModel, Vector
 db = lancedb.connect("~/.lancedb")
 # Ingest embedding function in LanceDB table
 # Configuring the environment variable OPENAI_API_KEY
 if "OPENAI_API_KEY" not in os.environ:
 # OR set the key here as a variable
    openai.api_key = "sk-..."
 embeddings = get_registry().get("openai").create()
 class Documents(LanceModel):
-    vector: Vector(embeddings.ndims) = embeddings.VectorField()
+    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
    text: str = embeddings.SourceField()
 table = db.create_table("documents", schema=Documents)
@@ -31,17 +38,19 @@ data = [
 # ingest docs with auto-vectorization
 table.add(data)
 # Create a fts index before the hybrid search
 table.create_fts_index("text")
 # hybrid search with default re-ranker
 results = table.search("flower moon", query_type="hybrid").to_pandas()
 ```
-By default, LanceDB uses `LinearCombinationReranker(weights=0.7)` to combine and rerank the results of semantic and full-text search. You can customize the hyperparameters as needed or write your own custom reranker. Here's how you can use any of the available rerankers:
+By default, LanceDB uses `LinearCombinationReranker(weight=0.7)` to combine and rerank the results of semantic and full-text search. You can customize the hyperparameters as needed or write your own custom reranker. Here's how you can use any of the available rerankers:
 ### `rerank()` arguments
 * `normalize`: `str`, default `"score"`:
    The method to normalize the scores. Can be "rank" or "score". If "rank", the scores are converted to ranks and then normalized. If "score", the scores are normalized directly.
-* `reranker`: `Reranker`, default `LinearCombinationReranker(weights=0.7)`.
+* `reranker`: `Reranker`, default `LinearCombinationReranker(weight=0.7)`.
    The reranker to use. If not specified, the default reranker is used.
@@ -55,7 +64,7 @@ This is the default re-ranker used by LanceDB. It combines the results of semant
 ```python
 from lancedb.rerankers import LinearCombinationReranker
-reranker = LinearCombinationReranker(weights=0.3) # Use 0.3 as the weight for vector search
+reranker = LinearCombinationReranker(weight=0.3) # Use 0.3 as the weight for vector search
 results = table.search("rebel", query_type="hybrid").rerank(reranker=reranker).to_pandas()
 ```
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.4.7",
+  "version": "0.4.8",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.4.7",
+      "version": "0.4.8",
      "cpu": [
        "x64",
        "arm64"
@@ -53,11 +53,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.7",
+        "@lancedb/vectordb-darwin-arm64": "0.4.8",
-        "@lancedb/vectordb-darwin-x64": "0.4.7",
+        "@lancedb/vectordb-darwin-x64": "0.4.8",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.7",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.4.8",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.7",
+        "@lancedb/vectordb-linux-x64-gnu": "0.4.8",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.7"
+        "@lancedb/vectordb-win32-x64-msvc": "0.4.8"
      }
    },
    "node_modules/@75lb/deep-merge": {
@@ -329,9 +329,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.4.7",
+      "version": "0.4.8",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.7.tgz",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.8.tgz",
-      "integrity": "sha512-kACOIytgjBfX8NRwjPKe311XRN3lbSN13B7avT5htMd3kYm3AnnMag9tZhlwoO7lIuvGaXhy7mApygJrjhfJ4g==",
+      "integrity": "sha512-FpnJaw7KmNdD/FtOw9AcmPL5P+L04AcnfPj9ZyEjN8iCwB/qaOGYgdfBv+EbEtfHIsqA12q/1BRduu9KdB6BIA==",
      "cpu": [
        "arm64"
      ],
@@ -341,9 +341,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.4.7",
+      "version": "0.4.8",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.7.tgz",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.8.tgz",
-      "integrity": "sha512-vb74iK5uPWCwz5E60r3yWp/R/HSg54/Z9AZWYckYXqsPv4w/nfbkM5iZhfRqqR/9uE6JClWJKOtjbk7b8CFRFg==",
+      "integrity": "sha512-RafOEYyZIgphp8wPGuVLFaTc8aAqo0NCO1LQMx0mB0xV96vrdo0Mooivs+dYN3RFfSHtTKPw9O1Jc957Vp1TLg==",
      "cpu": [
        "x64"
      ],
@@ -353,9 +353,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.4.7",
+      "version": "0.4.8",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.7.tgz",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.8.tgz",
-      "integrity": "sha512-jHp7THm6S9sB8RaCxGoZXLAwGAUHnawUUilB1K3mvQsRdfB2bBs0f7wDehW+PDhr+Iog4LshaWbcnoQEUJWR+Q==",
+      "integrity": "sha512-WlbYNfj4+v1hBHUluF+hnlG/A0ZaQFdXBTGDfHQniL11o+n3emWm4ujP5nSAoQHXjSH9DaOTGr/N4Mc9Xe+luw==",
      "cpu": [
        "arm64"
      ],
@@ -365,9 +365,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.4.7",
+      "version": "0.4.8",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.7.tgz",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.8.tgz",
-      "integrity": "sha512-LKbVe6Wrp/AGqCCjKliNDmYoeTNgY/wfb2DTLjrx41Jko/04ywLrJ6xSEAn3XD5RDCO5u3fyUdXHHHv5a3VAAQ==",
+      "integrity": "sha512-z+qFJrDqnNEv4JcwYDyt51PHmWjuM/XaOlSjpBnyyuUImeY+QcwctMuyXt8+Q4zhuqQR1AhLKrMwCU+YmMfk5g==",
      "cpu": [
        "x64"
      ],
@@ -377,9 +377,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.4.7",
+      "version": "0.4.8",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.7.tgz",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.8.tgz",
-      "integrity": "sha512-C5ln4+wafeY1Sm4PeV0Ios9lUaQVVip5Mjl9XU7ngioSEMEuXI/XMVfIdVfDPppVNXPeQxg33wLA272uw88D1Q==",
+      "integrity": "sha512-VjUryVvEA04r0j4lU9pJy84cmjuQm1GhBzbPc8kwbn5voT4A6BPglrlNsU0Zc+j8Fbjyvauzw2lMEcMsF4F0rw==",
      "cpu": [
        "x64"
      ],
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.4.7",
+  "version": "0.4.8",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -85,10 +85,10 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.4.7",
+    "@lancedb/vectordb-darwin-arm64": "0.4.8",
-    "@lancedb/vectordb-darwin-x64": "0.4.7",
+    "@lancedb/vectordb-darwin-x64": "0.4.8",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.4.7",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.4.8",
-    "@lancedb/vectordb-linux-x64-gnu": "0.4.7",
+    "@lancedb/vectordb-linux-x64-gnu": "0.4.8",
-    "@lancedb/vectordb-win32-x64-msvc": "0.4.7"
+    "@lancedb/vectordb-win32-x64-msvc": "0.4.8"
  }
 }
--- a/python/.bumpversion.cfg
+++ b/python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.5.2
+current_version = 0.5.3
 commit = True
 message = [python] Bump version: {current_version} → {new_version}
 tag = True
--- a/python/lancedb/common.py
+++ b/python/lancedb/common.py
@@ -16,9 +16,9 @@ from typing import Iterable, List, Union
 import numpy as np
 import pyarrow as pa
-from .util import safe_import
+from .util import safe_import_pandas
-pd = safe_import("pandas")
+pd = safe_import_pandas()
 DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
 VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
--- a/python/lancedb/context.py
+++ b/python/lancedb/context.py
@@ -16,9 +16,9 @@ import deprecation
 from . import __version__
 from .exceptions import MissingColumnError, MissingValueError
-from .util import safe_import
+from .util import safe_import_pandas
-pd = safe_import("pandas")
+pd = safe_import_pandas()
 def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
--- a/python/lancedb/embeddings/utils.py
+++ b/python/lancedb/embeddings/utils.py
@@ -26,10 +26,10 @@ import pyarrow as pa
 from lance.vector import vec_to_table
 from retry import retry
-from ..util import safe_import
+from ..util import safe_import_pandas
 from ..utils.general import LOGGER
-pd = safe_import("pandas")
+pd = safe_import_pandas()
 DATA = Union[pa.Table, "pd.DataFrame"]
 TEXT = Union[str, List[str], pa.Array, pa.ChunkedArray, np.ndarray]
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -27,7 +27,7 @@ from . import __version__
 from .common import VEC, VECTOR_COLUMN_NAME
 from .rerankers.base import Reranker
 from .rerankers.linear_combination import LinearCombinationReranker
-from .util import safe_import
+from .util import safe_import_pandas
 if TYPE_CHECKING:
    import PIL
@@ -36,7 +36,7 @@ if TYPE_CHECKING:
    from .pydantic import LanceModel
    from .table import Table
-pd = safe_import("pandas")
+pd = safe_import_pandas()
 class Query(pydantic.BaseModel):
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -34,7 +34,8 @@ from .query import LanceQueryBuilder, Query
 from .util import (
    fs_from_uri,
    join_uri,
-    safe_import,
+    safe_import_pandas,
+    safe_import_polars,
    value_to_sql,
 )
 from .utils.events import register_event
@@ -48,8 +49,8 @@ if TYPE_CHECKING:
    from .db import LanceDBConnection
-pd = safe_import("pandas")
+pd = safe_import_pandas()
-pl = safe_import("polars")
+pl = safe_import_polars()
 def _sanitize_data(
@@ -835,9 +836,7 @@ class LanceTable(Table):
        -------
        pl.LazyFrame
        """
-        return pl.scan_pyarrow_dataset(
-            self.to_lance(), allow_pyarrow_filter=False, batch_size=batch_size
-        )
+        return pl.scan_pyarrow_dataset(self.to_lance(), batch_size=batch_size)
    @property
    def _dataset_uri(self) -> str:
--- a/python/lancedb/util.py
+++ b/python/lancedb/util.py
@@ -134,6 +134,24 @@ def safe_import(module: str, mitigation=None):
        raise ImportError(f"Please install {mitigation or module}")
+def safe_import_pandas():
+    try:
+        import pandas as pd
+        return pd
+    except ImportError:
+        return None
+def safe_import_polars():
+    try:
+        import polars as pl
+        return pl
+    except ImportError:
+        return None
@singledispatch
 def value_to_sql(value):
    raise NotImplementedError("SQL conversion is not implemented for this type")
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lancedb"
-version = "0.5.2"
+version = "0.5.3"
 dependencies = [
    "deprecation",
    "pylance==0.9.12",
@@ -14,7 +14,8 @@ dependencies = [
    "pyyaml>=6.0",
    "click>=8.1.7",
    "requests>=2.31.0",
-    "overrides>=0.7"
+    "overrides>=0.7",
+    "pyarrow>=14.0"
 ]
 description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb-node"
-version = "0.4.7"
+version = "0.4.8"
 description = "Serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
 edition = "2018"
--- a/rust/vectordb/Cargo.toml
+++ b/rust/vectordb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb"
-version = "0.4.7"
+version = "0.4.8"
 edition = "2021"
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
Author	SHA1	Message	Date
Chang She	d1b0c8f0fe	feat(python): enable polars predicate pushdown	2024-02-03 19:33:45 -08:00
QianZhu	e412194008	fix hybrid search example (#922 )	2024-02-03 09:26:32 +05:30
Lance Release	a9088224c5	[python] Bump version: 0.5.2 → 0.5.3	2024-02-03 03:04:04 +00:00
Ayush Chaurasia	688c57a0d8	fix: revert safe_import_pandas usage (#921 )	2024-02-02 18:57:13 -08:00
Lance Release	12a98deded	Updating package-lock.json	2024-02-02 22:37:23 +00:00
Lance Release	e4bb042918	Updating package-lock.json	2024-02-02 21:57:07 +00:00
Lance Release	04e1662681	Bump version: 0.4.7 → 0.4.8	2024-02-02 21:56:57 +00:00