feat: add to_list and to_pandas APIs (#556)

Add `to_list` to return query results as a list of Python dicts (so we're not too pandas-centric). Closes #555. Add `to_pandas` API and add a deprecation warning on `to_df`. Closes #545. Co-authored-by: Chang She <chang@lancedb.com>
2026-01-03 18:32:55 +00:00 · 2023-10-11 12:18:55 -07:00
parent a737bbff19
commit 8469d010f8
26 changed files with 125 additions and 71 deletions
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ db = lancedb.connect(uri)
 table = db.create_table("my_table",
                         data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
                               {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
-result = table.search([100, 100]).limit(2).to_df()
+result = table.search([100, 100]).limit(2).to_pandas()
 ```

 ## Blogs, Tutorials & Videos
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -97,7 +97,7 @@ There are a couple of parameters that can be used to fine-tune the search:
         .limit(2) \
         .nprobes(20) \
         .refine_factor(10) \
-         .to_df()
+         .to_pandas()
     ```
     ```
                                              vector       item       _distance
@@ -124,7 +124,7 @@ You can further filter the elements returned by a search using a where clause.

 === "Python"
     ```python
-     tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_df()
+     tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas()
     ```

 === "Javascript"
@@ -141,7 +141,7 @@ You can select the columns returned by the query using a select clause.

 === "Python"
     ```python
-     tbl.search(np.random.random((1536))).select(["vector"]).to_df()
+     tbl.search(np.random.random((1536))).select(["vector"]).to_pandas()
     ```
     ```
        vector                                             _distance
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -146,7 +146,7 @@ Once you've embedded the query, you can find its nearest neighbors using the fol

 === "Python"
      ```python
-      tbl.search([100, 100]).limit(2).to_df()
+      tbl.search([100, 100]).limit(2).to_pandas()
      ```

      This returns a pandas DataFrame with the results.
--- a/docs/src/embedding.md
+++ b/docs/src/embedding.md
@@ -118,7 +118,7 @@ belong in the same latent space and your results will be nonsensical.
     ```python
     query = "What's the best pizza topping?"
     query_vector = embed_func([query])[0]
-     tbl.search(query_vector).limit(10).to_df()
+     tbl.search(query_vector).limit(10).to_pandas()
     ```

     The above snippet returns a pandas DataFrame with the 10 closest vectors to the query.
--- a/docs/src/examples/serverless_lancedb_with_s3_and_lambda.md
+++ b/docs/src/examples/serverless_lancedb_with_s3_and_lambda.md
@@ -80,14 +80,14 @@ def handler(event, context):
    # Shape of SIFT is (128,1M), d=float32
    query_vector = np.array(event['query_vector'], dtype=np.float32)

-    rs = table.search(query_vector).limit(2).to_df()
+    rs = table.search(query_vector).limit(2).to_list()

    return {
        "statusCode": status_code,
        "headers": {
            "Content-Type": "application/json"
        },
-        "body": rs.to_json()
+        "body": json.dumps(rs)
    }
 ``` 

--- a/docs/src/fts.md
+++ b/docs/src/fts.md
@@ -43,7 +43,13 @@ table.create_fts_index("text")
 To search:

 ```python
-df = table.search("puppy").limit(10).select(["text"]).to_df()
+table.search("puppy").limit(10).select(["text"]).to_list()
+```
+
+Which returns a list of dictionaries:
+
+```python
+[{'text': 'Frodo was a happy puppy', 'score': 0.6931471824645996}]
 ```

 LanceDB automatically looks for an FTS index if the input is str.
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -36,7 +36,7 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
      table = db.create_table("my_table",
                              data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
                                    {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
-      result = table.search([100, 100]).limit(2).to_df()
+      result = table.search([100, 100]).limit(2).to_list()
      ```

 === "Javascript"
--- a/docs/src/notebooks/multimodal_search.ipynb
+++ b/docs/src/notebooks/multimodal_search.ipynb
@@ -19,11 +19,11 @@
     "output_type": "stream",
     "text": [
      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.2\u001B[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n",
      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.2\u001B[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n"
     ]
    }
   ],
@@ -39,6 +39,7 @@
   "outputs": [],
   "source": [
    "import io\n",
+    "\n",
    "import PIL\n",
    "import duckdb\n",
    "import lancedb"
@@ -158,18 +159,18 @@
    "        \"db = lancedb.connect('~/datasets/demo')\\n\"\n",
    "        \"tbl = db.open_table('diffusiondb')\\n\\n\"\n",
    "        f\"embedding = embed_func('{query}')\\n\"\n",
-    "        \"tbl.search(embedding).limit(9).to_df()\"\n",
+    "        \"tbl.search(embedding).limit(9).to_pandas()\"\n",
    "    )\n",
-    "    return (_extract(tbl.search(emb).limit(9).to_df()), code)\n",
+    "    return (_extract(tbl.search(emb).limit(9).to_pandas()), code)\n",
    "\n",
    "def find_image_keywords(query):\n",
    "    code = (\n",
    "        \"import lancedb\\n\"\n",
    "        \"db = lancedb.connect('~/datasets/demo')\\n\"\n",
    "        \"tbl = db.open_table('diffusiondb')\\n\\n\"\n",
-    "        f\"tbl.search('{query}').limit(9).to_df()\"\n",
+    "        f\"tbl.search('{query}').limit(9).to_pandas()\"\n",
    "    )\n",
-    "    return (_extract(tbl.search(query).limit(9).to_df()), code)\n",
+    "    return (_extract(tbl.search(query).limit(9).to_pandas()), code)\n",
    "\n",
    "def find_image_sql(query):\n",
    "    code = (\n",
--- a/docs/src/notebooks/youtube_transcript_search.ipynb
+++ b/docs/src/notebooks/youtube_transcript_search.ipynb
@@ -27,11 +27,11 @@
     "output_type": "stream",
     "text": [
      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.1\u001B[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n",
      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.1\u001B[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n"
     ]
    }
   ],
@@ -184,7 +184,7 @@
    "df = (contextualize(data.to_pandas())\n",
    "      .groupby(\"title\").text_col(\"text\")\n",
    "      .window(20).stride(4)\n",
-    "      .to_df())\n",
+    "      .to_pandas())\n",
    "df.head(1)"
   ]
  },
@@ -603,7 +603,7 @@
   "outputs": [],
   "source": [
    "# Use LanceDB to get top 3 most relevant context\n",
-    "context = tbl.search(emb).limit(3).to_df()"
+    "context = tbl.search(emb).limit(3).to_pandas()"
   ]
  },
  {
--- a/docs/src/python/arrow.md
+++ b/docs/src/python/arrow.md
@@ -74,7 +74,7 @@ table = db.open_table("pd_table")

 query_vector = [100, 100]
 # Pandas DataFrame
-df = table.search(query_vector).limit(1).to_df()
+df = table.search(query_vector).limit(1).to_pandas()
 print(df)
 ```

@@ -89,12 +89,12 @@ If you have more complex criteria, you can always apply the filter to the result
 ```python

 # Apply the filter via LanceDB
-results = table.search([100, 100]).where("price < 15").to_df()
+results = table.search([100, 100]).where("price < 15").to_pandas()
 assert len(results) == 1
 assert results["item"].iloc[0] == "foo"

 # Apply the filter via Pandas
-df = results = table.search([100, 100]).to_df()
+df = results = table.search([100, 100]).to_pandas()
 results = df[df.price < 15]
 assert len(results) == 1
 assert results["item"].iloc[0] == "foo"
--- a/docs/src/search.md
+++ b/docs/src/search.md
@@ -67,7 +67,7 @@ await db_setup.createTable('my_vectors', data)

    df = tbl.search(np.random.random((1536))) \
        .limit(10) \
-        .to_df()
+        .to_list()
    ```

 === "JavaScript"
@@ -92,7 +92,7 @@ as well.
    df = tbl.search(np.random.random((1536))) \
        .metric("cosine") \
        .limit(10) \
-        .to_df()
+        .to_list()
    ```


--- a/python/README.md
+++ b/python/README.md
@@ -16,7 +16,7 @@ pip install lancedb
 import lancedb
 db = lancedb.connect('<PATH_TO_LANCEDB_DATASET>')
 table = db.open_table('my_table')
-results = table.search([0.1, 0.3]).limit(20).to_df()
+results = table.search([0.1, 0.3]).limit(20).to_list()
 print(results)
 ```

--- a/python/lancedb/__init__.py
+++ b/python/lancedb/__init__.py
@@ -14,12 +14,12 @@
 import importlib.metadata
 from typing import Optional

+__version__ = importlib.metadata.version("lancedb")
+
 from .db import URI, DBConnection, LanceDBConnection
 from .remote.db import RemoteDBConnection
 from .schema import vector

-__version__ = importlib.metadata.version("lancedb")
-

 def connect(
    uri: URI,
--- a/python/lancedb/context.py
+++ b/python/lancedb/context.py
@@ -12,6 +12,9 @@
 #  limitations under the License.
 from __future__ import annotations

+import deprecation
+
+from . import __version__
 from .exceptions import MissingColumnError, MissingValueError
 from .util import safe_import_pandas

@@ -43,7 +46,7 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
    this how many tokens, but depending on the input data, it could be sentences,
    paragraphs, messages, etc.

-    >>> contextualize(data).window(3).stride(1).text_col('token').to_df()
+    >>> contextualize(data).window(3).stride(1).text_col('token').to_pandas()
                    token  document_id
    0     The quick brown            1
    1     quick brown fox            1
@@ -56,7 +59,7 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
    8          dog I love            1
    9   I love sandwiches            2
    10    love sandwiches            2
-    >>> contextualize(data).window(7).stride(1).min_window_size(7).text_col('token').to_df()
+    >>> contextualize(data).window(7).stride(1).min_window_size(7).text_col('token').to_pandas()
                                      token  document_id
    0   The quick brown fox jumped over the            1
    1  quick brown fox jumped over the lazy            1
@@ -68,7 +71,7 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
    ``stride`` determines how many rows to skip between each window start. This can
    be used to reduce the total number of windows generated.

-    >>> contextualize(data).window(4).stride(2).text_col('token').to_df()
+    >>> contextualize(data).window(4).stride(2).text_col('token').to_pandas()
                        token  document_id
    0     The quick brown fox            1
    2   brown fox jumped over            1
@@ -81,7 +84,7 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
    context windows that don't cross document boundaries. In this case, we can
    pass ``document_id`` as the group by.

-    >>> contextualize(data).window(4).stride(2).text_col('token').groupby('document_id').to_df()
+    >>> contextualize(data).window(4).stride(2).text_col('token').groupby('document_id').to_pandas()
                       token  document_id
    0    The quick brown fox            1
    2  brown fox jumped over            1
@@ -93,14 +96,14 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
    This can be used to trim the last few context windows which have size less than
    ``min_window_size``. By default context windows of size 1 are skipped.

-    >>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_df()
+    >>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_pandas()
                                 token  document_id
    0  The quick brown fox jumped over            1
    3     fox jumped over the lazy dog            1
    6                     the lazy dog            1
    9                I love sandwiches            2

-    >>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_df()
+    >>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_pandas()
                                 token  document_id
    0  The quick brown fox jumped over            1
    3     fox jumped over the lazy dog            1
@@ -176,7 +179,16 @@ class Contextualizer:
        self._min_window_size = min_window_size
        return self

+    @deprecation.deprecated(
+        deprecated_in="0.3.1",
+        removed_in="0.4.0",
+        current_version=__version__,
+        details="Use to_pandas() instead",
+    )
    def to_df(self) -> "pd.DataFrame":
+        return self.to_pandas()
+
+    def to_pandas(self) -> "pd.DataFrame":
        """Create the context windows and return a DataFrame."""
        if pd is None:
            raise ImportError(
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -16,10 +16,12 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import List, Literal, Optional, Type, Union

+import deprecation
 import numpy as np
 import pyarrow as pa
 import pydantic

+from . import __version__
 from .common import VECTOR_COLUMN_NAME
 from .pydantic import LanceModel
 from .util import safe_import_pandas
@@ -127,7 +129,24 @@ class LanceQueryBuilder(ABC):
        self._columns = None
        self._where = None

+    @deprecation.deprecated(
+        deprecated_in="0.3.1",
+        removed_in="0.4.0",
+        current_version=__version__,
+        details="Use to_pandas() instead",
+    )
    def to_df(self) -> "pd.DataFrame":
+        """
+        Deprecated alias for `to_pandas()`. Please use `to_pandas()` instead.
+
+        Execute the query and return the results as a pandas DataFrame.
+        In addition to the selected columns, LanceDB also returns a vector
+        and also the "_distance" column which is the distance between the query
+        vector and the returned vector.
+        """
+        return self.to_pandas()
+
+    def to_pandas(self) -> "pd.DataFrame":
        """
        Execute the query and return the results as a pandas DataFrame.
        In addition to the selected columns, LanceDB also returns a vector
@@ -148,6 +167,16 @@ class LanceQueryBuilder(ABC):
        """
        raise NotImplementedError

+    def to_list(self) -> List[dict]:
+        """
+        Execute the query and return the results as a list of dictionaries.
+
+        Each list entry is a dictionary with the selected column names as keys,
+        or all table columns if `select` is not called. For vector queries, the
+        vector and "_distance" fields are returned even when not selected.
+        """
+        return self.to_arrow().to_pylist()
+
    def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
        """Return the table as a list of pydantic models.

@@ -232,7 +261,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
    ...       .where("b < 10")
    ...       .select(["b"])
    ...       .limit(2)
-    ...       .to_df())
+    ...       .to_pandas())
       b      vector  _distance
    0  6  [0.4, 0.4]        0.0
    """
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -136,7 +136,7 @@ class Table(ABC):

    Can query the table with [Table.search][lancedb.table.Table.search].

-    >>> table.search([0.4, 0.4]).select(["b"]).to_df()
+    >>> table.search([0.4, 0.4]).select(["b"]).to_pandas()
       b      vector  _distance
    0  4  [0.5, 1.3]       0.82
    1  2  [1.1, 1.2]       1.13
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -2,6 +2,7 @@
 name = "lancedb"
 version = "0.3.0"
 dependencies = [
+    "deprecation",
    "pylance==0.8.3",
    "ratelimiter~=1.0",
    "retry>=0.9.2",
--- a/python/tests/test_context.py
+++ b/python/tests/test_context.py
@@ -47,7 +47,7 @@ def test_contextualizer(raw_df: pd.DataFrame):
        .stride(3)
        .text_col("token")
        .groupby("document_id")
-        .to_df()["token"]
+        .to_pandas()["token"]
        .to_list()
    )

@@ -67,7 +67,7 @@ def test_contextualizer_with_threshold(raw_df: pd.DataFrame):
        .text_col("token")
        .groupby("document_id")
        .min_window_size(4)
-        .to_df()["token"]
+        .to_pandas()["token"]
        .to_list()
    )

--- a/python/tests/test_db.py
+++ b/python/tests/test_db.py
@@ -33,11 +33,11 @@ def test_basic(tmp_path):
            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
        ],
    )
-    rs = table.search([100, 100]).limit(1).to_df()
+    rs = table.search([100, 100]).limit(1).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"

-    rs = table.search([100, 100]).where("price < 15").limit(2).to_df()
+    rs = table.search([100, 100]).where("price < 15").limit(2).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "foo"

@@ -62,11 +62,11 @@ def test_ingest_pd(tmp_path):
        }
    )
    table = db.create_table("test", data=data)
-    rs = table.search([100, 100]).limit(1).to_df()
+    rs = table.search([100, 100]).limit(1).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"

-    rs = table.search([100, 100]).where("price < 15").limit(2).to_df()
+    rs = table.search([100, 100]).where("price < 15").limit(2).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "foo"

@@ -137,8 +137,8 @@ def test_ingest_iterator(tmp_path):
        db = lancedb.connect(tmp_path)
        tbl = db.create_table("table2", make_batches(), schema=schema, mode="overwrite")
        tbl.to_pandas()
-        assert tbl.search([3.1, 4.1]).limit(1).to_df()["_distance"][0] == 0.0
-        assert tbl.search([5.9, 26.5]).limit(1).to_df()["_distance"][0] == 0.0
+        assert tbl.search([3.1, 4.1]).limit(1).to_pandas()["_distance"][0] == 0.0
+        assert tbl.search([5.9, 26.5]).limit(1).to_pandas()["_distance"][0] == 0.0
        tbl_len = len(tbl)
        tbl.add(make_batches())
        assert tbl_len == 50
--- a/python/tests/test_e2e_remote_db.py
+++ b/python/tests/test_e2e_remote_db.py
@@ -23,5 +23,5 @@ from lancedb import LanceDBConnection
 def test_against_local_server():
    conn = LanceDBConnection("lancedb+http://localhost:10024")
    table = conn.open_table("sift1m_ivf1024_pq16")
-    df = table.search(np.random.rand(128)).to_df()
+    df = table.search(np.random.rand(128)).to_pandas()
    assert len(df) == 10
--- a/python/tests/test_fts.py
+++ b/python/tests/test_fts.py
@@ -71,14 +71,14 @@ def test_search_index(tmp_path, table):

 def test_create_index_from_table(tmp_path, table):
    table.create_fts_index("text")
-    df = table.search("puppy").limit(10).select(["text"]).to_df()
+    df = table.search("puppy").limit(10).select(["text"]).to_pandas()
    assert len(df) == 10
    assert "text" in df.columns


 def test_create_index_multiple_columns(tmp_path, table):
    table.create_fts_index(["text", "text2"])
-    df = table.search("puppy").limit(10).to_df()
+    df = table.search("puppy").limit(10).to_pandas()
    assert len(df) == 10
    assert "text" in df.columns
    assert "text2" in df.columns
@@ -87,5 +87,5 @@ def test_create_index_multiple_columns(tmp_path, table):
 def test_empty_rs(tmp_path, table, mocker):
    table.create_fts_index(["text", "text2"])
    mocker.patch("lancedb.fts.search_index", return_value=([], []))
-    df = table.search("puppy").limit(10).to_df()
+    df = table.search("puppy").limit(10).to_pandas()
    assert len(df) == 0
--- a/python/tests/test_io.py
+++ b/python/tests/test_io.py
@@ -36,11 +36,11 @@ def test_s3_io():
            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
        ],
    )
-    rs = table.search([100, 100]).limit(1).to_df()
+    rs = table.search([100, 100]).limit(1).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"

-    rs = table.search([100, 100]).where("price < 15").limit(2).to_df()
+    rs = table.search([100, 100]).where("price < 15").limit(2).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "foo"

--- a/python/tests/test_query.py
+++ b/python/tests/test_query.py
@@ -85,17 +85,20 @@ def test_cast(table):


 def test_query_builder(table):
-    df = (
-        LanceVectorQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
+    rs = (
+        LanceVectorQueryBuilder(table, [0, 0], "vector")
+        .limit(1)
+        .select(["id"])
+        .to_list()
    )
-    assert df["id"].values[0] == 1
-    assert all(df["vector"].values[0] == [1, 2])
+    assert rs[0]["id"] == 1
+    assert all(np.array(rs[0]["vector"]) == [1, 2])


 def test_query_builder_with_filter(table):
-    df = LanceVectorQueryBuilder(table, [0, 0], "vector").where("id = 2").to_df()
-    assert df["id"].values[0] == 2
-    assert all(df["vector"].values[0] == [3, 4])
+    rs = LanceVectorQueryBuilder(table, [0, 0], "vector").where("id = 2").to_list()
+    assert rs[0]["id"] == 2
+    assert all(np.array(rs[0]["vector"]) == [3, 4])


 def test_query_builder_with_prefilter(table):
@@ -103,7 +106,7 @@ def test_query_builder_with_prefilter(table):
        LanceVectorQueryBuilder(table, [0, 0], "vector")
        .where("id = 2")
        .limit(1)
-        .to_df()
+        .to_pandas()
    )
    assert len(df) == 0

@@ -111,7 +114,7 @@ def test_query_builder_with_prefilter(table):
        LanceVectorQueryBuilder(table, [0, 0], "vector")
        .where("id = 2", prefilter=True)
        .limit(1)
-        .to_df()
+        .to_pandas()
    )
    assert df["id"].values[0] == 2
    assert all(df["vector"].values[0] == [3, 4])
@@ -120,9 +123,11 @@ def test_query_builder_with_prefilter(table):
 def test_query_builder_with_metric(table):
    query = [4, 8]
    vector_column_name = "vector"
-    df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_df()
+    df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas()
    df_l2 = (
-        LanceVectorQueryBuilder(table, query, vector_column_name).metric("L2").to_df()
+        LanceVectorQueryBuilder(table, query, vector_column_name)
+        .metric("L2")
+        .to_pandas()
    )
    tm.assert_frame_equal(df_default, df_l2)

@@ -130,7 +135,7 @@ def test_query_builder_with_metric(table):
        LanceVectorQueryBuilder(table, query, vector_column_name)
        .metric("cosine")
        .limit(1)
-        .to_df()
+        .to_pandas()
    )
    assert df_cosine._distance[0] == pytest.approx(
        cosine_distance(query, df_cosine.vector[0]),
--- a/python/tests/test_remote_client.py
+++ b/python/tests/test_remote_client.py
@@ -86,7 +86,7 @@ async def test_e2e_with_mock_server():
                    columns=["id", "vector"],
                ),
            )
-        ).to_df()
+        ).to_pandas()

        assert "vector" in df.columns
        assert "id" in df.columns
--- a/python/tests/test_remote_db.py
+++ b/python/tests/test_remote_db.py
@@ -32,4 +32,4 @@ def test_remote_db():
    setattr(conn, "_client", FakeLanceDBClient())

    table = conn["test"]
-    table.search([1.0, 2.0]).to_df()
+    table.search([1.0, 2.0]).to_pandas()
--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -427,8 +427,8 @@ def test_multiple_vector_columns(db):
    table.add(df)

    q = np.random.randn(10)
-    result1 = table.search(q, vector_column_name="vector1").limit(1).to_df()
-    result2 = table.search(q, vector_column_name="vector2").limit(1).to_df()
+    result1 = table.search(q, vector_column_name="vector1").limit(1).to_pandas()
+    result2 = table.search(q, vector_column_name="vector2").limit(1).to_pandas()

    assert result1["text"].iloc[0] != result2["text"].iloc[0]

@@ -439,6 +439,6 @@ def test_empty_query(db):
        "my_table",
        data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
    )
-    df = table.search().select(["id"]).where("text='bar'").limit(1).to_df()
+    df = table.search().select(["id"]).where("text='bar'").limit(1).to_pandas()
    val = df.id.iloc[0]
    assert val == 1