docs: add async examples to doc (#1941)

- added sync and async tabs for Python examples
- moved Python code to tests/docs

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
Author: QianZhu
Date: 2025-01-07 15:10:25 -08:00
Committed by: GitHub
Parent: 0b45ef93c0
Commit: 17c9e9afea
21 changed files with 3639 additions and 987 deletions
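Context for the markers below: the `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` comments are section markers for the pymdownx `snippets` extension; the docs pages pull the marked regions in and render them as the sync/async tabs this commit mentions. A minimal sketch of the markdown side, assuming mkdocs-material content tabs and a hypothetical snippet path of `python/python/tests/docs/test_basic.py` (the exact file paths are not shown in this diff view):

=== "Sync API"

    ```python
    --8<-- "python/python/tests/docs/test_basic.py:create_table"
    ```

=== "Async API"

    ```python
    --8<-- "python/python/tests/docs/test_basic.py:create_table_async"
    ```

Keeping the examples in real test files means the snippets run under pytest, so the documented code can't silently drift out of date.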


@@ -125,7 +125,7 @@ async def test_quickstart_async():
# --8<-- [start:create_table_async]
# Asynchronous client
async_tbl = await async_db.create_table("my_table2", data=data)
async_tbl = await async_db.create_table("my_table_async", data=data)
# --8<-- [end:create_table_async]
df = pd.DataFrame(
@@ -137,17 +137,17 @@ async def test_quickstart_async():
# --8<-- [start:create_table_async_pandas]
# Asynchronous client
async_tbl = await async_db.create_table("table_from_df2", df)
async_tbl = await async_db.create_table("table_from_df_async", df)
# --8<-- [end:create_table_async_pandas]
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
# --8<-- [start:create_empty_table_async]
# Asynchronous client
async_tbl = await async_db.create_table("empty_table2", schema=schema)
async_tbl = await async_db.create_table("empty_table_async", schema=schema)
# --8<-- [end:create_empty_table_async]
# --8<-- [start:open_table_async]
# Asynchronous client
async_tbl = await async_db.open_table("my_table2")
async_tbl = await async_db.open_table("my_table_async")
# --8<-- [end:open_table_async]
# --8<-- [start:table_names_async]
# Asynchronous client
@@ -161,6 +161,22 @@ async def test_quickstart_async():
data = [{"vector": [x, x], "item": "filler", "price": x * x} for x in range(1000)]
await async_tbl.add(data)
# --8<-- [start:vector_search_async]
+# --8<-- [start:add_columns_async]
+await async_tbl.add_columns({"double_price": "cast((price * 2) as float)"})
+# --8<-- [end:add_columns_async]
+# --8<-- [start:alter_columns_async]
+await async_tbl.alter_columns(
+{
+"path": "double_price",
+"rename": "dbl_price",
+"data_type": pa.float64(),
+"nullable": True,
+}
+)
+# --8<-- [end:alter_columns_async]
+# --8<-- [start:drop_columns_async]
+await async_tbl.drop_columns(["dbl_price"])
+# --8<-- [end:drop_columns_async]
# Asynchronous client
await async_tbl.vector_search([100, 100]).limit(2).to_pandas()
# --8<-- [end:vector_search_async]
@@ -174,5 +190,5 @@ async def test_quickstart_async():
# --8<-- [end:delete_rows_async]
# --8<-- [start:drop_table_async]
# Asynchronous client
await async_db.drop_table("my_table2")
await async_db.drop_table("my_table_async")
# --8<-- [end:drop_table_async]


@@ -0,0 +1,169 @@
# --8<-- [start:import-lancedb]
import lancedb
# --8<-- [end:import-lancedb]
# --8<-- [start:import-lancedb-ivfpq]
from lancedb.index import IvfPq
# --8<-- [end:import-lancedb-ivfpq]
# --8<-- [start:import-lancedb-btree-bitmap]
from lancedb.index import BTree, Bitmap
# --8<-- [end:import-lancedb-btree-bitmap]
# --8<-- [start:import-numpy]
import numpy as np
# --8<-- [end:import-numpy]
import pytest
def test_ann_index():
# --8<-- [start:create_ann_index]
uri = "data/sample-lancedb"
# Create 5,000 sample vectors
data = [
{"vector": row, "item": f"item {i}"}
for i, row in enumerate(np.random.random((5_000, 32)).astype("float32"))
]
db = lancedb.connect(uri)
# Add the vectors to a table
tbl = db.create_table("my_vectors", data=data)
# Create and train the index - you need to have enough data in the table
# for an effective training step
tbl.create_index(num_partitions=2, num_sub_vectors=4)
# --8<-- [end:create_ann_index]
# --8<-- [start:vector_search]
tbl.search(np.random.random((32))).limit(2).nprobes(20).refine_factor(
10
).to_pandas()
# --8<-- [end:vector_search]
# --8<-- [start:vector_search_with_filter]
tbl.search(np.random.random((32))).where("item != 'item 1141'").to_pandas()
# --8<-- [end:vector_search_with_filter]
# --8<-- [start:vector_search_with_select]
tbl.search(np.random.random((32))).select(["vector"]).to_pandas()
# --8<-- [end:vector_search_with_select]
@pytest.mark.asyncio
async def test_ann_index_async():
# --8<-- [start:create_ann_index_async]
uri = "data/sample-lancedb"
# Create 5,000 sample vectors
data = [
{"vector": row, "item": f"item {i}"}
for i, row in enumerate(np.random.random((5_000, 32)).astype("float32"))
]
async_db = await lancedb.connect_async(uri)
# Add the vectors to a table
async_tbl = await async_db.create_table("my_vectors_async", data=data)
# Create and train the index - you need to have enough data in the table
# for an effective training step
await async_tbl.create_index(
"vector", config=IvfPq(num_partitions=2, num_sub_vectors=4)
)
# --8<-- [end:create_ann_index_async]
# --8<-- [start:vector_search_async]
await (
async_tbl.query()
.nearest_to(np.random.random((32)))
.limit(2)
.nprobes(20)
.refine_factor(10)
.to_pandas()
)
# --8<-- [end:vector_search_async]
# --8<-- [start:vector_search_async_with_filter]
await (
async_tbl.query()
.nearest_to(np.random.random((32)))
.where("item != 'item 1141'")
.to_pandas()
)
# --8<-- [end:vector_search_async_with_filter]
# --8<-- [start:vector_search_async_with_select]
await (
async_tbl.query()
.nearest_to(np.random.random((32)))
.select(["vector"])
.to_pandas()
)
# --8<-- [end:vector_search_async_with_select]
def test_scalar_index():
# --8<-- [start:basic_scalar_index]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
books = [
{
"book_id": 1,
"publisher": "plenty of books",
"tags": ["fantasy", "adventure"],
},
{"book_id": 2, "publisher": "book town", "tags": ["non-fiction"]},
{"book_id": 3, "publisher": "oreilly", "tags": ["textbook"]},
]
table = db.create_table("books", books)
table.create_scalar_index("book_id") # BTree by default
table.create_scalar_index("publisher", index_type="BITMAP")
# --8<-- [end:basic_scalar_index]
# --8<-- [start:search_with_scalar_index]
table = db.open_table("books")
table.search().where("book_id = 2").to_pandas()
# --8<-- [end:search_with_scalar_index]
# --8<-- [start:vector_search_with_scalar_index]
data = [
{"book_id": 1, "vector": [1, 2]},
{"book_id": 2, "vector": [3, 4]},
{"book_id": 3, "vector": [5, 6]},
]
table = db.create_table("book_with_embeddings", data)
(table.search([1, 2]).where("book_id != 3", prefilter=True).to_pandas())
# --8<-- [end:vector_search_with_scalar_index]
# --8<-- [start:update_scalar_index]
table.add([{"vector": [7, 8], "book_id": 4}])
table.optimize()
# --8<-- [end:update_scalar_index]
@pytest.mark.asyncio
async def test_scalar_index_async():
# --8<-- [start:basic_scalar_index_async]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
books = [
{
"book_id": 1,
"publisher": "plenty of books",
"tags": ["fantasy", "adventure"],
},
{"book_id": 2, "publisher": "book town", "tags": ["non-fiction"]},
{"book_id": 3, "publisher": "oreilly", "tags": ["textbook"]},
]
async_tbl = await async_db.create_table("books_async", books)
await async_tbl.create_index("book_id", config=BTree()) # BTree by default
await async_tbl.create_index("publisher", config=Bitmap())
# --8<-- [end:basic_scalar_index_async]
# --8<-- [start:search_with_scalar_index_async]
async_tbl = await async_db.open_table("books_async")
await async_tbl.query().where("book_id = 2").to_pandas()
# --8<-- [end:search_with_scalar_index_async]
# --8<-- [start:vector_search_with_scalar_index_async]
data = [
{"book_id": 1, "vector": [1, 2]},
{"book_id": 2, "vector": [3, 4]},
{"book_id": 3, "vector": [5, 6]},
]
async_tbl = await async_db.create_table("book_with_embeddings_async", data)
(await async_tbl.query().where("book_id != 3").nearest_to([1, 2]).to_pandas())
# --8<-- [end:vector_search_with_scalar_index_async]
# --8<-- [start:update_scalar_index_async]
await async_tbl.add([{"vector": [7, 8], "book_id": 4}])
await async_tbl.optimize()
# --8<-- [end:update_scalar_index_async]


@@ -0,0 +1,576 @@
# --8<-- [start:import-lancedb]
import lancedb
# --8<-- [end:import-lancedb]
# --8<-- [start:import-pandas]
import pandas as pd
# --8<-- [end:import-pandas]
# --8<-- [start:import-pyarrow]
import pyarrow as pa
# --8<-- [end:import-pyarrow]
# --8<-- [start:import-polars]
import polars as pl
# --8<-- [end:import-polars]
# --8<-- [start:import-numpy]
import numpy as np
# --8<-- [end:import-numpy]
# --8<-- [start:import-lancedb-pydantic]
from lancedb.pydantic import Vector, LanceModel
# --8<-- [end:import-lancedb-pydantic]
# --8<-- [start:import-datetime]
from datetime import timedelta
# --8<-- [end:import-datetime]
# --8<-- [start:import-embeddings]
from lancedb.embeddings import get_registry
# --8<-- [end:import-embeddings]
# --8<-- [start:import-pydantic-basemodel]
from pydantic import BaseModel
# --8<-- [end:import-pydantic-basemodel]
import pytest
# --8<-- [start:class-Content]
class Content(LanceModel):
movie_id: int
vector: Vector(128)
genres: str
title: str
imdb_id: int
@property
def imdb_url(self) -> str:
return f"https://www.imdb.com/title/tt{self.imdb_id}"
# --8<-- [end:class-Content]
# --8<-- [start:class-Document]
class Document(BaseModel):
content: str
source: str
# --8<-- [end:class-Document]
# --8<-- [start:class-NestedSchema]
class NestedSchema(LanceModel):
id: str
vector: Vector(1536)
document: Document
# --8<-- [end:class-NestedSchema]
# --8<-- [start:class-Item]
class Item(LanceModel):
vector: Vector(2)
item: str
price: float
# --8<-- [end:class-Item]
# --8<-- [start:make_batches]
def make_batches():
for i in range(5):
yield pa.RecordBatch.from_arrays(
[
pa.array(
[[3.1, 4.1, 5.1, 6.1], [5.9, 26.5, 4.7, 32.8]],
pa.list_(pa.float32(), 4),
),
pa.array(["foo", "bar"]),
pa.array([10.0, 20.0]),
],
["vector", "item", "price"],
)
# --8<-- [end:make_batches]
# --8<-- [start:make_batches_for_add]
def make_batches_for_add():
for i in range(5):
yield [
{"vector": [3.1, 4.1], "item": "peach", "price": 6.0},
{"vector": [5.9, 26.5], "item": "pear", "price": 5.0},
]
# --8<-- [end:make_batches_for_add]
def test_table():
# --8<-- [start:connect]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
# --8<-- [end:connect]
# --8<-- [start:create_table]
data = [
{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
{"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1},
]
db.create_table("test_table", data)
db["test_table"].head()
# --8<-- [end:create_table]
# --8<-- [start:create_table_exist_ok]
db.create_table("test_table", data, exist_ok=True)
# --8<-- [end:create_table_exist_ok]
# --8<-- [start:create_table_overwrite]
db.create_table("test_table", data, mode="overwrite")
# --8<-- [end:create_table_overwrite]
# --8<-- [start:create_table_from_pandas]
data = pd.DataFrame(
{
"vector": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],
"lat": [45.5, 40.1],
"long": [-122.7, -74.1],
}
)
db.create_table("my_table_pandas", data)
db["my_table_pandas"].head()
# --8<-- [end:create_table_from_pandas]
# --8<-- [start:create_table_custom_schema]
custom_schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 4)),
pa.field("lat", pa.float32()),
pa.field("long", pa.float32()),
]
)
tbl = db.create_table("my_table_custom_schema", data, schema=custom_schema)
# --8<-- [end:create_table_custom_schema]
# --8<-- [start:create_table_from_polars]
data = pl.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
"item": ["foo", "bar"],
"price": [10.0, 20.0],
}
)
tbl = db.create_table("my_table_pl", data)
# --8<-- [end:create_table_from_polars]
# --8<-- [start:create_table_from_arrow_table]
dim = 16
total = 2
schema = pa.schema(
[pa.field("vector", pa.list_(pa.float16(), dim)), pa.field("text", pa.string())]
)
data = pa.Table.from_arrays(
[
pa.array(
[np.random.randn(dim).astype(np.float16) for _ in range(total)],
pa.list_(pa.float16(), dim),
),
pa.array(["foo", "bar"]),
],
["vector", "text"],
)
tbl = db.create_table("f16_tbl", data, schema=schema)
# --8<-- [end:create_table_from_arrow_table]
# --8<-- [start:create_table_from_pydantic]
tbl = db.create_table("movielens_small", schema=Content)
# --8<-- [end:create_table_from_pydantic]
# --8<-- [start:create_table_nested_schema]
tbl = db.create_table("nested_table", schema=NestedSchema)
# --8<-- [end:create_table_nested_schema]
# --8<-- [start:create_table_from_batch]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 4)),
pa.field("item", pa.utf8()),
pa.field("price", pa.float32()),
]
)
db.create_table("batched_tale", make_batches(), schema=schema)
# --8<-- [end:create_table_from_batch]
# --8<-- [start:list_tables]
print(db.table_names())
# --8<-- [end:list_tables]
# --8<-- [start:open_table]
tbl = db.open_table("test_table")
# --8<-- [end:open_table]
# --8<-- [start:create_empty_table]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 2)),
pa.field("item", pa.string()),
pa.field("price", pa.float32()),
]
)
tbl = db.create_table("test_empty_table", schema=schema)
# --8<-- [end:create_empty_table]
# --8<-- [start:create_empty_table_pydantic]
tbl = db.create_table("test_empty_table_new", schema=Item.to_arrow_schema())
# --8<-- [end:create_empty_table_pydantic]
# --8<-- [start:add_table_from_pandas]
df = pd.DataFrame(
{
"vector": [[1.3, 1.4], [9.5, 56.2]],
"item": ["banana", "apple"],
"price": [5.0, 7.0],
}
)
tbl.add(df)
# --8<-- [end:add_table_from_pandas]
# --8<-- [start:add_table_from_polars]
df = pl.DataFrame(
{
"vector": [[1.3, 1.4], [9.5, 56.2]],
"item": ["banana", "apple"],
"price": [5.0, 7.0],
}
)
tbl.add(df)
# --8<-- [end:add_table_from_polars]
# --8<-- [start:add_table_from_batch]
tbl.add(make_batches_for_add())
# --8<-- [end:add_table_from_batch]
# --8<-- [start:add_table_from_pyarrow]
pa_table = pa.Table.from_arrays(
[
pa.array([[9.1, 6.7], [9.9, 31.2]], pa.list_(pa.float32(), 2)),
pa.array(["mango", "orange"]),
pa.array([7.0, 4.0]),
],
["vector", "item", "price"],
)
tbl.add(pa_table)
# --8<-- [end:add_table_from_pyarrow]
# --8<-- [start:add_table_from_pydantic]
pydantic_model_items = [
Item(vector=[8.1, 4.7], item="pineapple", price=10.0),
Item(vector=[6.9, 9.3], item="avocado", price=9.0),
]
tbl.add(pydantic_model_items)
# --8<-- [end:add_table_from_pydantic]
# --8<-- [start:delete_row]
tbl.delete('item = "fizz"')
# --8<-- [end:delete_row]
# --8<-- [start:delete_specific_row]
data = [
{"x": 1, "vector": [1, 2]},
{"x": 2, "vector": [3, 4]},
{"x": 3, "vector": [5, 6]},
]
# Synchronous client
tbl = db.create_table("delete_row", data)
tbl.to_pandas()
# x vector
# 0 1 [1.0, 2.0]
# 1 2 [3.0, 4.0]
# 2 3 [5.0, 6.0]
tbl.delete("x = 2")
tbl.to_pandas()
# x vector
# 0 1 [1.0, 2.0]
# 1 3 [5.0, 6.0]
# --8<-- [end:delete_specific_row]
# --8<-- [start:delete_list_values]
to_remove = [1, 5]
to_remove = ", ".join(str(v) for v in to_remove)
tbl.delete(f"x IN ({to_remove})")
tbl.to_pandas()
# x vector
# 0 3 [5.0, 6.0]
# --8<-- [end:delete_list_values]
# --8<-- [start:update_table]
# Create a table from a pandas DataFrame
data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
tbl = db.create_table("test_table", data, mode="overwrite")
# Update the table where x = 2
tbl.update(where="x = 2", values={"vector": [10, 10]})
# Get the updated table as a pandas DataFrame
df = tbl.to_pandas()
print(df)
# --8<-- [end:update_table]
# --8<-- [start:update_table_sql]
# Update all rows, incrementing x with a SQL expression
tbl.update(values_sql={"x": "x + 1"})
print(tbl.to_pandas())
# --8<-- [end:update_table_sql]
# --8<-- [start:table_strong_consistency]
uri = "data/sample-lancedb"
db = lancedb.connect(uri, read_consistency_interval=timedelta(0))
tbl = db.open_table("test_table")
# --8<-- [end:table_strong_consistency]
# --8<-- [start:table_eventual_consistency]
uri = "data/sample-lancedb"
db = lancedb.connect(uri, read_consistency_interval=timedelta(seconds=5))
tbl = db.open_table("test_table")
# --8<-- [end:table_eventual_consistency]
# --8<-- [start:table_checkout_latest]
tbl = db.open_table("test_table")
# (Other writes happen to test_table from another process)
# Check for updates
tbl.checkout_latest()
# --8<-- [end:table_checkout_latest]
@pytest.mark.skip
def test_table_with_embedding():
db = lancedb.connect("data/sample-lancedb")
# --8<-- [start:create_table_with_embedding]
embed_fcn = get_registry().get("huggingface").create(name="BAAI/bge-small-en-v1.5")
class Schema(LanceModel):
text: str = embed_fcn.SourceField()
vector: Vector(embed_fcn.ndims()) = embed_fcn.VectorField(default=None)
tbl = db.create_table("my_table_with_embedding", schema=Schema, mode="overwrite")
models = [Schema(text="hello"), Schema(text="world")]
tbl.add(models)
# --8<-- [end:create_table_with_embedding]
@pytest.mark.skip
async def test_table_with_embedding_async():
async_db = await lancedb.connect_async("data/sample-lancedb")
# --8<-- [start:create_table_async_with_embedding]
embed_fcn = get_registry().get("huggingface").create(name="BAAI/bge-small-en-v1.5")
class Schema(LanceModel):
text: str = embed_fcn.SourceField()
vector: Vector(embed_fcn.ndims()) = embed_fcn.VectorField(default=None)
async_tbl = await async_db.create_table(
"my_table_async_with_embedding", schema=Schema, mode="overwrite"
)
models = [Schema(text="hello"), Schema(text="world")]
await async_tbl.add(models)
# --8<-- [end:create_table_async_with_embedding]
@pytest.mark.asyncio
async def test_table_async():
# --8<-- [start:connect_async]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
# --8<-- [end:connect_async]
# --8<-- [start:create_table_async]
data = [
{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
{"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1},
]
async_tbl = await async_db.create_table("test_table_async", data)
await async_tbl.head()
# --8<-- [end:create_table_async]
# --8<-- [start:create_table_async_exist_ok]
await async_db.create_table("test_table_async", data, exist_ok=True)
# --8<-- [end:create_table_async_exist_ok]
# --8<-- [start:create_table_async_overwrite]
await async_db.create_table("test_table_async", data, mode="overwrite")
# --8<-- [end:create_table_async_overwrite]
# --8<-- [start:create_table_async_from_pandas]
data = pd.DataFrame(
{
"vector": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],
"lat": [45.5, 40.1],
"long": [-122.7, -74.1],
}
)
async_tbl = await async_db.create_table("my_table_async_pd", data)
await async_tbl.head()
# --8<-- [end:create_table_async_from_pandas]
# --8<-- [start:create_table_async_custom_schema]
custom_schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 4)),
pa.field("lat", pa.float32()),
pa.field("long", pa.float32()),
]
)
async_tbl = await async_db.create_table(
"my_table_async_custom_schema", data, schema=custom_schema
)
# --8<-- [end:create_table_async_custom_schema]
# --8<-- [start:create_table_async_from_polars]
data = pl.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
"item": ["foo", "bar"],
"price": [10.0, 20.0],
}
)
async_tbl = await async_db.create_table("my_table_async_pl", data)
# --8<-- [end:create_table_async_from_polars]
# --8<-- [start:create_table_async_from_arrow_table]
dim = 16
total = 2
schema = pa.schema(
[pa.field("vector", pa.list_(pa.float16(), dim)), pa.field("text", pa.string())]
)
data = pa.Table.from_arrays(
[
pa.array(
[np.random.randn(dim).astype(np.float16) for _ in range(total)],
pa.list_(pa.float16(), dim),
),
pa.array(["foo", "bar"]),
],
["vector", "text"],
)
async_tbl = await async_db.create_table("f16_tbl_async", data, schema=schema)
# --8<-- [end:create_table_async_from_arrow_table]
# --8<-- [start:create_table_async_from_pydantic]
async_tbl = await async_db.create_table("movielens_small_async", schema=Content)
# --8<-- [end:create_table_async_from_pydantic]
# --8<-- [start:create_table_async_nested_schema]
async_tbl = await async_db.create_table("nested_table_async", schema=NestedSchema)
# --8<-- [end:create_table_async_nested_schema]
# --8<-- [start:create_table_async_from_batch]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 4)),
pa.field("item", pa.utf8()),
pa.field("price", pa.float32()),
]
)
await async_db.create_table("batched_table", make_batches(), schema=schema)
# --8<-- [end:create_table_async_from_batch]
# --8<-- [start:list_tables_async]
print(await async_db.table_names())
# --8<-- [end:list_tables_async]
# --8<-- [start:open_table_async]
async_tbl = await async_db.open_table("test_table_async")
# --8<-- [end:open_table_async]
# --8<-- [start:create_empty_table_async]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 2)),
pa.field("item", pa.string()),
pa.field("price", pa.float32()),
]
)
async_tbl = await async_db.create_table("test_empty_table_async", schema=schema)
# --8<-- [end:create_empty_table_async]
# --8<-- [start:create_empty_table_async_pydantic]
async_tbl = await async_db.create_table(
"test_empty_table_async_new", schema=Item.to_arrow_schema()
)
# --8<-- [end:create_empty_table_async_pydantic]
# --8<-- [start:add_table_async_from_pandas]
df = pd.DataFrame(
{
"vector": [[1.3, 1.4], [9.5, 56.2]],
"item": ["banana", "apple"],
"price": [5.0, 7.0],
}
)
await async_tbl.add(df)
# --8<-- [end:add_table_async_from_pandas]
# --8<-- [start:add_table_async_from_polars]
df = pl.DataFrame(
{
"vector": [[1.3, 1.4], [9.5, 56.2]],
"item": ["banana", "apple"],
"price": [5.0, 7.0],
}
)
await async_tbl.add(df)
# --8<-- [end:add_table_async_from_polars]
# --8<-- [start:add_table_async_from_batch]
await async_tbl.add(make_batches_for_add())
# --8<-- [end:add_table_async_from_batch]
# --8<-- [start:add_table_async_from_pyarrow]
pa_table = pa.Table.from_arrays(
[
pa.array([[9.1, 6.7], [9.9, 31.2]], pa.list_(pa.float32(), 2)),
pa.array(["mango", "orange"]),
pa.array([7.0, 4.0]),
],
["vector", "item", "price"],
)
await async_tbl.add(pa_table)
# --8<-- [end:add_table_async_from_pyarrow]
# --8<-- [start:add_table_async_from_pydantic]
pydantic_model_items = [
Item(vector=[8.1, 4.7], item="pineapple", price=10.0),
Item(vector=[6.9, 9.3], item="avocado", price=9.0),
]
await async_tbl.add(pydantic_model_items)
# --8<-- [end:add_table_async_from_pydantic]
# --8<-- [start:delete_row_async]
await async_tbl.delete('item = "fizz"')
# --8<-- [end:delete_row_async]
# --8<-- [start:delete_specific_row_async]
data = [
{"x": 1, "vector": [1, 2]},
{"x": 2, "vector": [3, 4]},
{"x": 3, "vector": [5, 6]},
]
async_db = await lancedb.connect_async(uri)
async_tbl = await async_db.create_table("delete_row_async", data)
await async_tbl.to_pandas()
# x vector
# 0 1 [1.0, 2.0]
# 1 2 [3.0, 4.0]
# 2 3 [5.0, 6.0]
await async_tbl.delete("x = 2")
await async_tbl.to_pandas()
# x vector
# 0 1 [1.0, 2.0]
# 1 3 [5.0, 6.0]
# --8<-- [end:delete_specific_row_async]
# --8<-- [start:delete_list_values_async]
to_remove = [1, 5]
to_remove = ", ".join(str(v) for v in to_remove)
await async_tbl.delete(f"x IN ({to_remove})")
await async_tbl.to_pandas()
# x vector
# 0 3 [5.0, 6.0]
# --8<-- [end:delete_list_values_async]
# --8<-- [start:update_table_async]
# Create a table from a pandas DataFrame
data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
async_tbl = await async_db.create_table("update_table_async", data)
# Update the table where x = 2
await async_tbl.update({"vector": [10, 10]}, where="x = 2")
# Get the updated table as a pandas DataFrame
df = await async_tbl.to_pandas()
# Print the DataFrame
print(df)
# --8<-- [end:update_table_async]
# --8<-- [start:update_table_sql_async]
# Update all rows, incrementing x with a SQL expression
await async_tbl.update(updates_sql={"x": "x + 1"})
print(await async_tbl.to_pandas())
# --8<-- [end:update_table_sql_async]
# --8<-- [start:table_async_strong_consistency]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri, read_consistency_interval=timedelta(0))
async_tbl = await async_db.open_table("test_table_async")
# --8<-- [end:table_async_strong_consistency]
# --8<-- [start:table_async_eventual_consistency]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(
uri, read_consistency_interval=timedelta(seconds=5)
)
async_tbl = await async_db.open_table("test_table_async")
# --8<-- [end:table_async_eventual_consistency]
# --8<-- [start:table_async_checkout_latest]
async_tbl = await async_db.open_table("test_table_async")
# (Other writes happen to test_table_async from another process)
# Check for updates
await async_tbl.checkout_latest()
# --8<-- [end:table_async_checkout_latest]


@@ -0,0 +1,187 @@
# --8<-- [start:import-lancedb]
import lancedb
# --8<-- [end:import-lancedb]
# --8<-- [start:import-pandas]
import pandas as pd
# --8<-- [end:import-pandas]
# --8<-- [start:import-iterable]
from typing import Iterable
# --8<-- [end:import-iterable]
# --8<-- [start:import-pyarrow]
import pyarrow as pa
# --8<-- [end:import-pyarrow]
# --8<-- [start:import-polars]
import polars as pl
# --8<-- [end:import-polars]
# --8<-- [start:import-lancedb-pydantic]
from lancedb.pydantic import Vector, LanceModel
# --8<-- [end:import-lancedb-pydantic]
import pytest
# --8<-- [start:make_batches]
def make_batches() -> Iterable[pa.RecordBatch]:
for i in range(5):
yield pa.RecordBatch.from_arrays(
[
pa.array([[3.1, 4.1], [5.9, 26.5]]),
pa.array(["foo", "bar"]),
pa.array([10.0, 20.0]),
],
["vector", "item", "price"],
)
# --8<-- [end:make_batches]
def test_pandas_and_pyarrow():
# --8<-- [start:connect_to_lancedb]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
# --8<-- [end:connect_to_lancedb]
# --8<-- [start:create_table_pandas]
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
"item": ["foo", "bar"],
"price": [10.0, 20.0],
}
)
table = db.create_table("pd_table", data=data)
# --8<-- [end:create_table_pandas]
# --8<-- [start:create_table_iterable]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32())),
pa.field("item", pa.utf8()),
pa.field("price", pa.float32()),
]
)
table = db.create_table("iterable_table", data=make_batches(), schema=schema)
# --8<-- [end:create_table_iterable]
# --8<-- [start:vector_search]
# Open the table previously created.
table = db.open_table("pd_table")
query_vector = [100, 100]
# Pandas DataFrame
df = table.search(query_vector).limit(1).to_pandas()
print(df)
# --8<-- [end:vector_search]
# --8<-- [start:vector_search_with_filter]
# Apply the filter via LanceDB
results = table.search([100, 100]).where("price < 15").to_pandas()
assert len(results) == 1
assert results["item"].iloc[0] == "foo"
# Apply the filter via Pandas
df = table.search([100, 100]).to_pandas()
results = df[df.price < 15]
assert len(results) == 1
assert results["item"].iloc[0] == "foo"
# --8<-- [end:vector_search_with_filter]
@pytest.mark.asyncio
async def test_pandas_and_pyarrow_async():
# --8<-- [start:connect_to_lancedb_async]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
# --8<-- [end:connect_to_lancedb_async]
# --8<-- [start:create_table_pandas_async]
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
"item": ["foo", "bar"],
"price": [10.0, 20.0],
}
)
await async_db.create_table("pd_table_async", data=data)
# --8<-- [end:create_table_pandas_async]
# --8<-- [start:create_table_iterable_async]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32())),
pa.field("item", pa.utf8()),
pa.field("price", pa.float32()),
]
)
await async_db.create_table(
"iterable_table_async", data=make_batches(), schema=schema
)
# --8<-- [end:create_table_iterable_async]
# --8<-- [start:vector_search_async]
# Open the table previously created.
async_tbl = await async_db.open_table("pd_table_async")
query_vector = [100, 100]
# Pandas DataFrame
df = await async_tbl.query().nearest_to(query_vector).limit(1).to_pandas()
print(df)
# --8<-- [end:vector_search_async]
# --8<-- [start:vector_search_with_filter_async]
# Apply the filter via LanceDB
results = (
await async_tbl.query().nearest_to([100, 100]).where("price < 15").to_pandas()
)
assert len(results) == 1
assert results["item"].iloc[0] == "foo"
# Apply the filter via Pandas
df = await async_tbl.query().nearest_to([100, 100]).to_pandas()
results = df[df.price < 15]
assert len(results) == 1
assert results["item"].iloc[0] == "foo"
# --8<-- [end:vector_search_with_filter_async]
# --8<-- [start:class_Item]
class Item(LanceModel):
vector: Vector(2)
item: str
price: float
# --8<-- [end:class_Item]
def test_polars():
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
# --8<-- [start:create_table_polars]
data = pl.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
"item": ["foo", "bar"],
"price": [10.0, 20.0],
}
)
table = db.create_table("pl_table", data=data)
# --8<-- [end:create_table_polars]
# --8<-- [start:vector_search_polars]
query = [3.0, 4.0]
result = table.search(query).limit(1).to_polars()
print(result)
print(type(result))
# --8<-- [end:vector_search_polars]
# --8<-- [start:create_table_pydantic]
table = db.create_table("pydantic_table", schema=Item)
df = pl.DataFrame(data)
# Add Polars DataFrame to table
table.add(df)
# --8<-- [end:create_table_pydantic]
# --8<-- [start:dump_table_lazyform]
ldf = table.to_polars()
print(type(ldf))
# --8<-- [end:dump_table_lazyform]
# --8<-- [start:print_table_lazyform]
print(ldf.first().collect())
# --8<-- [end:print_table_lazyform]


@@ -0,0 +1,366 @@
# --8<-- [start:import-lancedb]
import lancedb
# --8<-- [end:import-lancedb]
# --8<-- [start:import-numpy]
import numpy as np
# --8<-- [end:import-numpy]
# --8<-- [start:import-datetime]
from datetime import datetime
# --8<-- [end:import-datetime]
# --8<-- [start:import-lancedb-pydantic]
from lancedb.pydantic import Vector, LanceModel
# --8<-- [end:import-lancedb-pydantic]
# --8<-- [start:import-pydantic-base-model]
from pydantic import BaseModel
# --8<-- [end:import-pydantic-base-model]
# --8<-- [start:import-lancedb-fts]
from lancedb.index import FTS
# --8<-- [end:import-lancedb-fts]
# --8<-- [start:import-os]
import os
# --8<-- [end:import-os]
# --8<-- [start:import-embeddings]
from lancedb.embeddings import get_registry
# --8<-- [end:import-embeddings]
import pytest
# --8<-- [start:class-definition]
class Metadata(BaseModel):
source: str
timestamp: datetime
class Document(BaseModel):
content: str
meta: Metadata
class LanceSchema(LanceModel):
id: str
vector: Vector(1536)
payload: Document
# --8<-- [end:class-definition]
def test_vector_search():
# --8<-- [start:exhaustive_search]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
data = [
{"vector": row, "item": f"item {i}"}
for i, row in enumerate(np.random.random((10_000, 1536)).astype("float32"))
]
tbl = db.create_table("vector_search", data=data)
tbl.search(np.random.random((1536))).limit(10).to_list()
# --8<-- [end:exhaustive_search]
# --8<-- [start:exhaustive_search_cosine]
tbl.search(np.random.random((1536))).metric("cosine").limit(10).to_list()
# --8<-- [end:exhaustive_search_cosine]
# --8<-- [start:create_table_with_nested_schema]
# Let's add 100 sample rows to our dataset
data = [
LanceSchema(
id=f"id{i}",
vector=np.random.randn(1536),
payload=Document(
content=f"document{i}",
meta=Metadata(source=f"source{i % 10}", timestamp=datetime.now()),
),
)
for i in range(100)
]
# Synchronous client
tbl = db.create_table("documents", data=data)
# --8<-- [end:create_table_with_nested_schema]
# --8<-- [start:search_result_as_pyarrow]
tbl.search(np.random.randn(1536)).to_arrow()
# --8<-- [end:search_result_as_pyarrow]
# --8<-- [start:search_result_as_pandas]
tbl.search(np.random.randn(1536)).to_pandas()
# --8<-- [end:search_result_as_pandas]
# --8<-- [start:search_result_as_pandas_flatten_true]
tbl.search(np.random.randn(1536)).to_pandas(flatten=True)
# --8<-- [end:search_result_as_pandas_flatten_true]
# --8<-- [start:search_result_as_pandas_flatten_1]
tbl.search(np.random.randn(1536)).to_pandas(flatten=1)
# --8<-- [end:search_result_as_pandas_flatten_1]
# --8<-- [start:search_result_as_list]
tbl.search(np.random.randn(1536)).to_list()
# --8<-- [end:search_result_as_list]
# --8<-- [start:search_result_as_pydantic]
tbl.search(np.random.randn(1536)).to_pydantic(LanceSchema)
# --8<-- [end:search_result_as_pydantic]
@pytest.mark.asyncio
async def test_vector_search_async():
# --8<-- [start:exhaustive_search_async]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
data = [
{"vector": row, "item": f"item {i}"}
for i, row in enumerate(np.random.random((10_000, 1536)).astype("float32"))
]
async_tbl = await async_db.create_table("vector_search_async", data=data)
(await async_tbl.query().nearest_to(np.random.random((1536))).limit(10).to_list())
# --8<-- [end:exhaustive_search_async]
# --8<-- [start:exhaustive_search_async_cosine]
(
await async_tbl.query()
.nearest_to(np.random.random((1536)))
.distance_type("cosine")
.limit(10)
.to_list()
)
# --8<-- [end:exhaustive_search_async_cosine]
# --8<-- [start:create_table_async_with_nested_schema]
# Let's add 100 sample rows to our dataset
data = [
LanceSchema(
id=f"id{i}",
vector=np.random.randn(1536),
payload=Document(
content=f"document{i}",
meta=Metadata(source=f"source{i % 10}", timestamp=datetime.now()),
),
)
for i in range(100)
]
async_tbl = await async_db.create_table("documents_async", data=data)
# --8<-- [end:create_table_async_with_nested_schema]
# --8<-- [start:search_result_async_as_pyarrow]
await async_tbl.query().nearest_to(np.random.randn(1536)).to_arrow()
# --8<-- [end:search_result_async_as_pyarrow]
# --8<-- [start:search_result_async_as_pandas]
await async_tbl.query().nearest_to(np.random.randn(1536)).to_pandas()
# --8<-- [end:search_result_async_as_pandas]
# --8<-- [start:search_result_async_as_list]
await async_tbl.query().nearest_to(np.random.randn(1536)).to_list()
# --8<-- [end:search_result_async_as_list]
def test_fts_native():
# --8<-- [start:basic_fts]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
table = db.create_table(
"my_table_fts",
data=[
{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"},
{"vector": [5.9, 26.5], "text": "There are several kittens playing"},
],
)
# Pass `use_tantivy=False` to use the native Lance FTS index
# (`use_tantivy=True` is the default)
table.create_fts_index("text", use_tantivy=False)
table.search("puppy").limit(10).select(["text"]).to_list()
# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
# ...
# --8<-- [end:basic_fts]
# --8<-- [start:fts_config_stem]
table.create_fts_index("text", tokenizer_name="en_stem", replace=True)
# --8<-- [end:fts_config_stem]
# --8<-- [start:fts_config_folding]
table.create_fts_index(
"text",
use_tantivy=False,
language="French",
stem=True,
ascii_folding=True,
replace=True,
)
# --8<-- [end:fts_config_folding]
# --8<-- [start:fts_prefiltering]
table.search("puppy").limit(10).where("text='foo'", prefilter=True).to_list()
# --8<-- [end:fts_prefiltering]
# --8<-- [start:fts_postfiltering]
table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list()
# --8<-- [end:fts_postfiltering]
# --8<-- [start:fts_with_position]
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
# --8<-- [end:fts_with_position]
# --8<-- [start:fts_incremental_index]
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
table.optimize()
# --8<-- [end:fts_incremental_index]
@pytest.mark.asyncio
async def test_fts_native_async():
# --8<-- [start:basic_fts_async]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
async_tbl = await async_db.create_table(
"my_table_fts_async",
data=[
{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"},
{"vector": [5.9, 26.5], "text": "There are several kittens playing"},
],
)
# async API uses our native FTS algorithm
await async_tbl.create_index("text", config=FTS())
await (
async_tbl.query().nearest_to_text("puppy").select(["text"]).limit(10).to_list()
)
# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
# ...
# --8<-- [end:basic_fts_async]
# --8<-- [start:fts_config_stem_async]
await async_tbl.create_index(
"text", config=FTS(language="English", stem=True, remove_stop_words=True)
)
# --8<-- [end:fts_config_stem_async]
# --8<-- [start:fts_config_folding_async]
await async_tbl.create_index(
"text", config=FTS(language="French", stem=True, ascii_folding=True)
)
# --8<-- [end:fts_config_folding_async]
# --8<-- [start:fts_prefiltering_async]
await (
async_tbl.query()
.nearest_to_text("puppy")
.limit(10)
.where("text='foo'")
.to_list()
)
# --8<-- [end:fts_prefiltering_async]
# --8<-- [start:fts_postfiltering_async]
await (
async_tbl.query()
.nearest_to_text("puppy")
.limit(10)
.where("text='foo'")
.postfilter()
.to_list()
)
# --8<-- [end:fts_postfiltering_async]
# --8<-- [start:fts_with_position_async]
await async_tbl.create_index("text", config=FTS(with_position=True))
# --8<-- [end:fts_with_position_async]
# --8<-- [start:fts_incremental_index_async]
await async_tbl.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
await async_tbl.optimize()
# --8<-- [end:fts_incremental_index_async]
@pytest.mark.skip()
def test_hybrid_search():
# --8<-- [start:import-openai]
import openai
# --8<-- [end:import-openai]
# --8<-- [start:openai-embeddings]
# Ingest the embedding function into the LanceDB table.
# Configure the OPENAI_API_KEY environment variable,
if "OPENAI_API_KEY" not in os.environ:
# or set the key here as a variable
openai.api_key = "sk-..."
embeddings = get_registry().get("openai").create()
# --8<-- [end:openai-embeddings]
# --8<-- [start:class-Documents]
class Documents(LanceModel):
vector: Vector(embeddings.ndims()) = embeddings.VectorField()
text: str = embeddings.SourceField()
# --8<-- [end:class-Documents]
# --8<-- [start:basic_hybrid_search]
data = [
{"text": "rebel spaceships striking from a hidden base"},
{"text": "have won their first victory against the evil Galactic Empire"},
{"text": "during the battle rebel spies managed to steal secret plans"},
{"text": "to the Empire's ultimate weapon the Death Star"},
]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
table = db.create_table("documents", schema=Documents)
# ingest docs with auto-vectorization
table.add(data)
# Create a fts index before the hybrid search
table.create_fts_index("text")
# hybrid search with default re-ranker
table.search("flower moon", query_type="hybrid").to_pandas()
# --8<-- [end:basic_hybrid_search]
# --8<-- [start:hybrid_search_pass_vector_text]
vector_query = [0.1, 0.2, 0.3, 0.4, 0.5]
text_query = "flower moon"
(
table.search(query_type="hybrid")
.vector(vector_query)
.text(text_query)
.limit(5)
.to_pandas()
)
# --8<-- [end:hybrid_search_pass_vector_text]
@pytest.mark.skip
async def test_hybrid_search_async():
import openai
# --8<-- [start:openai-embeddings]
# Ingest the embedding function into the LanceDB table.
# Configure the OPENAI_API_KEY environment variable,
if "OPENAI_API_KEY" not in os.environ:
# or set the key here as a variable
openai.api_key = "sk-..."
embeddings = get_registry().get("openai").create()
# --8<-- [end:openai-embeddings]
# --8<-- [start:class-Documents]
class Documents(LanceModel):
vector: Vector(embeddings.ndims()) = embeddings.VectorField()
text: str = embeddings.SourceField()
# --8<-- [end:class-Documents]
# --8<-- [start:basic_hybrid_search_async]
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
data = [
{"text": "rebel spaceships striking from a hidden base"},
{"text": "have won their first victory against the evil Galactic Empire"},
{"text": "during the battle rebel spies managed to steal secret plans"},
{"text": "to the Empire's ultimate weapon the Death Star"},
]
async_tbl = await async_db.create_table("documents_async", schema=Documents)
# ingest docs with auto-vectorization
await async_tbl.add(data)
# Create a fts index before the hybrid search
await async_tbl.create_index("text", config=FTS())
text_query = "flower moon"
vector_query = embeddings.compute_query_embeddings(text_query)[0]
# hybrid search with default re-ranker
await (
async_tbl.query()
.nearest_to(vector_query)
.nearest_to_text(text_query)
.to_pandas()
)
# --8<-- [end:basic_hybrid_search_async]
# --8<-- [start:hybrid_search_pass_vector_text_async]
vector_query = [0.1, 0.2, 0.3, 0.4, 0.5]
text_query = "flower moon"
await (
async_tbl.query()
.nearest_to(vector_query)
.nearest_to_text(text_query)
.limit(5)
.to_pandas()
)
# --8<-- [end:hybrid_search_pass_vector_text_async]