docs: add sync/async tabs to quickstart (#2087)

Closes #2033
This commit is contained in:
Will Jones
2025-01-31 15:43:54 -08:00
committed by GitHub
parent dba85f4d6f
commit 2fc174f532
3 changed files with 174 additions and 114 deletions

View File

@@ -133,13 +133,22 @@ recommend switching to stable releases.
## Connect to a database
=== "Python"
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:imports"
--8<-- "python/python/tests/docs/test_basic.py:connect"
```python
--8<-- "python/python/tests/docs/test_basic.py:imports"
--8<-- "python/python/tests/docs/test_basic.py:connect_async"
```
--8<-- "python/python/tests/docs/test_basic.py:set_uri"
--8<-- "python/python/tests/docs/test_basic.py:connect"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:imports"
--8<-- "python/python/tests/docs/test_basic.py:set_uri"
--8<-- "python/python/tests/docs/test_basic.py:connect_async"
```
=== "Typescript[^1]"
@@ -183,21 +192,33 @@ table.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_table"
--8<-- "python/python/tests/docs/test_basic.py:create_table_async"
```
If the table already exists, LanceDB will raise an error by default.
If you want to overwrite the table, you can pass in `mode="overwrite"`
to the `create_table` method.
You can also pass in a pandas DataFrame directly:
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
--8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
```
```python
--8<-- "python/python/tests/docs/test_basic.py:create_table"
```
You can also pass in a pandas DataFrame directly:
```python
--8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_table_async"
```
You can also pass in a pandas DataFrame directly:
```python
--8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
```
=== "Typescript[^1]"
@@ -247,10 +268,16 @@ similar to a `CREATE TABLE` statement in SQL.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
```
!!! note "You can define schema in Pydantic"
LanceDB comes with Pydantic support, which allows you to define the schema of your data using Pydantic models. This makes it easy to work with LanceDB tables and data. Learn more about all supported types in [tables guide](./guides/tables.md).
@@ -281,10 +308,16 @@ Once created, you can open a table as follows:
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:open_table"
--8<-- "python/python/tests/docs/test_basic.py:open_table_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:open_table"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:open_table_async"
```
=== "Typescript[^1]"
=== "@lancedb/lancedb"
@@ -310,10 +343,16 @@ If you forget the name of your table, you can always get a listing of all table
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:table_names"
--8<-- "python/python/tests/docs/test_basic.py:table_names_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:table_names"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:table_names_async"
```
=== "Typescript[^1]"
=== "@lancedb/lancedb"
@@ -340,10 +379,16 @@ After a table has been created, you can always add more data to it as follows:
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:add_data"
--8<-- "python/python/tests/docs/test_basic.py:add_data_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:add_data"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:add_data_async"
```
=== "Typescript[^1]"
=== "@lancedb/lancedb"
@@ -370,10 +415,16 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:vector_search"
--8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:vector_search"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
```
This returns a pandas DataFrame with the results.
@@ -412,10 +463,16 @@ LanceDB allows you to create an ANN index on a table as follows:
=== "Python"
```py
--8<-- "python/python/tests/docs/test_basic.py:create_index"
--8<-- "python/python/tests/docs/test_basic.py:create_index_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_index"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:create_index_async"
```
=== "Typescript[^1]"
=== "@lancedb/lancedb"
@@ -451,10 +508,16 @@ This can delete any number of rows that match the filter.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:delete_rows"
--8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:delete_rows"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
```
=== "Typescript[^1]"
@@ -483,7 +546,10 @@ simple or complex as needed. To see what expressions are supported, see the
=== "Python"
Read more: [lancedb.table.Table.delete][]
=== "Sync API"
Read more: [lancedb.table.Table.delete][]
=== "Async API"
Read more: [lancedb.table.AsyncTable.delete][]
=== "Typescript[^1]"
@@ -505,10 +571,16 @@ Use the `drop_table()` method on the database to remove a table.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_basic.py:drop_table"
--8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_basic.py:drop_table"
```
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
```
This permanently removes the table and is not recoverable, unlike deleting rows.
By default, if the table does not exist an exception is raised. To suppress this,
@@ -543,10 +615,17 @@ You can use the embedding API when working with embedding models. It automatical
=== "Python"
```python
--8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
--8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
```
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
--8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
```
=== "Async API"
Coming soon to the async API.
https://github.com/lancedb/lancedb/issues/1938
=== "Typescript[^1]"

View File

@@ -122,7 +122,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
=== "Python"
=== "sync API"
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_binary_vector.py:imports"
@@ -130,7 +130,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
--8<-- "python/python/tests/docs/test_binary_vector.py:sync_binary_vector"
```
=== "async API"
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_binary_vector.py:imports"
@@ -153,7 +153,7 @@ The vector value type can be `float16`, `float32` or `float64`.
=== "Python"
=== "sync API"
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_multivector.py:imports"
@@ -161,7 +161,7 @@ The vector value type can be `float16`, `float32` or `float64`.
--8<-- "python/python/tests/docs/test_multivector.py:sync_multivector"
```
=== "async API"
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_multivector.py:imports"
@@ -175,7 +175,7 @@ You can also search for vectors within a specific distance range from the query
=== "Python"
=== "sync API"
=== "Sync API"
```python
--8<-- "python/python/tests/docs/test_distance_range.py:imports"
@@ -183,7 +183,7 @@ You can also search for vectors within a specific distance range from the query
--8<-- "python/python/tests/docs/test_distance_range.py:sync_distance_range"
```
=== "async API"
=== "Async API"
```python
--8<-- "python/python/tests/docs/test_distance_range.py:imports"

View File

@@ -1,23 +1,22 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import shutil
# --8<-- [start:imports]
import lancedb
import pandas as pd
import pyarrow as pa
# --8<-- [end:imports]
import pytest
from numpy.random import randint, random
shutil.rmtree("data/sample-lancedb", ignore_errors=True)
def test_quickstart():
# --8<-- [start:connect]
def test_quickstart(tmp_path):
# --8<-- [start:set_uri]
uri = "data/sample-lancedb"
# --8<-- [end:set_uri]
uri = tmp_path
# --8<-- [start:connect]
db = lancedb.connect(uri)
# --8<-- [end:connect]
@@ -27,7 +26,6 @@ def test_quickstart():
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
# Synchronous client
tbl = db.create_table("my_table", data=data)
# --8<-- [end:create_table]
@@ -38,24 +36,19 @@ def test_quickstart():
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
)
# Synchronous client
tbl = db.create_table("table_from_df", data=df)
# --8<-- [end:create_table_pandas]
# --8<-- [start:create_empty_table]
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
# Synchronous client
tbl = db.create_table("empty_table", schema=schema)
# --8<-- [end:create_empty_table]
# --8<-- [start:open_table]
# Synchronous client
tbl = db.open_table("my_table")
# --8<-- [end:open_table]
# --8<-- [start:table_names]
# Synchronous client
print(db.table_names())
# --8<-- [end:table_names]
# Synchronous client
# --8<-- [start:add_data]
# Option 1: Add a list of dicts to a table
data = [
@@ -69,7 +62,6 @@ def test_quickstart():
tbl.add(data)
# --8<-- [end:add_data]
# --8<-- [start:vector_search]
# Synchronous client
tbl.search([100, 100]).limit(2).to_pandas()
# --8<-- [end:vector_search]
tbl.add(
@@ -95,42 +87,31 @@ def test_quickstart():
tbl.drop_columns(["dbl_price"])
# --8<-- [end:drop_columns]
# --8<-- [start:create_index]
# Synchronous client
tbl.create_index(num_sub_vectors=1)
# --8<-- [end:create_index]
# --8<-- [start:delete_rows]
# Synchronous client
tbl.delete('item = "fizz"')
# --8<-- [end:delete_rows]
# --8<-- [start:drop_table]
# Synchronous client
db.drop_table("my_table")
# --8<-- [end:drop_table]
@pytest.mark.asyncio
async def test_quickstart_async():
async def test_quickstart_async(tmp_path):
uri = tmp_path
# --8<-- [start:connect_async]
# LanceDb offers both a synchronous and an asynchronous client. There are still a
# few operations that are only supported by the synchronous client (e.g. embedding
# functions, full text search) but both APIs should soon be equivalent
# In this guide we will give examples of both clients. In other guides we will
# typically only provide examples with one client or the other.
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
db = await lancedb.connect_async(uri)
# --8<-- [end:connect_async]
# --8<-- [start:create_table_async]
data = [
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
# --8<-- [start:create_table_async]
# Asynchronous client
async_tbl = await async_db.create_table("my_table_async", data=data)
tbl = await db.create_table("my_table_async", data=data)
# --8<-- [end:create_table_async]
# --8<-- [start:create_table_async_pandas]
df = pd.DataFrame(
[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
@@ -138,37 +119,41 @@ async def test_quickstart_async():
]
)
# --8<-- [start:create_table_async_pandas]
# Asynchronous client
async_tbl = await async_db.create_table("table_from_df_async", df)
tbl = await db.create_table("table_from_df_async", df)
# --8<-- [end:create_table_async_pandas]
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
# --8<-- [start:create_empty_table_async]
# Asynchronous client
async_tbl = await async_db.create_table("empty_table_async", schema=schema)
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
tbl = await db.create_table("empty_table_async", schema=schema)
# --8<-- [end:create_empty_table_async]
# --8<-- [start:open_table_async]
# Asynchronous client
async_tbl = await async_db.open_table("my_table_async")
tbl = await db.open_table("my_table_async")
# --8<-- [end:open_table_async]
# --8<-- [start:table_names_async]
# Asynchronous client
print(await async_db.table_names())
print(await db.table_names())
# --8<-- [end:table_names_async]
# --8<-- [start:add_data_async]
# Asynchronous client
await async_tbl.add(data)
# Option 1: Add a list of dicts to a table
data = [
{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0},
{"vector": [9.5, 56.2], "item": "buzz", "price": 200.0},
]
await tbl.add(data)
# Option 2: Add a pandas DataFrame to a table
df = pd.DataFrame(data)
await tbl.add(data)
# --8<-- [end:add_data_async]
# Add sufficient data for training
data = [{"vector": [x, x], "item": "filler", "price": x * x} for x in range(1000)]
await async_tbl.add(data)
await tbl.add(data)
# --8<-- [start:vector_search_async]
await tbl.vector_search([100, 100]).limit(2).to_pandas()
# --8<-- [end:vector_search_async]
# --8<-- [start:add_columns_async]
await async_tbl.add_columns({"double_price": "cast((price * 2) as float)"})
await tbl.add_columns({"double_price": "cast((price * 2) as float)"})
# --8<-- [end:add_columns_async]
# --8<-- [start:alter_columns_async]
await async_tbl.alter_columns(
await tbl.alter_columns(
{
"path": "double_price",
"rename": "dbl_price",
@@ -178,20 +163,16 @@ async def test_quickstart_async():
)
# --8<-- [end:alter_columns_async]
# --8<-- [start:drop_columns_async]
await async_tbl.drop_columns(["dbl_price"])
await tbl.drop_columns(["dbl_price"])
# --8<-- [end:drop_columns_async]
# Asynchronous client
await async_tbl.vector_search([100, 100]).limit(2).to_pandas()
await tbl.vector_search([100, 100]).limit(2).to_pandas()
# --8<-- [end:vector_search_async]
# --8<-- [start:create_index_async]
# Asynchronous client (must specify column to index)
await async_tbl.create_index("vector")
await tbl.create_index("vector")
# --8<-- [end:create_index_async]
# --8<-- [start:delete_rows_async]
# Asynchronous client
await async_tbl.delete('item = "fizz"')
await tbl.delete('item = "fizz"')
# --8<-- [end:delete_rows_async]
# --8<-- [start:drop_table_async]
# Asynchronous client
await async_db.drop_table("my_table_async")
await db.drop_table("my_table_async")
# --8<-- [end:drop_table_async]