From 2fc174f5327e7a05c2e61aae8175dfa70f3d9932 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 31 Jan 2025 15:43:54 -0800 Subject: [PATCH] docs: add sync/async tabs to quickstart (#2087) Closes #2033 --- docs/src/basic.md | 183 ++++++++++++++++++------- docs/src/search.md | 12 +- python/python/tests/docs/test_basic.py | 93 +++++-------- 3 files changed, 174 insertions(+), 114 deletions(-) diff --git a/docs/src/basic.md b/docs/src/basic.md index 1faf206f..625368ce 100644 --- a/docs/src/basic.md +++ b/docs/src/basic.md @@ -133,13 +133,22 @@ recommend switching to stable releases. ## Connect to a database === "Python" + === "Sync API" - ```python - --8<-- "python/python/tests/docs/test_basic.py:imports" - --8<-- "python/python/tests/docs/test_basic.py:connect" + ```python + --8<-- "python/python/tests/docs/test_basic.py:imports" - --8<-- "python/python/tests/docs/test_basic.py:connect_async" - ``` + --8<-- "python/python/tests/docs/test_basic.py:set_uri" + --8<-- "python/python/tests/docs/test_basic.py:connect" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:imports" + + --8<-- "python/python/tests/docs/test_basic.py:set_uri" + --8<-- "python/python/tests/docs/test_basic.py:connect_async" + ``` === "Typescript[^1]" @@ -183,21 +192,33 @@ table. === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:create_table" - --8<-- "python/python/tests/docs/test_basic.py:create_table_async" - ``` - If the table already exists, LanceDB will raise an error by default. If you want to overwrite the table, you can pass in `mode="overwrite"` to the `create_table` method. 
- You can also pass in a pandas DataFrame directly: + === "Sync API" - ```python - --8<-- "python/python/tests/docs/test_basic.py:create_table_pandas" - --8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas" - ``` + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_table" + ``` + + You can also pass in a pandas DataFrame directly: + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_table_pandas" + ``` + + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_table_async" + ``` + + You can also pass in a pandas DataFrame directly: + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas" + ``` === "Typescript[^1]" @@ -247,10 +268,16 @@ similar to a `CREATE TABLE` statement in SQL. === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:create_empty_table" - --8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_empty_table" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async" + ``` !!! note "You can define schema in Pydantic" LanceDB comes with Pydantic support, which allows you to define the schema of your data using Pydantic models. This makes it easy to work with LanceDB tables and data. Learn more about all supported types in [tables guide](./guides/tables.md). 
@@ -281,10 +308,16 @@ Once created, you can open a table as follows: === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:open_table" - --8<-- "python/python/tests/docs/test_basic.py:open_table_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:open_table" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:open_table_async" + ``` === "Typescript[^1]" === "@lancedb/lancedb" @@ -310,10 +343,16 @@ If you forget the name of your table, you can always get a listing of all table === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:table_names" - --8<-- "python/python/tests/docs/test_basic.py:table_names_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:table_names" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:table_names_async" + ``` === "Typescript[^1]" === "@lancedb/lancedb" @@ -340,10 +379,16 @@ After a table has been created, you can always add more data to it as follows: === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:add_data" - --8<-- "python/python/tests/docs/test_basic.py:add_data_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:add_data" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:add_data_async" + ``` === "Typescript[^1]" === "@lancedb/lancedb" @@ -370,10 +415,16 @@ Once you've embedded the query, you can find its nearest neighbors as follows: === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:vector_search" - --8<-- "python/python/tests/docs/test_basic.py:vector_search_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:vector_search" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:vector_search_async" + ``` This returns a pandas DataFrame with 
the results. @@ -412,10 +463,16 @@ LanceDB allows you to create an ANN index on a table as follows: === "Python" - ```py - --8<-- "python/python/tests/docs/test_basic.py:create_index" - --8<-- "python/python/tests/docs/test_basic.py:create_index_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_index" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:create_index_async" + ``` === "Typescript[^1]" === "@lancedb/lancedb" @@ -451,10 +508,16 @@ This can delete any number of rows that match the filter. === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:delete_rows" - --8<-- "python/python/tests/docs/test_basic.py:delete_rows_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:delete_rows" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:delete_rows_async" + ``` === "Typescript[^1]" @@ -483,7 +546,10 @@ simple or complex as needed. To see what expressions are supported, see the === "Python" - Read more: [lancedb.table.Table.delete][] + === "Sync API" + Read more: [lancedb.table.Table.delete][] + === "Async API" + Read more: [lancedb.table.AsyncTable.delete][] === "Typescript[^1]" @@ -505,10 +571,16 @@ Use the `drop_table()` method on the database to remove a table. === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:drop_table" - --8<-- "python/python/tests/docs/test_basic.py:drop_table_async" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:drop_table" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:drop_table_async" + ``` This permanently removes the table and is not recoverable, unlike deleting rows. By default, if the table does not exist an exception is raised. To suppress this, @@ -543,10 +615,17 @@ You can use the embedding API when working with embedding models. 
It automatical === "Python" - ```python - --8<-- "python/python/tests/docs/test_embeddings_optional.py:imports" - --8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_embeddings_optional.py:imports" + + --8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings" + ``` + === "Async API" + + Coming soon to the async API. + https://github.com/lancedb/lancedb/issues/1938 === "Typescript[^1]" diff --git a/docs/src/search.md b/docs/src/search.md index 4e07af28..3806f5c0 100644 --- a/docs/src/search.md +++ b/docs/src/search.md @@ -122,7 +122,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi === "Python" - === "sync API" + === "Sync API" ```python --8<-- "python/python/tests/docs/test_binary_vector.py:imports" @@ -130,7 +130,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi --8<-- "python/python/tests/docs/test_binary_vector.py:sync_binary_vector" ``` - === "async API" + === "Async API" ```python --8<-- "python/python/tests/docs/test_binary_vector.py:imports" @@ -153,7 +153,7 @@ The vector value type can be `float16`, `float32` or `float64`. === "Python" - === "sync API" + === "Sync API" ```python --8<-- "python/python/tests/docs/test_multivector.py:imports" @@ -161,7 +161,7 @@ The vector value type can be `float16`, `float32` or `float64`. 
--8<-- "python/python/tests/docs/test_multivector.py:sync_multivector" ``` - === "async API" + === "Async API" ```python --8<-- "python/python/tests/docs/test_multivector.py:imports" @@ -175,7 +175,7 @@ You can also search for vectors within a specific distance range from the query === "Python" - === "sync API" + === "Sync API" ```python --8<-- "python/python/tests/docs/test_distance_range.py:imports" @@ -183,7 +183,7 @@ You can also search for vectors within a specific distance range from the query --8<-- "python/python/tests/docs/test_distance_range.py:sync_distance_range" ``` - === "async API" + === "Async API" ```python --8<-- "python/python/tests/docs/test_distance_range.py:imports" diff --git a/python/python/tests/docs/test_basic.py b/python/python/tests/docs/test_basic.py index 433b209c..0e7f4897 100644 --- a/python/python/tests/docs/test_basic.py +++ b/python/python/tests/docs/test_basic.py @@ -1,23 +1,22 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The LanceDB Authors -import shutil - # --8<-- [start:imports] import lancedb import pandas as pd import pyarrow as pa - # --8<-- [end:imports] + import pytest from numpy.random import randint, random -shutil.rmtree("data/sample-lancedb", ignore_errors=True) - -def test_quickstart(): - # --8<-- [start:connect] +def test_quickstart(tmp_path): + # --8<-- [start:set_uri] uri = "data/sample-lancedb" + # --8<-- [end:set_uri] + uri = tmp_path + # --8<-- [start:connect] db = lancedb.connect(uri) # --8<-- [end:connect] @@ -27,7 +26,6 @@ def test_quickstart(): {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, ] - # Synchronous client tbl = db.create_table("my_table", data=data) # --8<-- [end:create_table] @@ -38,24 +36,19 @@ def test_quickstart(): {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, ] ) - # Synchronous client tbl = db.create_table("table_from_df", data=df) # --8<-- [end:create_table_pandas] # --8<-- [start:create_empty_table] schema = pa.schema([pa.field("vector", 
pa.list_(pa.float32(), list_size=2))]) - # Synchronous client tbl = db.create_table("empty_table", schema=schema) # --8<-- [end:create_empty_table] # --8<-- [start:open_table] - # Synchronous client tbl = db.open_table("my_table") # --8<-- [end:open_table] # --8<-- [start:table_names] - # Synchronous client print(db.table_names()) # --8<-- [end:table_names] - # Synchronous client # --8<-- [start:add_data] # Option 1: Add a list of dicts to a table data = [ @@ -69,7 +62,6 @@ def test_quickstart(): tbl.add(data) # --8<-- [end:add_data] # --8<-- [start:vector_search] - # Synchronous client tbl.search([100, 100]).limit(2).to_pandas() # --8<-- [end:vector_search] tbl.add( @@ -95,42 +87,31 @@ def test_quickstart(): tbl.drop_columns(["dbl_price"]) # --8<-- [end:drop_columns] # --8<-- [start:create_index] - # Synchronous client tbl.create_index(num_sub_vectors=1) # --8<-- [end:create_index] # --8<-- [start:delete_rows] - # Synchronous client tbl.delete('item = "fizz"') # --8<-- [end:delete_rows] # --8<-- [start:drop_table] - # Synchronous client db.drop_table("my_table") # --8<-- [end:drop_table] @pytest.mark.asyncio -async def test_quickstart_async(): +async def test_quickstart_async(tmp_path): + uri = tmp_path # --8<-- [start:connect_async] - # LanceDb offers both a synchronous and an asynchronous client. There are still a - # few operations that are only supported by the synchronous client (e.g. embedding - # functions, full text search) but both APIs should soon be equivalent - - # In this guide we will give examples of both clients. In other guides we will - # typically only provide examples with one client or the other. 
- uri = "data/sample-lancedb" - async_db = await lancedb.connect_async(uri) + db = await lancedb.connect_async(uri) # --8<-- [end:connect_async] - + # --8<-- [start:create_table_async] data = [ {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, ] - # --8<-- [start:create_table_async] - # Asynchronous client - async_tbl = await async_db.create_table("my_table_async", data=data) + tbl = await db.create_table("my_table_async", data=data) # --8<-- [end:create_table_async] - + # --8<-- [start:create_table_async_pandas] df = pd.DataFrame( [ {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, @@ -138,37 +119,41 @@ async def test_quickstart_async(): ] ) - # --8<-- [start:create_table_async_pandas] - # Asynchronous client - async_tbl = await async_db.create_table("table_from_df_async", df) + tbl = await db.create_table("table_from_df_async", df) # --8<-- [end:create_table_async_pandas] - - schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))]) # --8<-- [start:create_empty_table_async] - # Asynchronous client - async_tbl = await async_db.create_table("empty_table_async", schema=schema) + schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))]) + tbl = await db.create_table("empty_table_async", schema=schema) # --8<-- [end:create_empty_table_async] # --8<-- [start:open_table_async] - # Asynchronous client - async_tbl = await async_db.open_table("my_table_async") + tbl = await db.open_table("my_table_async") # --8<-- [end:open_table_async] # --8<-- [start:table_names_async] - # Asynchronous client - print(await async_db.table_names()) + print(await db.table_names()) # --8<-- [end:table_names_async] # --8<-- [start:add_data_async] - # Asynchronous client - await async_tbl.add(data) + # Option 1: Add a list of dicts to a table + data = [ + {"vector": [1.3, 1.4], "item": "fizz", "price": 100.0}, + {"vector": [9.5, 56.2], "item": "buzz", "price": 200.0}, + ] + await 
tbl.add(data) + + # Option 2: Add a pandas DataFrame to a table + df = pd.DataFrame(data) + await tbl.add(df) # --8<-- [end:add_data_async] # Add sufficient data for training data = [{"vector": [x, x], "item": "filler", "price": x * x} for x in range(1000)] - await async_tbl.add(data) + await tbl.add(data) # --8<-- [start:vector_search_async] + await tbl.vector_search([100, 100]).limit(2).to_pandas() + # --8<-- [end:vector_search_async] # --8<-- [start:add_columns_async] - await async_tbl.add_columns({"double_price": "cast((price * 2) as float)"}) + await tbl.add_columns({"double_price": "cast((price * 2) as float)"}) # --8<-- [end:add_columns_async] # --8<-- [start:alter_columns_async] - await async_tbl.alter_columns( + await tbl.alter_columns( { "path": "double_price", "rename": "dbl_price", @@ -178,20 +163,16 @@ async def test_quickstart_async(): ) # --8<-- [end:alter_columns_async] # --8<-- [start:drop_columns_async] - await async_tbl.drop_columns(["dbl_price"]) + await tbl.drop_columns(["dbl_price"]) # --8<-- [end:drop_columns_async] - # Asynchronous client - await async_tbl.vector_search([100, 100]).limit(2).to_pandas() + await tbl.vector_search([100, 100]).limit(2).to_pandas() # --8<-- [end:vector_search_async] # --8<-- [start:create_index_async] - # Asynchronous client (must specify column to index) - await async_tbl.create_index("vector") + await tbl.create_index("vector") # --8<-- [end:create_index_async] # --8<-- [start:delete_rows_async] - # Asynchronous client - await async_tbl.delete('item = "fizz"') + await tbl.delete('item = "fizz"') # --8<-- [end:delete_rows_async] # --8<-- [start:drop_table_async] - # Asynchronous client - await async_db.drop_table("my_table_async") + await db.drop_table("my_table_async") # --8<-- [end:drop_table_async]