From 17c9e9afea69be0a514f33c20412b51ce304630c Mon Sep 17 00:00:00 2001 From: QianZhu Date: Tue, 7 Jan 2025 15:10:25 -0800 Subject: [PATCH] docs: add async examples to doc (#1941) - added sync and async tabs for python examples - moved python code to tests/docs --------- Co-authored-by: Will Jones --- docs/mkdocs.yml | 8 +- docs/src/ann_indexes.md | 75 +- docs/src/fts.md | 115 +- docs/src/fts_tantivy.md | 2 +- docs/src/guides/scalar_index.md | 79 +- docs/src/guides/storage.md | 299 +++-- docs/src/guides/tables.md | 637 +++++----- docs/src/hybrid_search/hybrid_search.md | 77 +- docs/src/notebooks/reproducibility.ipynb | 546 ++++---- .../src/notebooks/reproducibility_async.ipynb | 1096 +++++++++++++++++ docs/src/python/pandas_and_pyarrow.md | 105 +- docs/src/python/polars_arrow.md | 55 +- docs/src/reranking/index.md | 6 +- docs/src/search.md | 153 ++- docs/src/sql.md | 40 +- docs/test/md_testing.py | 9 +- python/python/tests/docs/test_basic.py | 26 +- python/python/tests/docs/test_guide_index.py | 169 +++ python/python/tests/docs/test_guide_tables.py | 576 +++++++++ python/python/tests/docs/test_python.py | 187 +++ python/python/tests/docs/test_search.py | 366 ++++++ 21 files changed, 3639 insertions(+), 987 deletions(-) create mode 100644 docs/src/notebooks/reproducibility_async.ipynb create mode 100644 python/python/tests/docs/test_guide_index.py create mode 100644 python/python/tests/docs/test_guide_tables.py create mode 100644 python/python/tests/docs/test_python.py create mode 100644 python/python/tests/docs/test_search.py diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c41f2933..6df3c1d7 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -146,7 +146,9 @@ nav: - Building Custom Rerankers: reranking/custom_reranker.md - Example: notebooks/lancedb_reranking.ipynb - Filtering: sql.md - - Versioning & Reproducibility: notebooks/reproducibility.ipynb + - Versioning & Reproducibility: + - sync API: notebooks/reproducibility.ipynb + - async API: 
notebooks/reproducibility_async.ipynb - Configuring Storage: guides/storage.md - Migration Guide: migration.md - Tuning retrieval performance: @@ -278,7 +280,9 @@ nav: - Building Custom Rerankers: reranking/custom_reranker.md - Example: notebooks/lancedb_reranking.ipynb - Filtering: sql.md - - Versioning & Reproducibility: notebooks/reproducibility.ipynb + - Versioning & Reproducibility: + - sync API: notebooks/reproducibility.ipynb + - async API: notebooks/reproducibility_async.ipynb - Configuring Storage: guides/storage.md - Migration Guide: migration.md - Tuning retrieval performance: diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md index 8d293fa4..710ec8e3 100644 --- a/docs/src/ann_indexes.md +++ b/docs/src/ann_indexes.md @@ -18,25 +18,24 @@ See the [indexing](concepts/index_ivfpq.md) concepts guide for more information Lance supports `IVF_PQ` index type by default. === "Python" + === "Sync API" - Creating indexes is done via the [create_index](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.create_index) method. + Creating indexes is done via the [create_index](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.create_index) method. - ```python - import lancedb - import numpy as np - uri = "data/sample-lancedb" - db = lancedb.connect(uri) + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:import-numpy" + --8<-- "python/python/tests/docs/test_guide_index.py:create_ann_index" + ``` + === "Async API" + Creating indexes is done via the [create_index](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.create_index) method. 
- # Create 10,000 sample vectors - data = [{"vector": row, "item": f"item {i}"} - for i, row in enumerate(np.random.random((10_000, 1536)).astype('float32'))] - - # Add the vectors to a table - tbl = db.create_table("my_vectors", data=data) - - # Create and train the index - you need to have enough data in the table for an effective training step - tbl.create_index(num_partitions=256, num_sub_vectors=96) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:import-numpy" + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb-ivfpq" + --8<-- "python/python/tests/docs/test_guide_index.py:create_ann_index_async" + ``` === "TypeScript" @@ -127,7 +126,9 @@ You can specify the GPU device to train IVF partitions via accelerator="mps" ) ``` - +!!! note + GPU based indexing is not yet supported with our asynchronous client. + Troubleshooting: If you see `AssertionError: Torch not compiled with CUDA enabled`, you need to [install @@ -152,14 +153,16 @@ There are a couple of parameters that can be used to fine-tune the search: === "Python" + === "Sync API" - ```python - tbl.search(np.random.random((1536))) \ - .limit(2) \ - .nprobes(20) \ - .refine_factor(10) \ - .to_pandas() - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_async" + ``` ```text vector item _distance @@ -196,10 +199,16 @@ The search will return the data requested in addition to the distance of each it You can further filter the elements returned by a search using a where clause. 
=== "Python" + === "Sync API" - ```python - tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas() - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_with_filter" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_async_with_filter" + ``` === "TypeScript" @@ -221,10 +230,16 @@ You can select the columns returned by the query using a select clause. === "Python" - ```python - tbl.search(np.random.random((1536))).select(["vector"]).to_pandas() - ``` + === "Sync API" + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_with_select" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_async_with_select" + ``` ```text vector _distance diff --git a/docs/src/fts.md b/docs/src/fts.md index c68c3714..cfc719d6 100644 --- a/docs/src/fts.md +++ b/docs/src/fts.md @@ -10,28 +10,20 @@ LanceDB provides support for full-text search via Lance, allowing you to incorpo Consider that we have a LanceDB table named `my_table`, whose string column `text` we want to index and query via keyword search, the FTS index must be created before you can search via keywords. 
=== "Python" + === "Sync API" - ```python - import lancedb + ```python + --8<-- "python/python/tests/docs/test_search.py:import-lancedb" + --8<-- "python/python/tests/docs/test_search.py:import-lancedb-fts" + --8<-- "python/python/tests/docs/test_search.py:basic_fts" + ``` + === "Async API" - uri = "data/sample-lancedb" - db = lancedb.connect(uri) - - table = db.create_table( - "my_table", - data=[ - {"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}, - {"vector": [5.9, 26.5], "text": "There are several kittens playing"}, - ], - ) - - # passing `use_tantivy=False` to use lance FTS index - # `use_tantivy=True` by default - table.create_fts_index("text", use_tantivy=False) - table.search("puppy").limit(10).select(["text"]).to_list() - # [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}] - # ... - ``` + ```python + --8<-- "python/python/tests/docs/test_search.py:import-lancedb" + --8<-- "python/python/tests/docs/test_search.py:import-lancedb-fts" + --8<-- "python/python/tests/docs/test_search.py:basic_fts_async" + ``` === "TypeScript" @@ -93,22 +85,32 @@ By default the text is tokenized by splitting on punctuation and whitespaces, an Stemming is useful for improving search results by reducing words to their root form, e.g. "running" to "run". LanceDB supports stemming for multiple languages, you can specify the tokenizer name to enable stemming by the pattern `tokenizer_name="{language_code}_stem"`, e.g. `en_stem` for English. For example, to enable stemming for English: -```python -table.create_fts_index("text", use_tantivy=True, tokenizer_name="en_stem") -``` +=== "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_config_stem" + ``` +=== "Async API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_config_stem_async" + ``` the following [languages](https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html) are currently supported. 
The tokenizer is customizable, you can specify how the tokenizer splits the text, and how it filters out words, etc. For example, for language with accents, you can specify the tokenizer to use `ascii_folding` to remove accents, e.g. 'é' to 'e': -```python -table.create_fts_index("text", - use_tantivy=False, - language="French", - stem=True, - ascii_folding=True) -``` +=== "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_config_folding" + ``` +=== "Async API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_config_folding_async" + ``` ## Filtering @@ -119,9 +121,16 @@ This can be invoked via the familiar `where` syntax. With pre-filtering: === "Python" - ```python - table.search("puppy").limit(10).where("meta='foo'", prefilte=True).to_list() - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_prefiltering" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_prefiltering_async" + ``` === "TypeScript" @@ -151,9 +160,16 @@ With pre-filtering: With post-filtering: === "Python" - ```python - table.search("puppy").limit(10).where("meta='foo'", prefilte=False).to_list() - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_postfiltering" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_search.py:fts_postfiltering_async" + ``` === "TypeScript" @@ -191,9 +207,16 @@ or a **terms** search query like `old man sea`. For more details on the terms query syntax, see Tantivy's [query parser rules](https://docs.rs/tantivy/latest/tantivy/query/struct.QueryParser.html). 
+The tantivy-based FTS is only available in Python synchronous APIs and does not support building indexes on object storage or incremental indexing. If you need these features, try the [native FTS](fts.md).
### Create a scalar index === "Python" - ```python - import lancedb - books = [ - {"book_id": 1, "publisher": "plenty of books", "tags": ["fantasy", "adventure"]}, - {"book_id": 2, "publisher": "book town", "tags": ["non-fiction"]}, - {"book_id": 3, "publisher": "oreilly", "tags": ["textbook"]} - ] + === "Sync API" - db = lancedb.connect("./db") - table = db.create_table("books", books) - table.create_scalar_index("book_id") # BTree by default - table.create_scalar_index("publisher", index_type="BITMAP") - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb-btree-bitmap" + --8<-- "python/python/tests/docs/test_guide_index.py:basic_scalar_index" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb-btree-bitmap" + --8<-- "python/python/tests/docs/test_guide_index.py:basic_scalar_index_async" + ``` === "Typescript" @@ -62,12 +63,18 @@ The following scan will be faster if the column `book_id` has a scalar index: === "Python" - ```python - import lancedb + === "Sync API" - table = db.open_table("books") - my_df = table.search().where("book_id = 2").to_pandas() - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:search_with_scalar_index" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:search_with_scalar_index_async" + ``` === "Typescript" @@ -88,22 +95,18 @@ Scalar indices can also speed up scans containing a vector search or full text s === "Python" - ```python - import lancedb + === "Sync API" - data = [ - {"book_id": 1, "vector": [1, 2]}, - {"book_id": 2, "vector": [3, 4]}, - {"book_id": 3, "vector": [5, 6]} - ] - table = 
db.create_table("book_with_embeddings", data) + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_with_scalar_index" + ``` + === "Async API" - ( - table.search([1, 2]) - .where("book_id != 3", prefilter=True) - .to_pandas() - ) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_index.py:vector_search_with_scalar_index_async" + ``` === "Typescript" @@ -122,10 +125,16 @@ Scalar indices can also speed up scans containing a vector search or full text s Updating the table data (adding, deleting, or modifying records) requires that you also update the scalar index. This can be done by calling `optimize`, which will trigger an update to the existing scalar index. === "Python" - ```python - table.add([{"vector": [7, 8], "book_id": 4}]) - table.optimize() - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:update_scalar_index" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_index.py:update_scalar_index_async" + ``` === "TypeScript" diff --git a/docs/src/guides/storage.md b/docs/src/guides/storage.md index 88cef2df..2f94dd34 100644 --- a/docs/src/guides/storage.md +++ b/docs/src/guides/storage.md @@ -12,26 +12,50 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure === "Python" AWS S3: + === "Sync API" - ```python - import lancedb - db = lancedb.connect("s3://bucket/path") - ``` + ```python + import lancedb + db = lancedb.connect("s3://bucket/path") + ``` + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async("s3://bucket/path") + ``` Google Cloud Storage: - ```python - import lancedb - db = lancedb.connect("gs://bucket/path") - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect("gs://bucket/path") + ``` + === "Async API" + + 
```python + import lancedb + async_db = await lancedb.connect_async("gs://bucket/path") + ``` Azure Blob Storage: - ```python - import lancedb - db = lancedb.connect("az://bucket/path") - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect("az://bucket/path") + ``` + + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async("az://bucket/path") + ``` Note that for Azure, storage credentials must be configured. See [below](#azure-blob-storage) for more details. @@ -94,13 +118,24 @@ If you only want this to apply to one particular connection, you can pass the `s === "Python" - ```python - import lancedb - db = await lancedb.connect_async( - "s3://bucket/path", - storage_options={"timeout": "60s"} - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect( + "s3://bucket/path", + storage_options={"timeout": "60s"} + ) + ``` + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "s3://bucket/path", + storage_options={"timeout": "60s"} + ) + ``` === "TypeScript" @@ -128,15 +163,29 @@ Getting even more specific, you can set the `timeout` for only a particular tabl === "Python" - ```python - import lancedb - db = await lancedb.connect_async("s3://bucket/path") - table = await db.create_table( - "table", - [{"a": 1, "b": 2}], - storage_options={"timeout": "60s"} - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect("s3://bucket/path") + table = db.create_table( + "table", + [{"a": 1, "b": 2}], + storage_options={"timeout": "60s"} + ) + ``` + + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async("s3://bucket/path") + async_table = await async_db.create_table( + "table", + [{"a": 1, "b": 2}], + storage_options={"timeout": "60s"} + ) + ``` === "TypeScript" @@ -194,17 +243,32 @@ These can be set as environment variables or passed in the `storage_options` par === "Python" - ```python - import lancedb - 
db = await lancedb.connect_async( - "s3://bucket/path", - storage_options={ - "aws_access_key_id": "my-access-key", - "aws_secret_access_key": "my-secret-key", - "aws_session_token": "my-session-token", - } - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect( + "s3://bucket/path", + storage_options={ + "aws_access_key_id": "my-access-key", + "aws_secret_access_key": "my-secret-key", + "aws_session_token": "my-session-token", + } + ) + ``` + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "s3://bucket/path", + storage_options={ + "aws_access_key_id": "my-access-key", + "aws_secret_access_key": "my-secret-key", + "aws_session_token": "my-session-token", + } + ) + ``` === "TypeScript" @@ -348,12 +412,22 @@ name of the table to use. === "Python" - ```python - import lancedb - db = await lancedb.connect_async( - "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table", - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect( + "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table", + ) + ``` + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table", + ) + ``` === "JavaScript" @@ -441,16 +515,30 @@ LanceDB can also connect to S3-compatible stores, such as MinIO. 
To do so, you m === "Python" - ```python - import lancedb - db = await lancedb.connect_async( - "s3://bucket/path", - storage_options={ - "region": "us-east-1", - "endpoint": "http://minio:9000", - } - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect( + "s3://bucket/path", + storage_options={ + "region": "us-east-1", + "endpoint": "http://minio:9000", + } + ) + ``` + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "s3://bucket/path", + storage_options={ + "region": "us-east-1", + "endpoint": "http://minio:9000", + } + ) + ``` === "TypeScript" @@ -502,16 +590,30 @@ To configure LanceDB to use an S3 Express endpoint, you must set the storage opt === "Python" - ```python - import lancedb - db = await lancedb.connect_async( - "s3://my-bucket--use1-az4--x-s3/path", - storage_options={ - "region": "us-east-1", - "s3_express": "true", - } - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect( + "s3://my-bucket--use1-az4--x-s3/path", + storage_options={ + "region": "us-east-1", + "s3_express": "true", + } + ) + ``` + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "s3://my-bucket--use1-az4--x-s3/path", + storage_options={ + "region": "us-east-1", + "s3_express": "true", + } + ) + ``` === "TypeScript" @@ -552,15 +654,29 @@ GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environme === "Python" - ```python - import lancedb - db = await lancedb.connect_async( - "gs://my-bucket/my-database", - storage_options={ - "service_account": "path/to/service-account.json", - } - ) - ``` + === "Sync API" + + ```python + import lancedb + db = lancedb.connect( + "gs://my-bucket/my-database", + storage_options={ + "service_account": "path/to/service-account.json", + } + ) + ``` + + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "gs://my-bucket/my-database", + storage_options={ + 
"service_account": "path/to/service-account.json", + } + ) + ``` === "TypeScript" @@ -612,16 +728,31 @@ Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_A === "Python" - ```python - import lancedb - db = await lancedb.connect_async( - "az://my-container/my-database", - storage_options={ - account_name: "some-account", - account_key: "some-key", - } - ) - ``` + === "Sync API" + + ```python + import lancedb + db = await lancedb.connect( + "az://my-container/my-database", + storage_options={ + account_name: "some-account", + account_key: "some-key", + } + ) + ``` + + === "Async API" + + ```python + import lancedb + async_db = await lancedb.connect_async( + "az://my-container/my-database", + storage_options={ + account_name: "some-account", + account_key: "some-key", + } + ) + ``` === "TypeScript" diff --git a/docs/src/guides/tables.md b/docs/src/guides/tables.md index f4431771..a9fbc643 100644 --- a/docs/src/guides/tables.md +++ b/docs/src/guides/tables.md @@ -12,10 +12,18 @@ Initialize a LanceDB connection and create a table === "Python" - ```python - import lancedb - db = lancedb.connect("./.lancedb") - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:connect" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:connect_async" + ``` LanceDB allows ingesting data from various sources - `dict`, `list[dict]`, `pd.DataFrame`, `pa.Table` or a `Iterator[pa.RecordBatch]`. Let's take a look at some of the these. 
@@ -47,18 +55,16 @@ Initialize a LanceDB connection and create a table === "Python" - ```python - import lancedb + === "Sync API" - db = lancedb.connect("./.lancedb") + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table" + ``` + === "Async API" - data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7}, - {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}] - - db.create_table("my_table", data) - - db["my_table"].head() - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async" + ``` !!! info "Note" If the table already exists, LanceDB will raise an error by default. @@ -67,16 +73,30 @@ Initialize a LanceDB connection and create a table and the table exists, then it simply opens the existing table. The data you passed in will NOT be appended to the table in that case. - ```python - db.create_table("name", data, exist_ok=True) - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_exist_ok" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_exist_ok" + ``` Sometimes you want to make sure that you start fresh. If you want to overwrite the table, you can pass in mode="overwrite" to the createTable function. - ```python - db.create_table("name", data, mode="overwrite") - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_overwrite" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_overwrite" + ``` === "Typescript[^1]" You can create a LanceDB table in JavaScript using an array of records as follows. 
@@ -146,34 +166,37 @@ Initialize a LanceDB connection and create a table ### From a Pandas DataFrame -```python -import pandas as pd -data = pd.DataFrame({ - "vector": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]], - "lat": [45.5, 40.1], - "long": [-122.7, -74.1] -}) +=== "Sync API" -db.create_table("my_table", data) + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pandas" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_from_pandas" + ``` +=== "Async API" -db["my_table"].head() -``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pandas" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_from_pandas" + ``` !!! info "Note" Data is converted to Arrow before being written to disk. For maximum control over how data is saved, either provide the PyArrow schema to convert to or else provide a PyArrow Table directly. The **`vector`** column needs to be a [Vector](../python/pydantic.md#vector-field) (defined as [pyarrow.FixedSizeList](https://arrow.apache.org/docs/python/generated/pyarrow.list_.html)) type. -```python -custom_schema = pa.schema([ -pa.field("vector", pa.list_(pa.float32(), 4)), -pa.field("lat", pa.float32()), -pa.field("long", pa.float32()) -]) +=== "Sync API" -table = db.create_table("my_table", data, schema=custom_schema) -``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_custom_schema" + ``` +=== "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_custom_schema" + ``` ### From a Polars DataFrame @@ -182,45 +205,38 @@ written in Rust. Just like in Pandas, the Polars integration is enabled by PyArr under the hood. A deeper integration between LanceDB Tables and Polars DataFrames is on the way. 
-```python -import polars as pl +=== "Sync API" -data = pl.DataFrame({ - "vector": [[3.1, 4.1], [5.9, 26.5]], - "item": ["foo", "bar"], - "price": [10.0, 20.0] -}) -table = db.create_table("pl_table", data=data) -``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-polars" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_from_polars" + ``` +=== "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-polars" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_from_polars" + ``` ### From an Arrow Table You can also create LanceDB tables directly from Arrow tables. LanceDB supports float16 data type! === "Python" + === "Sync API" - ```python - import pyarrows as pa - import numpy as np + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-numpy" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_from_arrow_table" + ``` + === "Async API" - dim = 16 - total = 2 - schema = pa.schema( - [ - pa.field("vector", pa.list_(pa.float16(), dim)), - pa.field("text", pa.string()) - ] - ) - data = pa.Table.from_arrays( - [ - pa.array([np.random.randn(dim).astype(np.float16) for _ in range(total)], - pa.list_(pa.float16(), dim)), - pa.array(["foo", "bar"]) - ], - ["vector", "text"], - ) - tbl = db.create_table("f16_tbl", data, schema=schema) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-polars" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-numpy" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_from_arrow_table" + ``` === "Typescript[^1]" @@ -250,25 +266,22 @@ can be configured with the vector dimensions. It is also important to note that LanceDB only understands subclasses of `lancedb.pydantic.LanceModel` (which itself derives from `pydantic.BaseModel`). 
-```python -from lancedb.pydantic import Vector, LanceModel +=== "Sync API" -class Content(LanceModel): - movie_id: int - vector: Vector(128) - genres: str - title: str - imdb_id: int + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb-pydantic" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:class-Content" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_from_pydantic" + ``` +=== "Async API" - @property - def imdb_url(self) -> str: - return f"https://www.imdb.com/title/tt{self.imdb_id}" - -import pyarrow as pa -db = lancedb.connect("~/.lancedb") -table_name = "movielens_small" -table = db.create_table(table_name, schema=Content) -``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb-pydantic" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:class-Content" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_from_pydantic" + ``` #### Nested schemas @@ -277,22 +290,24 @@ For example, you may want to store the document string and the document source name as a nested Document object: ```python -class Document(BaseModel): - content: str - source: str +--8<-- "python/python/tests/docs/test_guide_tables.py:import-pydantic-basemodel" +--8<-- "python/python/tests/docs/test_guide_tables.py:class-Document" ``` This can be used as the type of a LanceDB table column: -```python -class NestedSchema(LanceModel): - id: str - vector: Vector(1536) - document: Document +=== "Sync API" -tbl = db.create_table("nested_table", schema=NestedSchema, mode="overwrite") -``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:class-NestedSchema" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_nested_schema" + ``` +=== "Async API" + ```python + --8<-- 
"python/python/tests/docs/test_guide_tables.py:class-NestedSchema" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_nested_schema" + ``` This creates a struct column called "document" that has two subfields called "content" and "source": @@ -356,29 +371,20 @@ LanceDB additionally supports PyArrow's `RecordBatch` Iterators or other generat Here's an example using using `RecordBatch` iterator for creating tables. -```python -import pyarrow as pa +=== "Sync API" -def make_batches(): - for i in range(5): - yield pa.RecordBatch.from_arrays( - [ - pa.array([[3.1, 4.1, 5.1, 6.1], [5.9, 26.5, 4.7, 32.8]], - pa.list_(pa.float32(), 4)), - pa.array(["foo", "bar"]), - pa.array([10.0, 20.0]), - ], - ["vector", "item", "price"], - ) + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:make_batches" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_from_batch" + ``` +=== "Async API" -schema = pa.schema([ - pa.field("vector", pa.list_(pa.float32(), 4)), - pa.field("item", pa.utf8()), - pa.field("price", pa.float32()), -]) - -db.create_table("batched_tale", make_batches(), schema=schema) -``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:make_batches" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_from_batch" + ``` You can also use iterators of other types like Pandas DataFrame or Pylists directly in the above example. @@ -387,15 +393,29 @@ You can also use iterators of other types like Pandas DataFrame or Pylists direc === "Python" If you forget the name of your table, you can always get a listing of all table names. 
- ```python - print(db.table_names()) - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:list_tables" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:list_tables_async" + ``` Then, you can open any existing tables. - ```python - tbl = db.open_table("my_table") - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:open_table" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:open_table_async" + ``` === "Typescript[^1]" @@ -418,35 +438,41 @@ You can create an empty table for scenarios where you want to add data to the ta An empty table can be initialized via a PyArrow schema. + === "Sync API" - ```python - import lancedb - import pyarrow as pa + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_empty_table" + ``` + === "Async API" - schema = pa.schema( - [ - pa.field("vector", pa.list_(pa.float32(), 2)), - pa.field("item", pa.string()), - pa.field("price", pa.float32()), - ]) - tbl = db.create_table("empty_table_add", schema=schema) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_empty_table_async" + ``` Alternatively, you can also use Pydantic to specify the schema for the empty table. Note that we do not directly import `pydantic` but instead use `lancedb.pydantic` which is a subclass of `pydantic.BaseModel` that has been extended to support LanceDB specific types like `Vector`. 
- ```python - import lancedb - from lancedb.pydantic import LanceModel, vector + === "Sync API" - class Item(LanceModel): - vector: Vector(2) - item: str - price: float + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb-pydantic" + --8<-- "python/python/tests/docs/test_guide_tables.py:class-Item" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_empty_table_pydantic" + ``` + === "Async API" - tbl = db.create_table("empty_table_add", schema=Item.to_arrow_schema()) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb-pydantic" + --8<-- "python/python/tests/docs/test_guide_tables.py:class-Item" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_empty_table_async_pydantic" + ``` Once the empty table has been created, you can add data to it via the various methods listed in the [Adding to a table](#adding-to-a-table) section. 
@@ -473,86 +499,96 @@ After a table has been created, you can always add more data to it using the `ad ### Add a Pandas DataFrame - ```python - df = pd.DataFrame({ - "vector": [[1.3, 1.4], [9.5, 56.2]], "item": ["banana", "apple"], "price": [5.0, 7.0] - }) - tbl.add(df) - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_from_pandas" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_async_from_pandas" + ``` ### Add a Polars DataFrame - ```python - df = pl.DataFrame({ - "vector": [[1.3, 1.4], [9.5, 56.2]], "item": ["banana", "apple"], "price": [5.0, 7.0] - }) - tbl.add(df) - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_from_polars" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_async_from_polars" + ``` ### Add an Iterator You can also add a large dataset batch in one go using Iterator of any supported data types. - ```python - def make_batches(): - for i in range(5): - yield [ - {"vector": [3.1, 4.1], "item": "peach", "price": 6.0}, - {"vector": [5.9, 26.5], "item": "pear", "price": 5.0} - ] - tbl.add(make_batches()) - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:make_batches_for_add" + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_from_batch" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:make_batches_for_add" + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_async_from_batch" + ``` ### Add a PyArrow table If you have data coming in as a PyArrow table, you can add it directly to the LanceDB table. 
- ```python - pa_table = pa.Table.from_arrays( - [ - pa.array([[9.1, 6.7], [9.9, 31.2]], - pa.list_(pa.float32(), 2)), - pa.array(["mango", "orange"]), - pa.array([7.0, 4.0]), - ], - ["vector", "item", "price"], - ) + === "Sync API" - tbl.add(pa_table) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_from_pyarrow" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_async_from_pyarrow" + ``` ### Add a Pydantic Model Assuming that a table has been created with the correct schema as shown [above](#creating-empty-table), you can add data items that are valid Pydantic models to the table. - ```python - pydantic_model_items = [ - Item(vector=[8.1, 4.7], item="pineapple", price=10.0), - Item(vector=[6.9, 9.3], item="avocado", price=9.0) - ] + === "Sync API" - tbl.add(pydantic_model_items) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_from_pydantic" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_async_from_pydantic" + ``` ??? "Ingesting Pydantic models with LanceDB embedding API" When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` field as None to allow LanceDB to automatically vectorize the data. 
- ```python - import lancedb - from lancedb.pydantic import LanceModel, Vector - from lancedb.embeddings import get_registry + === "Sync API" - db = lancedb.connect("~/tmp") - embed_fcn = get_registry().get("huggingface").create(name="BAAI/bge-small-en-v1.5") + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb-pydantic" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-embeddings" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_with_embedding" + ``` + === "Async API" - class Schema(LanceModel): - text: str = embed_fcn.SourceField() - vector: Vector(embed_fcn.ndims()) = embed_fcn.VectorField(default=None) - - tbl = db.create_table("my_table", schema=Schema, mode="overwrite") - models = [Schema(text="hello"), Schema(text="world")] - tbl.add(models) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb-pydantic" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-embeddings" + --8<-- "python/python/tests/docs/test_guide_tables.py:create_table_async_with_embedding" + ``` === "Typescript[^1]" @@ -571,44 +607,41 @@ Use the `delete()` method on tables to delete rows from a table. 
To choose which === "Python" - ```python - tbl.delete('item = "fizz"') - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:delete_row" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:delete_row_async" + ``` ### Deleting row with specific column value - ```python - import lancedb + === "Sync API" - data = [{"x": 1, "vector": [1, 2]}, - {"x": 2, "vector": [3, 4]}, - {"x": 3, "vector": [5, 6]}] - db = lancedb.connect("./.lancedb") - table = db.create_table("my_table", data) - table.to_pandas() - # x vector - # 0 1 [1.0, 2.0] - # 1 2 [3.0, 4.0] - # 2 3 [5.0, 6.0] - - table.delete("x = 2") - table.to_pandas() - # x vector - # 0 1 [1.0, 2.0] - # 1 3 [5.0, 6.0] - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:delete_specific_row" + ``` + === "Async API" + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:delete_specific_row_async" + ``` + ### Delete from a list of values + === "Sync API" - ```python - to_remove = [1, 5] - to_remove = ", ".join(str(v) for v in to_remove) + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:delete_list_values" + ``` + === "Async API" - table.delete(f"x IN ({to_remove})") - table.to_pandas() - # x vector - # 0 3 [5.0, 6.0] - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:delete_list_values_async" + ``` === "Typescript[^1]" @@ -659,27 +692,20 @@ This can be used to update zero to all rows depending on how many rows match the === "Python" API Reference: [lancedb.table.Table.update][] + === "Sync API" - ```python - import lancedb - import pandas as pd + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pandas" + --8<-- "python/python/tests/docs/test_guide_tables.py:update_table" + ``` + === "Async API" - # Create a lancedb connection - db = lancedb.connect("./.lancedb") - - # Create a 
table from a pandas DataFrame - data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]}) - table = db.create_table("my_table", data) - - # Update the table where x = 2 - table.update(where="x = 2", values={"vector": [10, 10]}) - - # Get the updated table as a pandas DataFrame - df = table.to_pandas() - - # Print the DataFrame - print(df) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb" + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pandas" + --8<-- "python/python/tests/docs/test_guide_tables.py:update_table_async" + ``` Output ```shell @@ -734,13 +760,16 @@ This can be used to update zero to all rows depending on how many rows match the The `values` parameter is used to provide the new values for the columns as literal values. You can also use the `values_sql` / `valuesSql` parameter to provide SQL expressions for the new values. For example, you can use `values_sql="x + 1"` to increment the value of the `x` column by 1. === "Python" + === "Sync API" - ```python - # Update the table where x = 2 - table.update(valuesSql={"x": "x + 1"}) + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:update_table_sql" + ``` + === "Async API" - print(table.to_pandas()) - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:update_table_sql_async" + ``` Output ```shell @@ -771,11 +800,16 @@ This can be used to update zero to all rows depending on how many rows match the Use the `drop_table()` method on the database to remove a table. === "Python" + === "Sync API" - ```python - --8<-- "python/python/tests/docs/test_basic.py:drop_table" - --8<-- "python/python/tests/docs/test_basic.py:drop_table_async" - ``` + ```python + --8<-- "python/python/tests/docs/test_basic.py:drop_table" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:drop_table_async" + ``` This permanently removes the table and is not recoverable, unlike deleting rows. 
By default, if the table does not exist an exception is raised. To suppress this, @@ -809,9 +843,16 @@ data type for it. === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:add_columns" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:add_columns" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:add_columns_async" + ``` **API Reference:** [lancedb.table.Table.add_columns][] === "Typescript" @@ -848,10 +889,18 @@ rewriting the column, which can be a heavy operation. === "Python" - ```python - import pyarrow as pa - --8<-- "python/python/tests/docs/test_basic.py:alter_columns" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_basic.py:alter_columns" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-pyarrow" + --8<-- "python/python/tests/docs/test_basic.py:alter_columns_async" + ``` **API Reference:** [lancedb.table.Table.alter_columns][] === "Typescript" @@ -872,9 +921,16 @@ will remove the column from the schema. === "Python" - ```python - --8<-- "python/python/tests/docs/test_basic.py:drop_columns" - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:drop_columns" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_basic.py:drop_columns_async" + ``` **API Reference:** [lancedb.table.Table.drop_columns][] === "Typescript" @@ -925,31 +981,46 @@ There are three possible settings for `read_consistency_interval`: To set strong consistency, use `timedelta(0)`: - ```python - from datetime import timedelta - db = lancedb.connect("./.lancedb",. 
read_consistency_interval=timedelta(0)) - table = db.open_table("my_table") - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-datetime" + --8<-- "python/python/tests/docs/test_guide_tables.py:table_strong_consistency" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-datetime" + --8<-- "python/python/tests/docs/test_guide_tables.py:table_async_strong_consistency" + ``` For eventual consistency, use a custom `timedelta`: - ```python - from datetime import timedelta - db = lancedb.connect("./.lancedb", read_consistency_interval=timedelta(seconds=5)) - table = db.open_table("my_table") - ``` + === "Sync API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-datetime" + --8<-- "python/python/tests/docs/test_guide_tables.py:table_eventual_consistency" + ``` + === "Async API" + + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:import-datetime" + --8<-- "python/python/tests/docs/test_guide_tables.py:table_async_eventual_consistency" + ``` By default, a `Table` will never check for updates from other writers. 
To manually check for updates you can use `checkout_latest`: - ```python - db = lancedb.connect("./.lancedb") - table = db.open_table("my_table") + === "Sync API" - # (Other writes happen to my_table from another process) + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:table_checkout_latest" + ``` + === "Async API" - # Check for updates - table.checkout_latest() - ``` + ```python + --8<-- "python/python/tests/docs/test_guide_tables.py:table_async_checkout_latest" + ``` === "Typescript[^1]" @@ -957,14 +1028,14 @@ There are three possible settings for `read_consistency_interval`: ```ts const db = await lancedb.connect({ uri: "./.lancedb", readConsistencyInterval: 0 }); - const table = await db.openTable("my_table"); + const tbl = await db.openTable("my_table"); ``` For eventual consistency, specify the update interval as seconds: ```ts const db = await lancedb.connect({ uri: "./.lancedb", readConsistencyInterval: 5 }); - const table = await db.openTable("my_table"); + const tbl = await db.openTable("my_table"); ```