From f35f8e451fb6b8f83ca4bad68aefb22c1341eece Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 18 Aug 2023 09:59:22 +0530 Subject: [PATCH] [DOCS] Update integrations + small typos (#432) Depends on - https://github.com/lancedb/lancedb/pull/430 --------- Co-authored-by: Kevin Tse --- docs/mkdocs.yml | 1 + docs/src/guides/tables.md | 47 +++++++++++++++++----------- docs/src/integrations/prompttools.md | 7 +++++ python/lancedb/db.py | 4 +-- 4 files changed, 39 insertions(+), 20 deletions(-) create mode 100644 docs/src/integrations/prompttools.md diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index d75f0def..9c04c1ed 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -67,6 +67,7 @@ nav: - LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html - Pydantic: python/pydantic.md - Voxel51: integrations/voxel51.md + - PromptTools: integrations/prompttools.md - Python examples: - YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb - Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb diff --git a/docs/src/guides/tables.md b/docs/src/guides/tables.md index b7dd4cfc..08ffdb07 100644 --- a/docs/src/guides/tables.md +++ b/docs/src/guides/tables.md @@ -63,6 +63,25 @@ A Table is a collection of Records in a LanceDB Database. table = db.create_table("table3", data, schema=custom_schema) ``` + ### From PyArrow Tables + You can also create LanceDB tables directly from pyarrow tables + + ```python + table = pa.Table.from_arrays( + [ + pa.array([[3.1, 4.1], [5.9, 26.5]], + pa.list_(pa.float32(), 2)), + pa.array(["foo", "bar"]), + pa.array([10.0, 20.0]), + ], + ["vector", "item", "price"], + ) + + db = lancedb.connect("db") + + tbl = db.create_table("test1", table) + ``` + ### From Pydantic Models LanceDB supports to create Apache Arrow Schema from a Pydantic BaseModel via pydantic_to_schema() method. @@ -86,10 +105,14 @@ A Table is a collection of Records in a LanceDB Database. 
 table = db.create_table(table_name, schema=Content.to_arrow_schema()) ``` - ### Using RecordBatch Iterator / Writing Large Datasets + ### Using Iterators / Writing Large Datasets - It is recommended to use RecordBatch itertator to add large datasets in batches when creating your table in one go. This does not create multiple versions of your dataset unlike manually adding batches using `table.add()` + It is recommended to use iterators to add large datasets in batches when creating your table in one go. This does not create multiple versions of your dataset unlike manually adding batches using `table.add()` + LanceDB additionally supports pyarrow's `RecordBatch` Iterators or other generators producing supported data types. + + Here's an example using a `RecordBatch` iterator for creating tables. + ```python import pyarrow as pa @@ -97,7 +120,8 @@ A Table is a collection of Records in a LanceDB Database. for i in range(5): yield pa.RecordBatch.from_arrays( [ - pa.array([[3.1, 4.1], [5.9, 26.5]]), + pa.array([[3.1, 4.1], [5.9, 26.5]], + pa.list_(pa.float32(), 2)), pa.array(["foo", "bar"]), pa.array([10.0, 20.0]), ], @@ -105,7 +129,7 @@ A Table is a collection of Records in a LanceDB Database. ) schema = pa.schema([ - pa.field("vector", pa.list_(pa.float32())), + pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("item", pa.utf8()), pa.field("price", pa.float32()), ]) @@ -113,20 +137,7 @@ A Table is a collection of Records in a LanceDB Database. 
db.create_table("table4", make_batches(), schema=schema) ``` - You can also use Pandas dataframe directly in the above example by converting it to `RecordBatch` object - - ```python - import pandas as pd - import pyarrow as pa - - df = pd.DataFrame({'vector': [[0,1], [2,3], [4,5],[6,7]], - 'month': [3, 5, 7, 9], - 'day': [1, 5, 9, 13], - 'n_legs': [2, 4, 5, 100], - 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]}) - - batch = pa.RecordBatch.from_pandas(df) - ``` + You can also use iterators of other types like Pandas dataframe or Pylists directly in the above example. ## Creating Empty Table You can also create empty tables in python. Initialize it with schema and later ingest data into it. diff --git a/docs/src/integrations/prompttools.md b/docs/src/integrations/prompttools.md new file mode 100644 index 00000000..4b067f1e --- /dev/null +++ b/docs/src/integrations/prompttools.md @@ -0,0 +1,7 @@ + +[PromptTools](https://github.com/hegelai/prompttools) offers a set of free, open-source tools for testing and experimenting with models, prompts, and configurations. The core idea is to enable developers to evaluate prompts using familiar interfaces like code and notebooks. You can use it to experiment with different configurations of LanceDB, and test how LanceDB integrates with the LLM of your choice. + +[Evaluating Prompts with PromptTools](./examples/prompttools-eval-prompts/) | Open In Colab + +![Alt text](https://prompttools.readthedocs.io/en/latest/_images/demo.gif "a title") + diff --git a/python/lancedb/db.py b/python/lancedb/db.py index 69aecc8c..a898a8bf 100644 --- a/python/lancedb/db.py +++ b/python/lancedb/db.py @@ -149,14 +149,14 @@ class DBConnection(ABC): ... for i in range(5): ... yield pa.RecordBatch.from_arrays( ... [ - ... pa.array([[3.1, 4.1], [5.9, 26.5]]), + ... pa.array([[3.1, 4.1], [5.9, 26.5]], pa.list_(pa.float32(), 2)), ... pa.array(["foo", "bar"]), ... pa.array([10.0, 20.0]), ... ], ... ["vector", "item", "price"], ... 
) >>> schema=pa.schema([ - ... pa.field("vector", pa.list_(pa.float32())), + ... pa.field("vector", pa.list_(pa.float32(), 2)), ... pa.field("item", pa.utf8()), ... pa.field("price", pa.float32()), ... ])