diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 7d74cc60..00804354 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -57,12 +57,14 @@ nav: - Basics: basic.md - Embeddings: embedding.md - Python full-text search: fts.md -- Python integrations: +- Integrations: - Pandas and PyArrow: python/arrow.md - DuckDB: python/duckdb.md - LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html + - LangChain JS/TS 🦜️🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb - LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html - Pydantic: python/pydantic.md + - Voxel51: integrations/voxel51.md - Python examples: - YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb - Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb @@ -72,6 +74,7 @@ nav: - Javascript examples: - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md + - References: - Vector Search: search.md - SQL filters: sql.md diff --git a/docs/src/assets/voxel.gif b/docs/src/assets/voxel.gif new file mode 100644 index 00000000..b74d112c Binary files /dev/null and b/docs/src/assets/voxel.gif differ diff --git a/docs/src/examples/youtube_transcript_bot.md b/docs/src/examples/youtube_transcript_bot.md index 8afa8101..790e09d0 100644 --- a/docs/src/examples/youtube_transcript_bot.md +++ b/docs/src/examples/youtube_transcript_bot.md @@ -4,4 +4,10 @@ youtube transcript search + +Open In Colab + +Scripts - [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/youtube_bot/main.py) [![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)](./examples/youtube_bot/index.js) + + This example is in a 
[notebook](https://github.com/lancedb/lancedb/blob/main/docs/src/notebooks/youtube_transcript_search.ipynb) diff --git a/docs/src/integrations/voxel51.md b/docs/src/integrations/voxel51.md new file mode 100644 index 00000000..dcd0d8b8 --- /dev/null +++ b/docs/src/integrations/voxel51.md @@ -0,0 +1,71 @@ +![example](/assets/voxel.gif) + +Basic recipe +____________ + +The basic workflow to use LanceDB to create a similarity index on your FiftyOne +datasets and use this to query your data is as follows: + +1) Load a dataset into FiftyOne + +2) Compute embedding vectors for samples or patches in your dataset, or select + a model to use to generate embeddings + +3) Use the `compute_similarity()` + method to generate a LanceDB table for the samples or object + patches embeddings in a dataset by setting the parameter `backend="lancedb"` and + specifying a `brain_key` of your choice + +4) Use this LanceDB table to query your data with + `sort_by_similarity()` + +5) If desired, delete the table + +The example below demonstrates this workflow. + +!!! 
Note + + You must install the LanceDB Python client to run this + ``` + pip install lancedb + ``` + +```python + +import fiftyone as fo +import fiftyone.brain as fob +import fiftyone.zoo as foz + +# Step 1: Load your data into FiftyOne +dataset = foz.load_zoo_dataset("quickstart") + +# Steps 2 and 3: Compute embeddings and create a similarity index +lancedb_index = fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", + brain_key="lancedb_index", + backend="lancedb", +) +``` +Once the similarity index has been generated, we can query our data in FiftyOne +by specifying the `brain_key`: + +```python +# Step 4: Query your data +query = dataset.first().id # query by sample ID +view = dataset.sort_by_similarity( + query, + brain_key="lancedb_index", + k=10, # limit to 10 most similar samples +) + +# Step 5 (optional): Cleanup + +# Delete the LanceDB table +lancedb_index.cleanup() + +# Delete run record from FiftyOne +dataset.delete_brain_run("lancedb_index") +``` + +For a more in-depth walkthrough of the integration, visit the LanceDB guide on Voxel51 - [LanceDB x Voxel51](https://docs.voxel51.com/integrations/lancedb.html) diff --git a/docs/src/notebooks/code_qa_bot.ipynb b/docs/src/notebooks/code_qa_bot.ipynb index 2ea6ba10..58b0a563 100644 --- a/docs/src/notebooks/code_qa_bot.ipynb +++ b/docs/src/notebooks/code_qa_bot.ipynb @@ -10,7 +10,11 @@ "\n", "This Q&A bot will allow you to query your own documentation easily using questions. We'll also demonstrate the use of LangChain and LanceDB using the OpenAI API. 
\n", "\n", - "In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well" + "In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well\n", + "\n", + "\"Open\n", + "\n", + "Scripts - [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/Code-Documentation-QA-Bot/main.py) [![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)](./examples/Code-Documentation-QA-Bot/index.js)" ] }, { diff --git a/docs/src/notebooks/multimodal_search.ipynb b/docs/src/notebooks/multimodal_search.ipynb index 8b146f71..c42859d6 100644 --- a/docs/src/notebooks/multimodal_search.ipynb +++ b/docs/src/notebooks/multimodal_search.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![example](https://github.com/lancedb/vectordb-recipes/assets/15766192/799f94a1-a01d-4a5b-a627-2a733bbb4227)\n", + "\n", + " \"Open| [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/multimodal_clip/main.py) |" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -42,6 +51,19 @@ "## First run setup: Download data and pre-process" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Get dataset\n", + "\n", + "!wget https://eto-public.s3.us-west-2.amazonaws.com/datasets/diffusiondb_lance.tar.gz\n", + "!tar -xvf diffusiondb_lance.tar.gz\n", + "!mv diffusiondb_test rawdata.lance\n" + ] + }, { "cell_type": "code", "execution_count": 30, @@ -247,7 +269,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.11.4 64-bit", "language": "python", "name": "python3" }, @@ -261,7 +283,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - 
"version": "3.11.3" + "version": "3.11.4" + }, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } } }, "nbformat": 4, diff --git a/docs/src/notebooks/youtube_transcript_search.ipynb b/docs/src/notebooks/youtube_transcript_search.ipynb index 67407f92..8165e0fc 100644 --- a/docs/src/notebooks/youtube_transcript_search.ipynb +++ b/docs/src/notebooks/youtube_transcript_search.ipynb @@ -8,7 +8,12 @@ "source": [ "# Youtube Transcript Search QA Bot\n", "\n", - "This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily." + "This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily.\n", + "\n", + "\n", + "\"Open\n", + "\n", + "Scripts - [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/youtube_bot/main.py) [![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)](./examples/youtube_bot/index.js)\n" ] }, { diff --git a/docs/test/md_testing.py b/docs/test/md_testing.py index 0a566a6e..9b392cb0 100644 --- a/docs/test/md_testing.py +++ b/docs/test/md_testing.py @@ -7,7 +7,8 @@ excluded_files = [ "../src/embedding.md", "../src/examples/serverless_lancedb_with_s3_and_lambda.md", "../src/examples/serverless_qa_bot_with_modal_and_langchain.md", - "../src/examples/youtube_transcript_bot_with_nodejs.md" + "../src/examples/youtube_transcript_bot_with_nodejs.md", + "../src/integrations/voxel51.md", ] python_prefix = "py"