update references to end to end examples, use s3 for langchain exampl… (#133)

2025-12-22 21:09:58 +00:00 · 2023-06-02 16:08:56 -07:00
parent 8af5f19cc1
commit daedf1396b
5 changed files with 28 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ The key features of LanceDB include:

 * Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.

-LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/eto-ai/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
+LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.

 ## Quick Start

--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -44,6 +44,7 @@ nav:
 - Python examples:
  - YouTube Transcript Search using OpenAI: notebooks/youtube_transcript_search.ipynb
  - Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
+  - Multimodal search using OpenAI and CLIP: notebooks/multimodal_search.ipynb
 - API references:
  - Python API: python/python.md
  - Javascript API: javascript/modules.md
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -14,7 +14,7 @@ The key features of LanceDB include:

 * Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.

-LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/eto-ai/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
+LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.

 ## Quick Start

@@ -50,11 +50,12 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
      const results = await table.search([100, 100]).limit(2).execute();
      ```

-## Complete Demos
+## Complete Demos (Python)

 We will be adding completed demo apps built using LanceDB.
 - [YouTube Transcript Search](notebooks/youtube_transcript_search.ipynb)
-
+- [Documentation QA Bot using LangChain](notebooks/code_qa_bot.ipynb)
+- [Multimodal search using OpenAI and CLIP](notebooks/multimodal_search.ipynb)

 ## Documentation Quick Links
 * [`Basic Operations`](basic.md) - basic functionality of LanceDB.
--- a/docs/src/notebooks/code_qa_bot.ipynb
+++ b/docs/src/notebooks/code_qa_bot.ipynb
@@ -72,6 +72,8 @@
    "import lancedb\n",
    "import re\n",
    "import pickle\n",
+    "import requests\n",
+    "import zipfile\n",
    "from pathlib import Path\n",
    "\n",
    "from langchain.document_loaders import UnstructuredHTMLLoader\n",
@@ -85,10 +87,25 @@
  {
   "attachments": {},
   "cell_type": "markdown",
-   "id": "6ccf9b2b",
+   "id": "56cc6d50",
   "metadata": {},
   "source": [
-    "You can download the Pandas documentation from https://pandas.pydata.org/docs/. To make sure we're not littering our repo with docs, we won't include it in the LanceDB repo, so download this and store it locally first."
+    "To make this easier, we've already collected the Pandas documentation and hosted the raw HTML files as a zip archive. We'll download and extract that archive, then use LangChain's HTML document loader to parse the files and store them in LanceDB as a vector store, along with relevant metadata."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7da77e75",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pandas_docs = requests.get(\"https://eto-public.s3.us-west-2.amazonaws.com/datasets/pandas_docs/pandas.documentation.zip\")\n",
+    "with open('/tmp/pandas.documentation.zip', 'wb') as f:\n",
+    "    f.write(pandas_docs.content)\n",
+    "\n",
+    "file = zipfile.ZipFile(\"/tmp/pandas.documentation.zip\")\n",
+    "file.extractall(path=\"/tmp/pandas_docs\")"
   ]
  },
  {
@@ -137,7 +154,8 @@
    "docs = []\n",
    "\n",
    "if not docs_path.exists():\n",
-    "    for p in Path(\"./pandas.documentation\").rglob(\"*.html\"):\n",
+    "    for p in Path(\"/tmp/pandas_docs/pandas.documentation\").rglob(\"*.html\"):\n",
+    "        print(p)\n",
    "        if p.is_dir():\n",
    "            continue\n",
    "        loader = UnstructuredHTMLLoader(p)\n",
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -33,7 +33,7 @@ classifiers = [
 ]

 [project.urls]
-repository = "https://github.com/eto-ai/lancedb"
+repository = "https://github.com/lancedb/lancedb"

 [project.optional-dependencies]
 tests = [