From e7fdb931deb6883e6e81ace388b01852ce56cdcc Mon Sep 17 00:00:00 2001
From: Lei Xu <lei@lancedb.com>
Date: Sun, 28 Jan 2024 11:39:25 -0800
Subject: [PATCH] chore: convert all js doc test to use snippet. (#881)

---
 .github/workflows/docs_test.yml | 24 +-------
 docs/package.json               |  4 +-
 docs/src/ann_indexes.md         | 74 ++++++++++++-------------
 docs/src/ann_indexes.ts         | 53 ++++++++++++++++++
 docs/src/search.md              | 67 ++++++++--------------
 docs/src/search_legacy.ts       | 41 ++++++++++++++
 docs/src/sql.md                 | 98 ++++++++++++++++-----------------
 docs/src/sql_legacy.ts          | 38 +++++++++++++
 docs/test/md_testing.js         | 60 --------------------
 docs/test/package.json          | 13 -----
 node/src/index.ts               |  2 +
 node/src/remote/index.ts        |  4 ++
 12 files changed, 246 insertions(+), 232 deletions(-)
 create mode 100644 docs/src/ann_indexes.ts
 create mode 100644 docs/src/search_legacy.ts
 create mode 100644 docs/src/sql_legacy.ts
 delete mode 100644 docs/test/md_testing.js
 delete mode 100644 docs/test/package.json

diff --git a/.github/workflows/docs_test.yml b/.github/workflows/docs_test.yml
index 22cb6007..1132bd74 100644
--- a/.github/workflows/docs_test.yml
+++ b/.github/workflows/docs_test.yml
@@ -68,31 +68,13 @@ jobs:
       run: |
         cd node
         npm ci
-        npm run build
+        npm run build-release
         cd ../docs
         npm install
-    - name: Run doc test
-      run: |
-        cd docs
-        npm t
-    - name: Install dependencies for generated code
-      run: |
-        cd docs/test
-        npm install
-    - name: Install LanceDB
-      run: |
-        cd docs/test/node_modules/vectordb
-        npm ci
-        npm run build-release
-        npm run tsc
-    - name: Create test files
-      run: |
-        cd docs/test
-        node md_testing.js
     - name: Test
       env:
         LANCEDB_URI: ${{ secrets.LANCEDB_URI }}
         LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }}
       run: |
-        cd docs/test/node
-        for d in *; do cd "$d"; echo "$d".js; node "$d".js; cd ..; done
+        cd docs
+        npm t
diff --git a/docs/package.json b/docs/package.json
index 792cdc89..041e5524 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -9,9 +9,9 @@
     "vectordb": "file:../node"
   },
   "scripts": {
-    "build": "tsc -b && cd ../node && npm run build",
+    "build": "tsc -b && cd ../node && npm run build-release",
     "example": "npm run build && node",
-    "test": "npm run build && node $(ls dist/*.js)"
+    "test": "npm run build && ls dist/*.js | xargs -n 1 node"
   },
   "devDependencies": {
     "@types/node": "^20.11.8",
diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md
index 230e5a3e..bc745b75 100644
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -7,7 +7,7 @@ for brute-force scanning of the entire vector space.
 A vector index is faster but less accurate than exhaustive search (kNN or flat search).
 LanceDB provides many parameters to fine-tune the index's size, the speed of queries, and the accuracy of results.
 
-Currently, LanceDB does *not* automatically create the ANN index.
+Currently, LanceDB does _not_ automatically create the ANN index.
 LanceDB has optimized code for kNN as well. For many use-cases, datasets under 100K vectors won't require index creation at all.
 If you can live with <100ms latency, skipping index creation is a simpler workflow while guaranteeing 100% recall.
 
@@ -17,16 +17,17 @@ In the future we will look to automatically create and configure the ANN index a
 
 Lance can support multiple index types, the most widely used one is `IVF_PQ`.
 
-* `IVF_PQ`: use **Inverted File Index (IVF)** to first divide the dataset into `N` partitions,
-   and then use **Product Quantization** to compress vectors in each partition.
-* `DiskANN` (**Experimental**): organize the vector as a on-disk graph, where the vertices approximately
-   represent the nearest neighbors of each vector.
+- `IVF_PQ`: use **Inverted File Index (IVF)** to first divide the dataset into `N` partitions,
+  and then use **Product Quantization** to compress vectors in each partition.
+- `DiskANN` (**Experimental**): organize the vectors as an on-disk graph, where the vertices approximately
+  represent the nearest neighbors of each vector.
 
 ## Creating an IVF_PQ Index
 
 Lance supports `IVF_PQ` index type by default.
 
 === "Python"
+
      Creating indexes is done via the [create_index](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.create_index) method.
 
      ```python
@@ -47,24 +48,19 @@ Lance supports `IVF_PQ` index type by default.
      ```
 
 === "Javascript"
-     ```javascript
-     const vectordb = require('vectordb')
-     const db = await vectordb.connect('data/sample-lancedb')
 
-     let data = []
-     for (let i = 0; i < 10_000; i++) {
-         data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
-     }
-     const table = await db.createTable('my_vectors', data)
-     await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 16, num_sub_vectors: 48 })
+     ```javascript
+     --8<-- "src/ann_indexes.ts:import"
+
+     --8<-- "src/ann_indexes.ts:ingest"
      ```
 
 - **metric** (default: "L2"): The distance metric to use. By default it uses euclidean distance "`L2`".
-We also support "cosine" and "dot" distance as well.
+  "cosine" and "dot" distances are also supported.
 - **num_partitions** (default: 256): The number of partitions of the index.
 - **num_sub_vectors** (default: 96): The number of sub-vectors (M) that will be created during Product Quantization (PQ).
-For D dimensional vector, it will be divided into `M` of `D/M` sub-vectors, each of which is presented by
-a single PQ code.
+  A `D`-dimensional vector is divided into `M` sub-vectors of `D/M` dimensions, each of which is represented by
+  a single PQ code.
 
 <figure markdown>
   ![IVF PQ](./assets/ivf_pq.png)
@@ -78,7 +74,7 @@ Using GPU for index creation requires [PyTorch>2.0](https://pytorch.org/) being
 
 You can specify the GPU device to train IVF partitions via
 
-- **accelerator**: Specify to ``cuda`` or ``mps`` (on Apple Silicon) to enable GPU training.
+- **accelerator**: Set to `cuda` or `mps` (on Apple Silicon) to enable GPU training.
 
 === "Linux"
 
@@ -106,10 +102,9 @@ You can specify the GPU device to train IVF partitions via
 
 Trouble shootings:
 
-If you see ``AssertionError: Torch not compiled with CUDA enabled``, you need to [install
+If you see `AssertionError: Torch not compiled with CUDA enabled`, you need to [install
 PyTorch with CUDA support](https://pytorch.org/get-started/locally/).
 
-
 ## Querying an ANN Index
 
 Querying vector indexes is done via the [search](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.search) function.
@@ -127,6 +122,7 @@ There are a couple of parameters that can be used to fine-tune the search:
   Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
 
 === "Python"
+
      ```python
      tbl.search(np.random.random((1536))) \
          .limit(2) \
@@ -134,41 +130,35 @@ There are a couple of parameters that can be used to fine-tune the search:
          .refine_factor(10) \
          .to_pandas()
      ```
-     ```
+
+     ```text
                                               vector       item       _distance
      0  [0.44949695, 0.8444449, 0.06281311, 0.23338133...  item 1141  103.575333
      1  [0.48587373, 0.269207, 0.15095535, 0.65531915,...  item 3953  108.393867
      ```
 
 === "Javascript"
+
      ```javascript
-     const results_1 = await table
-         .search(Array(1536).fill(1.2))
-         .limit(2)
-         .nprobes(20)
-         .refineFactor(10)
-         .execute()
+     --8<-- "src/ann_indexes.ts:search1"
      ```
 
 The search will return the data requested in addition to the distance of each item.
 
-
 ### Filtering (where clause)
 
 You can further filter the elements returned by a search using a where clause.
 
 === "Python"
+
      ```python
      tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas()
      ```
 
 === "Javascript"
+
      ```javascript
-     const results_2 = await table
-         .search(Array(1536).fill(1.2))
-         .where("id != '1141'")
-         .limit(2)
-         .execute()
+     --8<-- "src/ann_indexes.ts:search2"
      ```
 
 ### Projections (select clause)
@@ -176,23 +166,23 @@ You can further filter the elements returned by a search using a where clause.
 You can select the columns returned by the query using a select clause.
 
 === "Python"
+
      ```python
      tbl.search(np.random.random((1536))).select(["vector"]).to_pandas()
      ```
-     ```
-        vector                                             _distance
+
+
+     ```text
+                                                   vector _distance
      0  [0.30928212, 0.022668175, 0.1756372, 0.4911822...  93.971092
      1  [0.2525465, 0.01723831, 0.261568, 0.002007689,...  95.173485
      ...
      ```
 
 === "Javascript"
+
      ```javascript
-     const results_3 = await table
-         .search(Array(1536).fill(1.2))
-         .select(["id"])
-         .limit(2)
-         .execute()
+     --8<-- "src/ann_indexes.ts:search3"
      ```
 
 ## FAQ
@@ -222,3 +212,7 @@ On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows
 PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in
 less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and
 more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
+
+```
+
+```
diff --git a/docs/src/ann_indexes.ts b/docs/src/ann_indexes.ts
new file mode 100644
index 00000000..b6bafb8c
--- /dev/null
+++ b/docs/src/ann_indexes.ts
@@ -0,0 +1,53 @@
+// --8<-- [start:import]
+import * as vectordb from "vectordb";
+// --8<-- [end:import]
+
+(async () => { // async IIFE so the snippet regions below can use await
+  // --8<-- [start:ingest]
+  const db = await vectordb.connect("data/sample-lancedb");
+
+  let data = [];
+  for (let i = 0; i < 10_000; i++) {
+    data.push({
+      vector: Array(1536).fill(i),
+      id: `${i}`,
+      content: "",
+      longId: `${i}`,
+    });
+  }
+  const table = await db.createTable("my_vectors", data);
+  await table.createIndex({
+    type: "ivf_pq",
+    column: "vector",
+    num_partitions: 16,
+    num_sub_vectors: 48,
+  });
+  // --8<-- [end:ingest]
+
+  // --8<-- [start:search1]
+  const results_1 = await table
+    .search(Array(1536).fill(1.2))
+    .limit(2)
+    .nprobes(20)
+    .refineFactor(10)
+    .execute();
+  // --8<-- [end:search1]
+
+  // --8<-- [start:search2]
+  const results_2 = await table
+    .search(Array(1536).fill(1.2))
+    .where("id != '1141'")
+    .limit(2)
+    .execute();
+  // --8<-- [end:search2]
+
+  // --8<-- [start:search3]
+  const results_3 = await table
+    .search(Array(1536).fill(1.2))
+    .select(["id"])
+    .limit(2)
+    .execute();
+  // --8<-- [end:search3]
+
+  console.log("Ann indexes: done"); // reached only if every awaited step above resolved
+})();
diff --git a/docs/src/search.md b/docs/src/search.md
index e80d4ffd..ac4613f1 100644
--- a/docs/src/search.md
+++ b/docs/src/search.md
@@ -2,27 +2,26 @@
 
 A vector search finds the approximate or exact nearest neighbors to a given query vector.
 
-* In a recommendation system or search engine, you can find similar records to
-the one you searched.
-* In LLM and other AI applications,
-each data point can be represented by [embeddings generated from existing models](embeddings/index.md),
-following which the search returns the most relevant features.
+- In a recommendation system or search engine, you can find similar records to
+  the one you searched.
+- In LLM and other AI applications,
+  each data point can be represented by [embeddings generated from existing models](embeddings/index.md),
+  following which the search returns the most relevant features.
 
 ## Distance metrics
 
 Distance metrics are a measure of the similarity between a pair of vectors.
 Currently, LanceDB supports the following metrics:
 
-| Metric      | Description                          |
-| ----------- | ------------------------------------ |
-| `l2`        | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
-| `cosine`    | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity)|
-| `dot`       | [Dot Production](https://en.wikipedia.org/wiki/Dot_product) |
-
+| Metric   | Description                                                                 |
+| -------- | --------------------------------------------------------------------------- |
+| `l2`     | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
+| `cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity)        |
+| `dot`    | [Dot Product](https://en.wikipedia.org/wiki/Dot_product)                    |
 
 ## Exhaustive search (kNN)
 
-If you do not create a vector index, LanceDB exhaustively scans the *entire* vector space
+If you do not create a vector index, LanceDB exhaustively scans the _entire_ vector space
 and compute the distance to every vector in order to find the exact nearest neighbors. This is effectively a kNN search.
 
 <!-- Setup Code
@@ -38,22 +37,9 @@ data = [{"vector": row, "item": f"item {i}"}
 db.create_table("my_vectors", data=data)
 ```
 -->
-<!-- Setup Code
-```javascript
-const vectordb_setup = require('vectordb')
-const db_setup = await vectordb_setup.connect('data/sample-lancedb')
-
-let data = []
-for (let i = 0; i < 10_000; i++) {
-     data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
-}
-await db_setup.createTable('my_vectors', data)
-```
--->
 
 === "Python"
 
-
     ```python
     import lancedb
     import numpy as np
@@ -70,17 +56,12 @@ await db_setup.createTable('my_vectors', data)
 === "JavaScript"
 
     ```javascript
-    const vectordb = require('vectordb')
-    const db = await vectordb.connect('data/sample-lancedb')
+    --8<-- "src/search_legacy.ts:import"
 
-    const tbl = await db.openTable("my_vectors")
-
-    const results_1 = await tbl.search(Array(1536).fill(1.2))
-        .limit(10)
-        .execute()
+    --8<-- "src/search_legacy.ts:search1"
     ```
 
-By default, `l2` will be used as metric type. You can specify the metric type as 
+By default, `l2` will be used as metric type. You can specify the metric type as
 `cosine` or `dot` if required.
 
 === "Python"
@@ -92,20 +73,16 @@ By default, `l2` will be used as metric type. You can specify the metric type as
         .to_list()
     ```
 
-
 === "JavaScript"
 
     ```javascript
-    const results_2 = await tbl.search(Array(1536).fill(1.2))
-        .metricType("cosine")
-        .limit(10)
-        .execute()
+    --8<-- "src/search_legacy.ts:search2"
     ```
 
 ## Approximate nearest neighbor (ANN) search
 
 To perform scalable vector retrieval with acceptable latencies, it's common to build a vector index.
-While the exhaustive search is guaranteed to always return 100% recall, the approximate nature of 
+While the exhaustive search is guaranteed to always return 100% recall, the approximate nature of
 an ANN search means that using an index often involves a trade-off between recall and latency.
 
 See the [IVF_PQ index](./concepts/index_ivfpq.md.md) for a deeper description of how `IVF_PQ`
@@ -117,7 +94,9 @@ LanceDB returns vector search results via different formats commonly used in pyt
 Let's create a LanceDB table with a nested schema:
 
 === "Python"
+
     ```python
+
     from datetime import datetime
     import lancedb
     from lancedb.pydantic import LanceModel, Vector
@@ -153,7 +132,7 @@ Let's create a LanceDB table with a nested schema:
     ### As a PyArrow table
 
     Using `to_arrow()` we can get the results back as a pyarrow Table.
-    This result table has the same columns as the LanceDB table, with 
+    This result table has the same columns as the LanceDB table, with
     the addition of an `_distance` column for vector search or a `score`
     column for full text search.
 
@@ -169,11 +148,11 @@ Let's create a LanceDB table with a nested schema:
     tbl.search(np.random.randn(1536)).to_pandas()
     ```
 
-    While other formats like Arrow/Pydantic/Python dicts have a natural 
-    way to handle nested schemas, pandas can only store nested data as a 
+    While other formats like Arrow/Pydantic/Python dicts have a natural
+    way to handle nested schemas, pandas can only store nested data as a
     python dict column, which makes it difficult to support nested references.
-    So for convenience, you can also tell LanceDB to flatten a nested schema 
-    when creating the pandas dataframe. 
+    So for convenience, you can also tell LanceDB to flatten a nested schema
+    when creating the pandas dataframe.
 
     ```python
     tbl.search(np.random.randn(1536)).to_pandas(flatten=True)
diff --git a/docs/src/search_legacy.ts b/docs/src/search_legacy.ts
new file mode 100644
index 00000000..dab567e3
--- /dev/null
+++ b/docs/src/search_legacy.ts
@@ -0,0 +1,41 @@
+// --8<-- [start:import]
+import * as lancedb from "vectordb";
+// --8<-- [end:import]
+import * as fs from "fs";
+
+async function setup() { // rebuilds the "my_vectors" table from scratch in data/sample-lancedb
+  fs.rmSync("data/sample-lancedb", { recursive: true, force: true }); // start clean; force tolerates a missing directory
+  const db = await lancedb.connect("data/sample-lancedb");
+
+  let data = []; // filled with 10,000 rows, each holding a 1536-element vector set to the row index
+  for (let i = 0; i < 10_000; i++) {
+    data.push({
+      vector: Array(1536).fill(i),
+      id: `${i}`,
+      content: "",
+      longId: `${i}`,
+    });
+  }
+  await db.createTable("my_vectors", data);
+}
+
+(async () => {
+  await setup(); // recreate the sample table before the documented searches run
+
+  // --8<-- [start:search1]
+  const db = await lancedb.connect("data/sample-lancedb");
+  const tbl = await db.openTable("my_vectors");
+
+  const results_1 = await tbl.search(Array(1536).fill(1.2)).limit(10).execute();
+  // --8<-- [end:search1]
+
+  // --8<-- [start:search2]
+  const results_2 = await tbl
+    .search(Array(1536).fill(1.2))
+    .metricType(lancedb.MetricType.Cosine)
+    .limit(10)
+    .execute();
+  // --8<-- [end:search2]
+
+  console.log("search: done");
+})(); // invoked immediately: a bare arrow-function expression would never execute the test
diff --git a/docs/src/sql.md b/docs/src/sql.md
index 0f7f91f3..c8847529 100644
--- a/docs/src/sql.md
+++ b/docs/src/sql.md
@@ -8,7 +8,7 @@ option that performs the filter prior to vector search. This can be useful to na
 the search space on a very large dataset to reduce query latency.
 
 <!-- Setup Code
-```python 
+```python
 import lancedb
 import numpy as np
 uri = "data/sample-lancedb"
@@ -21,7 +21,7 @@ tbl = db.create_table("my_vectors", data=data)
 ```
 -->
 <!-- Setup Code
-```javascript 
+```javascript
 const vectordb = require('vectordb')
 const db = await vectordb.connect('data/sample-lancedb')
 
@@ -34,6 +34,7 @@ const tbl = await db.createTable('myVectors', data)
 -->
 
 === "Python"
+
     ```py
     result = (
         tbl.search([0.5, 0.2])
@@ -44,12 +45,9 @@ const tbl = await db.createTable('myVectors', data)
     ```
 
 === "JavaScript"
+
     ```javascript
-    let result = await tbl.search(Array(1536).fill(0.5))
-        .limit(1)
-        .filter("id = 10")
-        .prefilter(true)
-        .execute()
+    --8<-- "src/sql_legacy.ts:search"
     ```
 
 ## SQL filters
@@ -60,14 +58,14 @@ It can be used during vector search, update, and deletion operations.
 
 Currently, Lance supports a growing list of SQL expressions.
 
-* ``>``, ``>=``, ``<``, ``<=``, ``=``
-* ``AND``, ``OR``, ``NOT``
-* ``IS NULL``, ``IS NOT NULL``
-* ``IS TRUE``, ``IS NOT TRUE``, ``IS FALSE``, ``IS NOT FALSE``
-* ``IN``
-* ``LIKE``, ``NOT LIKE``
-* ``CAST``
-* ``regexp_match(column, pattern)``
+- `>`, `>=`, `<`, `<=`, `=`
+- `AND`, `OR`, `NOT`
+- `IS NULL`, `IS NOT NULL`
+- `IS TRUE`, `IS NOT TRUE`, `IS FALSE`, `IS NOT FALSE`
+- `IN`
+- `LIKE`, `NOT LIKE`
+- `CAST`
+- `regexp_match(column, pattern)`
 
 For example, the following filter string is acceptable:
 
@@ -82,29 +80,27 @@ For example, the following filter string is acceptable:
 === "Javascript"
 
     ```javascript
-    await tbl.search(Array(1536).fill(0))
-       .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
-       .execute()
+    --8<-- "src/sql_legacy.ts:vec_search"
     ```
 
-
 If your column name contains special characters or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html),
 you can use backtick (`` ` ``) to escape it. For nested fields, each segment of the
 path must be wrapped in backticks.
 
 === "SQL"
+
     ```sql
     `CUBE` = 10 AND `column name with space` IS NOT NULL
       AND `nested with space`.`inner with space` < 2
     ```
 
-!!! warning
-    Field names containing periods (``.``) are not supported.
+!!!warning "Field names containing periods (`.`) are not supported."
 
 Literals for dates, timestamps, and decimals can be written by writing the string
 value after the type name. For example
 
 === "SQL"
+
     ```sql
     date_col = date '2021-01-01'
     and timestamp_col = timestamp '2021-01-01 00:00:00'
@@ -114,49 +110,47 @@ value after the type name. For example
 For timestamp columns, the precision can be specified as a number in the type
 parameter. Microsecond precision (6) is the default.
 
-| SQL              | Time unit    |
-|------------------|--------------|
-| ``timestamp(0)`` | Seconds      |
-| ``timestamp(3)`` | Milliseconds |
-| ``timestamp(6)`` | Microseconds |
-| ``timestamp(9)`` | Nanoseconds  |
+| SQL            | Time unit    |
+| -------------- | ------------ |
+| `timestamp(0)` | Seconds      |
+| `timestamp(3)` | Milliseconds |
+| `timestamp(6)` | Microseconds |
+| `timestamp(9)` | Nanoseconds  |
 
 LanceDB internally stores data in [Apache Arrow](https://arrow.apache.org/) format.
 The mapping from SQL types to Arrow types is:
 
-| SQL type | Arrow type |
-|----------|------------|
-| ``boolean`` | ``Boolean`` |
-| ``tinyint`` / ``tinyint unsigned`` | ``Int8`` / ``UInt8`` |
-| ``smallint`` / ``smallint unsigned`` | ``Int16`` / ``UInt16`` |
-| ``int`` or ``integer`` / ``int unsigned`` or ``integer unsigned`` | ``Int32`` / ``UInt32`` |
-| ``bigint`` / ``bigint unsigned`` | ``Int64`` / ``UInt64`` |
-| ``float`` | ``Float32`` |
-| ``double`` | ``Float64`` |
-| ``decimal(precision, scale)`` | ``Decimal128`` |
-| ``date`` | ``Date32`` |
-| ``timestamp`` | ``Timestamp`` [^1] |
-| ``string`` | ``Utf8`` |
-| ``binary`` | ``Binary`` |
+| SQL type                                                  | Arrow type         |
+| --------------------------------------------------------- | ------------------ |
+| `boolean`                                                 | `Boolean`          |
+| `tinyint` / `tinyint unsigned`                            | `Int8` / `UInt8`   |
+| `smallint` / `smallint unsigned`                          | `Int16` / `UInt16` |
+| `int` or `integer` / `int unsigned` or `integer unsigned` | `Int32` / `UInt32` |
+| `bigint` / `bigint unsigned`                              | `Int64` / `UInt64` |
+| `float`                                                   | `Float32`          |
+| `double`                                                  | `Float64`          |
+| `decimal(precision, scale)`                               | `Decimal128`       |
+| `date`                                                    | `Date32`           |
+| `timestamp`                                               | `Timestamp` [^1]   |
+| `string`                                                  | `Utf8`             |
+| `binary`                                                  | `Binary`           |
 
 [^1]: See precision mapping in previous table.
 
-
 ## Filtering without Vector Search
 
 You can also filter your data without search.
 
 === "Python"
-      ```python
-      tbl.search().where("id = 10").limit(10).to_arrow()
-      ```
+
+    ```python
+    tbl.search().where("id = 10").limit(10).to_arrow()
+    ```
 
 === "JavaScript"
-      ```javascript
-      await tbl.where('id = 10').limit(10).execute()
-      ```
 
-!!! warning
-    If your table is large, this could potentially return a very large
-    amount of data. Please be sure to use a `limit` clause unless
-    you're sure you want to return the whole result set.
+    ```javascript
+    --8<-- "src/sql_legacy.ts:sql_search"
+    ```
+
+!!!warning "If your table is large, this could potentially return a very large amount of data. Please be sure to use a `limit` clause unless you're sure you want to return the whole result set."
diff --git a/docs/src/sql_legacy.ts b/docs/src/sql_legacy.ts
new file mode 100644
index 00000000..76366481
--- /dev/null
+++ b/docs/src/sql_legacy.ts
@@ -0,0 +1,38 @@
+import * as vectordb from "vectordb";
+
+(async () => { // async IIFE so the snippet regions below can use await
+  const db = await vectordb.connect("data/sample-lancedb");
+
+  let data = []; // filled with 10,000 rows: numeric id, "item <i>" label, string id, 1536-element vector
+  for (let i = 0; i < 10_000; i++) {
+    data.push({
+      vector: Array(1536).fill(i),
+      id: i,
+      item: `item ${i}`,
+      strId: `${i}`,
+    });
+  }
+  const tbl = await db.createTable("myVectors", data); // NOTE(review): no cleanup of an existing "myVectors" table here; confirm reruns are OK
+
+  // --8<-- [start:search]
+  let result = await tbl
+    .search(Array(1536).fill(0.5))
+    .limit(1)
+    .filter("id = 10")
+    .prefilter(true)
+    .execute();
+  // --8<-- [end:search]
+
+  // --8<-- [start:vec_search]
+  await tbl
+    .search(Array(1536).fill(0))
+    .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
+    .execute();
+  // --8<-- [end:vec_search]
+
+  // --8<-- [start:sql_search]
+  await tbl.filter("id = 10").limit(10).execute();
+  // --8<-- [end:sql_search]
+
+  console.log("SQL search: done"); // reached only if every awaited step above resolved
+})();
diff --git a/docs/test/md_testing.js b/docs/test/md_testing.js
deleted file mode 100644
index fae34fb6..00000000
--- a/docs/test/md_testing.js
+++ /dev/null
@@ -1,60 +0,0 @@
-const glob = require("glob");
-const fs = require("fs");
-const path = require("path");
-
-const globString = "../src/**/*.md";
-
-const excludedGlobs = [
-  "../src/fts.md",
-  "../src/embedding.md",
-  "../src/examples/*.md",
-  "../src/guides/tables.md",
-  "../src/guides/storage.md",
-  "../src/embeddings/*.md",
-  "../src/javascript/**/*.md",
-  "../src/basic.md",
-];
-
-const nodePrefix = "javascript";
-const nodeFile = ".js";
-const nodeFolder = "node";
-const asyncPrefix = "(async () => {\n";
-const asyncSuffix = "})();";
-
-function* yieldLines(lines, prefix, suffix) {
-  let inCodeBlock = false;
-  for (const line of lines) {
-    if (line.trim().startsWith(prefix + nodePrefix)) {
-      inCodeBlock = true;
-    } else if (inCodeBlock && line.trim().startsWith(suffix)) {
-      inCodeBlock = false;
-      yield "\n";
-    } else if (inCodeBlock) {
-      yield line;
-    }
-  }
-}
-
-const files = glob.sync(globString, { recursive: true });
-const excludedFiles = glob.sync(excludedGlobs, { recursive: true });
-
-for (const file of files.filter((file) => !excludedFiles.includes(file))) {
-  const lines = [];
-  const data = fs.readFileSync(file, "utf-8");
-  const fileLines = data.split("\n");
-
-  for (const line of yieldLines(fileLines, "```", "```")) {
-    lines.push(line);
-  }
-
-  if (lines.length > 0) {
-    const fileName = path.basename(file, ".md");
-    const outPath = path.join(nodeFolder, fileName, `${fileName}${nodeFile}`);
-    console.log(outPath);
-    fs.mkdirSync(path.dirname(outPath), { recursive: true });
-    fs.writeFileSync(
-      outPath,
-      asyncPrefix + "\n" + lines.join("\n") + asyncSuffix
-    );
-  }
-}
diff --git a/docs/test/package.json b/docs/test/package.json
deleted file mode 100644
index 37c676c8..00000000
--- a/docs/test/package.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "name": "lancedb-docs-test",
-  "version": "1.0.0",
-  "description": "",
-  "author": "",
-  "license": "ISC",
-  "dependencies": {
-    "fs": "^0.0.1-security",
-    "glob": "^10.2.7",
-    "path": "^0.12.7",
-    "vectordb": "https://gitpkg.now.sh/lancedb/lancedb/node?main"
-  }
-}
diff --git a/node/src/index.ts b/node/src/index.ts
index 50661d9e..bb2af069 100644
--- a/node/src/index.ts
+++ b/node/src/index.ts
@@ -443,6 +443,8 @@ export interface Table<T = number[]> {
    */
   indexStats: (indexUuid: string) => Promise<IndexStats>
 
+  filter (value: string): Query<T>
+
   schema: Promise<Schema>
 }
 
diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts
index e49ae3be..b08d9e6c 100644
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -270,6 +270,10 @@ export class RemoteTable<T = number[]> implements Table<T> {
     return new RemoteQuery(query, this._client, this._name) //, this._embeddings_new)
   }
 
+  filter (where: string): Query<T> { // `where` is unused in this body
+    throw new Error('Not implemented') // always throws: RemoteTable provides no filter-only query here
+  }
+
   async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
     let tbl: ArrowTable
     if (data instanceof ArrowTable) {